From f0488e696a02f5be895732e4cb832cdc6fcf984e Mon Sep 17 00:00:00 2001 From: Scott Lamb Date: Mon, 12 Aug 2024 14:28:32 -0400 Subject: [PATCH] WIP: h.265 support left to do: * use released version of h264-reader * fix definition of is random access point to take into account nuh layer id, check if we're doing the right thing with stsa * fill in validate_order --- .github/workflows/ci.yml | 3 +- Cargo.lock | 36 +- Cargo.toml | 7 +- examples/client/src/mp4.rs | 5 +- fuzz/Cargo.lock | 32 +- fuzz/Cargo.toml | 13 + fuzz/fuzz_targets/depacketize_h265.rs | 60 ++ fuzz/fuzz_targets/h265_nal.rs | 23 + src/client/parse.rs | 21 +- src/codec/h264.rs | 124 +-- src/codec/h265.rs | 931 +++++++++++++++++++ src/codec/h265/nal.rs | 1187 +++++++++++++++++++++++++ src/codec/h265/record.rs | 214 +++++ src/codec/h26x.rs | 73 ++ src/codec/jpeg.rs | 4 +- src/codec/mod.rs | 23 + src/testutil.rs | 20 + 17 files changed, 2632 insertions(+), 144 deletions(-) create mode 100644 fuzz/fuzz_targets/depacketize_h265.rs create mode 100644 fuzz/fuzz_targets/h265_nal.rs create mode 100644 src/codec/h265.rs create mode 100644 src/codec/h265/nal.rs create mode 100644 src/codec/h265/record.rs create mode 100644 src/codec/h26x.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1547e62..99bb553 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,10 +12,11 @@ jobs: matrix: rust: - stable - - 1.67 + - 1.79 include: - rust: stable extra_components: rustfmt + fail-fast: false runs-on: ubuntu-20.04 steps: - name: Checkout diff --git a/Cargo.lock b/Cargo.lock index 430091c..cd3556a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -69,9 +69,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anyhow" -version = "1.0.58" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704" +checksum = 
"b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" [[package]] name = "arc-swap" @@ -201,6 +201,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97d524fdb78bf6dc6d2dc4c02043e4b4962ede0a17ae3e13f0ed211a7eda5897" +[[package]] +name = "bitstream-io" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c12d1856e42f0d817a835fe55853957c85c8c8a470114029143d3f12671446e" + [[package]] name = "block-buffer" version = "0.10.2" @@ -778,9 +784,9 @@ dependencies = [ [[package]] name = "four-cc" -version = "0.1.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3958af68a31b1d1384d3f39b6aa33eb14b6009065b5ca305ddd9712a4237124f" +checksum = "795cbfc56d419a7ce47ccbb7504dd9a5b7c484c083c356e797de08bd988d9629" [[package]] name = "futures" @@ -922,11 +928,10 @@ dependencies = [ [[package]] name = "h264-reader" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd118dcc322cc71cfc33254a19ebece92cfaaf6d4b4793fec3f7f44fbc4150df" +version = "0.8.0-dev" +source = "git+https://github.com/scottlamb/h264-reader?rev=35968d5ca67f317fd35c1e344adb8ae14ee2efd6#35968d5ca67f317fd35c1e344adb8ae14ee2efd6" dependencies = [ - "bitstream-io", + "bitstream-io 2.3.0", "hex-slice", "log", "memchr", @@ -1202,9 +1207,9 @@ dependencies = [ [[package]] name = "mp4ra-rust" -version = "0.1.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be9daf03b43bf3842962947c62ba40f411e46a58774c60838038f04a67d17626" +checksum = "fdbc3d3867085d66ac6270482e66f3dd2c5a18451a3dc9ad7269e94844a536b7" dependencies = [ "four-cc", ] @@ -1262,9 +1267,9 @@ dependencies = [ [[package]] name = "nom" -version = "7.1.1" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" +checksum 
= "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" dependencies = [ "memchr", "minimal-lexical", @@ -1713,7 +1718,7 @@ name = "retina" version = "0.4.8" dependencies = [ "base64", - "bitstream-io", + "bitstream-io 1.5.0", "bytes", "criterion", "futures", @@ -1738,11 +1743,10 @@ dependencies = [ [[package]] name = "rfc6381-codec" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4395f46a67f0d57c57f6a5361f3a9a0c0183a19cab3998892ecdc003de6d8037" +checksum = "ed54c20f5c3ec82eab6d998b313dc75ec5d5650d4f57675e61d72489040297fd" dependencies = [ - "four-cc", "mp4ra-rust", "mpeg4-audio-const", ] diff --git a/Cargo.toml b/Cargo.toml index f87422a..627977e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,10 +13,11 @@ categories = ["network-programming", "multimedia"] description = "high-level RTSP multimedia streaming library" repository = "https://github.com/scottlamb/retina" include = ["src/**/*", "benches", "Cargo.toml"] -rust-version = "1.67" +rust-version = "1.79" [features] unstable-sample-entry = [] +unstable-h265 = [] [package.metadata.docs.rs] # https://docs.rs/about/metadata @@ -29,7 +30,9 @@ base64 = "0.21.0" bitstream-io = "1.1" bytes = "1.0.1" futures = "0.3.14" -h264-reader = "0.7.0" +#h264-reader = "0.7.0" +#h264-reader = { path = "../../crates/h264-reader" } +h264-reader = { git = "https://github.com/scottlamb/h264-reader", rev = "35968d5ca67f317fd35c1e344adb8ae14ee2efd6" } hex = "0.4.3" http-auth = "0.1.2" log = "0.4.8" diff --git a/examples/client/src/mp4.rs b/examples/client/src/mp4.rs index a49e637..2b73bf1 100644 --- a/examples/client/src/mp4.rs +++ b/examples/client/src/mp4.rs @@ -709,8 +709,9 @@ pub async fn run(opts: Opts) -> Result<(), Error> { let video_stream_i = if !opts.no_video { let s = session.streams().iter().position(|s| { if s.media() == "video" { - if s.encoding_name() == "h264" || s.encoding_name() == "jpeg" { - log::info!("Using h264 video stream"); + let 
encoding_name = s.encoding_name(); + if matches!(encoding_name, "h264" | "h265" | "jpeg") { + log::info!("Using {encoding_name} video stream"); return true; } log::info!( diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index ef4f9db..f8e822b 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -32,6 +32,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97d524fdb78bf6dc6d2dc4c02043e4b4962ede0a17ae3e13f0ed211a7eda5897" +[[package]] +name = "bitstream-io" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dcde5f311c85b8ca30c2e4198d4326bc342c76541590106f5fa4a50946ea499" + [[package]] name = "block-buffer" version = "0.9.0" @@ -115,9 +121,9 @@ dependencies = [ [[package]] name = "four-cc" -version = "0.1.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3958af68a31b1d1384d3f39b6aa33eb14b6009065b5ca305ddd9712a4237124f" +checksum = "795cbfc56d419a7ce47ccbb7504dd9a5b7c484c083c356e797de08bd988d9629" [[package]] name = "futures" @@ -236,11 +242,9 @@ dependencies = [ [[package]] name = "h264-reader" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd118dcc322cc71cfc33254a19ebece92cfaaf6d4b4793fec3f7f44fbc4150df" +version = "0.7.1-dev" dependencies = [ - "bitstream-io", + "bitstream-io 2.5.0", "hex-slice", "log", "memchr", @@ -369,9 +373,9 @@ dependencies = [ [[package]] name = "mp4ra-rust" -version = "0.1.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be9daf03b43bf3842962947c62ba40f411e46a58774c60838038f04a67d17626" +checksum = "fdbc3d3867085d66ac6270482e66f3dd2c5a18451a3dc9ad7269e94844a536b7" dependencies = [ "four-cc", ] @@ -384,13 +388,12 @@ checksum = "96a1fe2275b68991faded2c80aa4a33dba398b77d276038b8f50701a22e55918" [[package]] name = "nom" -version = "7.1.0" +version = "7.1.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" dependencies = [ "memchr", "minimal-lexical", - "version_check", ] [[package]] @@ -536,7 +539,7 @@ name = "retina" version = "0.4.8" dependencies = [ "base64 0.21.7", - "bitstream-io", + "bitstream-io 1.5.0", "bytes", "futures", "h264-reader", @@ -568,11 +571,10 @@ dependencies = [ [[package]] name = "rfc6381-codec" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4395f46a67f0d57c57f6a5361f3a9a0c0183a19cab3998892ecdc003de6d8037" +checksum = "ed54c20f5c3ec82eab6d998b313dc75ec5d5650d4f57675e61d72489040297fd" dependencies = [ - "four-cc", "mp4ra-rust", "mpeg4-audio-const", ] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 71036c6..a89f622 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -15,6 +15,7 @@ libfuzzer-sys = "0.4" [dependencies.retina] path = ".." 
+features = ["unstable-h265"] # Prevent this from interfering with workspaces [workspace] @@ -26,6 +27,12 @@ path = "fuzz_targets/depacketize_h264.rs" test = false doc = false +[[bin]] +name = "depacketize_h265" +path = "fuzz_targets/depacketize_h265.rs" +test = false +doc = false + [[bin]] name = "roundtrip_h264" path = "fuzz_targets/roundtrip_h264.rs" @@ -37,3 +44,9 @@ name = "depacketize_jpeg" path = "fuzz_targets/depacketize_jpeg.rs" test = false doc = false + +[[bin]] +name = "h265_nal" +path = "fuzz_targets/h265_nal.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/depacketize_h265.rs b/fuzz/fuzz_targets/depacketize_h265.rs new file mode 100644 index 0000000..301ad25 --- /dev/null +++ b/fuzz/fuzz_targets/depacketize_h265.rs @@ -0,0 +1,60 @@ +// Copyright (C) 2021 Scott Lamb +// SPDX-License-Identifier: MIT OR Apache-2.0 + +#![no_main] +use libfuzzer_sys::fuzz_target; +use std::num::NonZeroU32; + +fuzz_target!(|data: &[u8]| { + let mut data = data; + let mut depacketizer = retina::codec::Depacketizer::new( + "video", "h265", 90_000, None, Some("profile-id=1;sprop-sps=QgEBAWAAAAMAsAAAAwAAAwBaoAWCAeFja5JFL83BQYFBAAADAAEAAAMADKE=;sprop-pps=RAHA8saNA7NA;sprop-vps=QAEMAf//AWAAAAMAsAAAAwAAAwBarAwAAAMABAAAAwAyqA==")).unwrap(); + let mut timestamp = retina::Timestamp::new(0, NonZeroU32::new(90_000).unwrap(), 0).unwrap(); + let mut sequence_number: u16 = 0; + let conn_ctx = retina::ConnectionContext::dummy(); + let stream_ctx = retina::StreamContext::dummy(); + let pkt_ctx = retina::PacketContext::dummy(); + loop { + let (hdr, rest) = match data.split_first() { + Some(r) => r, + None => return, + }; + let ts_change = (hdr & 0b001) != 0; + let mark = (hdr & 0b010) != 0; + let loss = (hdr & 0b100) != 0; + let len = usize::from(hdr >> 3); + if rest.len() < len { + return; + } + let (payload, rest) = rest.split_at(len); + data = rest; + if loss { + sequence_number = sequence_number.wrapping_add(1); + } + if ts_change { + timestamp = 
timestamp.try_add(1).unwrap(); + } + let pkt = retina::rtp::ReceivedPacketBuilder { + ctx: pkt_ctx, + stream_id: 0, + timestamp, + ssrc: 0, + sequence_number, + loss: u16::from(loss), + payload_type: 96, + mark, + } + .build(payload.iter().copied()) + .unwrap(); + // println!("pkt: {:#?}", pkt); + if depacketizer.push(pkt).is_err() { + return; + } + while let Some(item) = depacketizer.pull(&conn_ctx, &stream_ctx).transpose() { + if item.is_err() { + return; + } + } + sequence_number = sequence_number.wrapping_add(1); + } +}); diff --git a/fuzz/fuzz_targets/h265_nal.rs b/fuzz/fuzz_targets/h265_nal.rs new file mode 100644 index 0000000..e4d19ba --- /dev/null +++ b/fuzz/fuzz_targets/h265_nal.rs @@ -0,0 +1,23 @@ +// Copyright (C) 2024 Scott Lamb +// SPDX-License-Identifier: MIT OR Apache-2.0 + +#![no_main] +use libfuzzer_sys::fuzz_target; + +use retina::codec::h265::nal; + +fuzz_target!(|data: &[u8]| { + let Ok((h, bits)) = nal::split(data) else { + return; + }; + + match h.unit_type() { + nal::UnitType::SpsNut => { + let _ = nal::Sps::from_bits(bits); + } + nal::UnitType::PpsNut => { + let _ = nal::Pps::from_bits(bits); + } + _ => {} + } +}); diff --git a/src/client/parse.rs b/src/client/parse.rs index 98dc483..1d24a9c 100644 --- a/src/client/parse.rs +++ b/src/client/parse.rs @@ -943,14 +943,19 @@ mod tests { assert_eq!(p.streams[0].media(), "video"); assert_eq!(p.streams[0].encoding_name(), "h265"); assert_eq!(p.streams[0].rtp_payload_type, 98); - assert!(p.streams[0].parameters().is_none()); - assert_eq!(p.streams[1].media(), "audio"); - assert_eq!(p.streams[1].encoding_name(), "pcma"); - assert_eq!(p.streams[1].rtp_payload_type, 8); - match p.streams[1].parameters().unwrap() { - ParametersRef::Audio(_) => {} - _ => panic!(), - }; + + if cfg!(feature = "unstable-h265") { + assert!(p.streams[0].parameters().is_some()); + assert_eq!(p.streams[1].media(), "audio"); + assert_eq!(p.streams[1].encoding_name(), "pcma"); + assert_eq!(p.streams[1].rtp_payload_type, 8); + 
match p.streams[1].parameters().unwrap() { + ParametersRef::Audio(_) => {} + _ => panic!(), + }; + } else { + assert!(p.streams[0].parameters().is_none()); + } } #[test] diff --git a/src/codec/h264.rs b/src/codec/h264.rs index d5f6956..08d3f26 100644 --- a/src/codec/h264.rs +++ b/src/codec/h264.rs @@ -1,7 +1,8 @@ // Copyright (C) 2021 Scott Lamb // SPDX-License-Identifier: MIT OR Apache-2.0 -//! [H.264](https://www.itu.int/rec/T-REC-H.264-201906-I/en)-encoded video. +//! [H.264](https://www.itu.int/rec/T-REC-H.264-201906-I/en)-encoded video, +//! with RTP encoding as in [RFC 6184](https://tools.ietf.org/html/rfc6184). use std::convert::TryFrom; use std::fmt::Write; @@ -12,6 +13,7 @@ use h264_reader::nal::{NalHeader, UnitType}; use log::{debug, log_enabled, trace}; use crate::{ + codec::h26x::TolerantBitReader, codec::write_visual_sample_entry_body, rtp::{ReceivedPacket, ReceivedPacketBuilder}, Error, Timestamp, @@ -23,9 +25,12 @@ use super::VideoFrame; /// and produces unfragmented NAL units as specified in [RFC /// 6184](https://tools.ietf.org/html/rfc6184). /// -/// This doesn't inspect the contents of the NAL units, so it doesn't depend on or -/// verify compliance with H.264 section 7.4.1.2.3 "Order of NAL units and coded -/// pictures and association to access units". +/// This inspects the contents of the NAL units only minimally, and largely for +/// logging. In particular, it doesn't completely enforce or verify compliance with H.264 +/// section 7.4.1.2.3 "Order of NAL units and coded pictures and association to +/// access units". For compatibility with some broken cameras that change +/// timestamps mid-AU, it does extend AUs if they end with parameter sets. +/// See `can_end_au`. /// /// Currently expects that the stream starts at an access unit boundary unless /// packet loss is indicated. 
@@ -230,11 +235,10 @@ impl Depacketizer { return Err("Empty NAL".into()); } // https://tools.ietf.org/html/rfc6184#section-5.2 - let nal_header = data[0]; + let nal_header = data.get_u8(); if (nal_header >> 7) != 0 { return Err(format!("NAL header {nal_header:02x} has F bit set")); } - data.advance(1); // skip the header byte. match nal_header & 0b11111 { 1..=23 => { if access_unit.in_fu_a { @@ -242,7 +246,7 @@ impl Depacketizer { "Non-fragmented NAL {nal_header:02x} while fragment in progress" )); } - let len = u32::try_from(data.len()).expect("data len < u16::MAX") + 1; + let len = u32::try_from(data.len()).expect("data len should be <= u16::MAX") + 1; let next_piece_idx = self.add_piece(data)?; self.nals.push(Nal { hdr: NalHeader::new(nal_header).expect("header w/o F bit set is valid"), @@ -306,14 +310,13 @@ impl Depacketizer { if data.len() < 2 { return Err(format!("FU-A len {} too short", data.len())); } - let fu_header = data[0]; + let fu_header = data.get_u8(); let start = (fu_header & 0b10000000) != 0; let end = (fu_header & 0b01000000) != 0; let reserved = (fu_header & 0b00100000) != 0; let nal_header = NalHeader::new((nal_header & 0b011100000) | (fu_header & 0b00011111)) .expect("NalHeader is valid"); - data.advance(1); if (start && end) || reserved { return Err(format!("Invalid FU-A header {fu_header:02x}")); } @@ -363,7 +366,7 @@ impl Depacketizer { } } } - _ => return Err(format!("bad nal header {nal_header:02x}")), + _ => return Err(format!("unexpected/bad nal header {nal_header:02x}")), } self.input_state = if mark { let last_nal_hdr = self.nals.last().unwrap().hdr; @@ -430,7 +433,7 @@ impl Depacketizer { fn finalize_access_unit(&mut self, au: AccessUnit, reason: &str) -> Result { let mut piece_idx = 0; let mut retained_len = 0usize; - let mut is_random_access_point = false; + let mut is_random_access_point = true; let mut is_disposable = true; let mut new_sps = None; let mut new_pps = None; @@ -462,7 +465,10 @@ impl Depacketizer { new_pps = 
Some(to_bytes(nal.hdr, nal.len, nal_pieces)); } } - UnitType::SliceLayerWithoutPartitioningIdr => is_random_access_point = true, + UnitType::SliceDataPartitionALayer + | UnitType::SliceDataPartitionBLayer + | UnitType::SliceDataPartitionCLayer + | UnitType::SliceLayerWithoutPartitioningNonIdr => is_random_access_point = false, _ => {} } if nal.hdr.nal_ref_idc() != 0 { @@ -628,84 +634,6 @@ struct InternalParameters { seen_extra_trailing_data: bool, } -/// `h264_reader::rbsp::BitRead` impl that *notes* extra trailing data rather than failing on it. -/// -/// Some (Reolink) cameras appear to have a stray extra byte at the end. Follow the lead of most -/// other RTSP implementations in tolerating this. -#[derive(Debug)] -struct TolerantBitReader<'a, R> { - inner: R, - has_extra_trailing_data: &'a mut bool, -} - -impl<'a, R: h264_reader::rbsp::BitRead> h264_reader::rbsp::BitRead for TolerantBitReader<'a, R> { - fn read_ue(&mut self, name: &'static str) -> Result { - self.inner.read_ue(name) - } - - fn read_se(&mut self, name: &'static str) -> Result { - self.inner.read_se(name) - } - - fn read_bool(&mut self, name: &'static str) -> Result { - self.inner.read_bool(name) - } - - fn read_u8( - &mut self, - bit_count: u32, - name: &'static str, - ) -> Result { - self.inner.read_u8(bit_count, name) - } - - fn read_u16( - &mut self, - bit_count: u32, - name: &'static str, - ) -> Result { - self.inner.read_u16(bit_count, name) - } - - fn read_u32( - &mut self, - bit_count: u32, - name: &'static str, - ) -> Result { - self.inner.read_u32(bit_count, name) - } - - fn read_i32( - &mut self, - bit_count: u32, - name: &'static str, - ) -> Result { - self.inner.read_i32(bit_count, name) - } - - fn has_more_rbsp_data( - &mut self, - name: &'static str, - ) -> Result { - self.inner.has_more_rbsp_data(name) - } - - fn finish_rbsp(self) -> Result<(), h264_reader::rbsp::BitReaderError> { - match self.inner.finish_rbsp() { - Ok(()) => Ok(()), - 
Err(h264_reader::rbsp::BitReaderError::RemainingData) => { - *self.has_extra_trailing_data = true; - Ok(()) - } - Err(e) => Err(e), - } - } - - fn finish_sei_payload(self) -> Result<(), h264_reader::rbsp::BitReaderError> { - self.inner.finish_sei_payload() - } -} - /// Writes an `avc1` / `AVCSampleEntry` as in ISO/IEC 14496-15 section 5.4.2.1. fn make_video_sample_entry(pixel_dimensions: (u32, u32), extra_data: &[u8]) -> Option> { let pixel_dimensions = ( @@ -880,7 +808,7 @@ impl InternalParameters { /// Returns true iff the bytes of `nal` equal the bytes of `[hdr, ..data]`. fn nal_matches(nal: &[u8], hdr: NalHeader, pieces: &[Bytes]) -> bool { - if nal.is_empty() || nal[0] != u8::from(hdr) { + if nal.first() != Some(&u8::from(hdr)) { return false; } let mut nal_pos = 1; @@ -897,7 +825,7 @@ fn nal_matches(nal: &[u8], hdr: NalHeader, pieces: &[Bytes]) -> bool { nal_pos == nal.len() } -/// Saves the given NAL to a contiguous Bytes. +/// Saves the given NAL to a contiguous `Bytes`. fn to_bytes(hdr: NalHeader, len: u32, pieces: &[Bytes]) -> Bytes { let len = usize::try_from(len).expect("u32 fits in usize"); let mut out = Vec::with_capacity(len); @@ -1122,7 +1050,7 @@ enum PacketizerState { mod tests { use std::num::NonZeroU32; - use crate::testutil::init_logging; + use crate::testutil::{assert_eq_hex, init_logging}; use crate::{codec::CodecItem, rtp::ReceivedPacketBuilder}; /* @@ -1174,7 +1102,7 @@ mod tests { } } } - assert_eq!(frame.unwrap().data(), &sample.bytes); + assert_eq_hex!(frame.unwrap().data(), &sample.bytes); } } */ @@ -1276,7 +1204,7 @@ mod tests { Some(CodecItem::VideoFrame(frame)) => frame, _ => panic!(), }; - assert_eq!( + assert_eq_hex!( frame.data(), b"\x00\x00\x00\x06\x06plain\ \x00\x00\x00\x09\x06stap-a 1\ @@ -1360,7 +1288,7 @@ mod tests { Some(CodecItem::VideoFrame(frame)) => frame, o => panic!("unexpected pull result {o:#?}"), }; - assert_eq!( + assert_eq_hex!( frame.data(), b"\x00\x00\x00\x0C\x67\x64\x00\x33\xac\x15\x14\xa0\xa0\x2f\xf9\x50\ 
\x00\x00\x00\x04\x68\xee\x3c\xb0\ @@ -1407,7 +1335,7 @@ mod tests { Some(CodecItem::VideoFrame(frame)) => frame, o => panic!("unexpected pull result {o:#?}"), }; - assert_eq!(frame.data(), b"\x00\x00\x00\x06\x01slice"); + assert_eq_hex!(frame.data(), b"\x00\x00\x00\x06\x01slice"); assert_eq!(frame.timestamp, ts1); d.push( ReceivedPacketBuilder { @@ -1463,7 +1391,7 @@ mod tests { Some(CodecItem::VideoFrame(frame)) => frame, o => panic!("unexpected pull result {o:#?}"), }; - assert_eq!( + assert_eq_hex!( frame.data(), b"\x00\x00\x00\x0C\x67\x64\x00\x33\xac\x15\x14\xa0\xa0\x2f\xf9\x50\ \x00\x00\x00\x04\x68\xee\x3c\xb0\ diff --git a/src/codec/h265.rs b/src/codec/h265.rs new file mode 100644 index 0000000..75d47c4 --- /dev/null +++ b/src/codec/h265.rs @@ -0,0 +1,931 @@ +// Copyright (C) 2024 Scott Lamb +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! [H.265](https://www.itu.int/rec/T-REC-H.265)-encoded video, +//! with RTP encoding as in [RFC 7798](https://tools.ietf.org/html/rfc7798). + +#[doc(hidden)] // `pub` only for fuzz tests. +pub mod nal; + +mod record; + +use std::convert::TryFrom; +use std::fmt::Write; + +use base64::Engine as _; +use bytes::{Buf, Bytes}; +use log::{debug, log_enabled, trace}; + +use crate::codec::{h26x::TolerantBitReader, write_visual_sample_entry_body}; +use crate::rtp::ReceivedPacket; + +use super::VideoFrame; + +/// A [super::Depacketizer] implementation which finds access unit boundaries +/// and produces unfragmented NAL units as specified in [RFC +/// 7798](https://tools.ietf.org/html/rfc7798). +/// +/// This inspects the contents of the NAL units only minimally, and largely for +/// logging. In particular, it doesn't completely enforce or verify compliance with +/// H.265 section 7.4.2.4 "Order of NAL units and association to coded pictures, +/// access units and coded video sequences". For compatibility with some broken +/// cameras that change timestamps mid-AU, it does extend AUs if they end with +/// parameter sets. 
See `can_end_au`. +/// +/// Currently expects that the stream starts at an access unit boundary unless +/// packet loss is indicated. +#[derive(Debug)] +pub(crate) struct Depacketizer { + input_state: DepacketizerInputState, + + /// A complete video frame ready for pull. + pending: Option, + + parameters: Option, + + /// In state `PreMark`, pieces of NALs, excluding their header bytes. + /// Kept around (empty) in other states to re-use the backing allocation. + pieces: Vec, + + /// In state `PreMark`, an entry for each NAL. + /// Kept around (empty) in other states to re-use the backing allocation. + nals: Vec, +} + +#[derive(Debug)] +struct Nal { + hdr: nal::Header, + + /// The length of `Depacketizer::pieces` as this NAL finishes. + next_piece_idx: u32, + + /// The total length of this NAL, including the 2 header bytes. + len: u32, +} + +/// An access unit that is currently being accumulated during `PreMark` state. +#[derive(Debug)] +struct AccessUnit { + start_ctx: crate::PacketContext, + end_ctx: crate::PacketContext, + timestamp: crate::Timestamp, + stream_id: usize, + + /// True iff currently processing a fragmentation unit (FU). + in_fu: bool, + + /// RTP packets lost as this access unit was starting. + loss: u16, + + same_ts_as_prev: bool, +} + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +enum DepacketizerInputState { + /// Not yet processing an access unit. + New, + + /// Ignoring the remainder of an access unit because of interior packet loss. + Loss { + timestamp: crate::Timestamp, + pkts: u16, + }, + + /// Currently processing an access unit. + /// This will be flushed after a marked packet or when receiving a later timestamp. + PreMark(AccessUnit), + + /// Finished processing the given packet. It's an error to receive the same timestamp again. + PostMark { + timestamp: crate::Timestamp, + loss: u16, + }, +} + +/// Takes a `nal::Header` from `data`, advancing the latter. +/// +/// Fails if `data` is too short or the NAL header is invalid. 
+fn take_hdr(data: &mut Bytes) -> Result { + let mut hdr_bytes = [0u8; 2]; + if data.len() < hdr_bytes.len() { + return Err("Short NAL".into()); + }; + data.copy_to_slice(&mut hdr_bytes); + Ok(nal::Header::try_from(hdr_bytes).map_err(|e| e.0)?) +} + +impl Depacketizer { + pub(super) fn new( + clock_rate: u32, + format_specific_params: Option<&str>, + ) -> Result { + if clock_rate != 90_000 { + return Err(format!( + "invalid H.265 clock rate {clock_rate}; must always be 90000" + )); + } + + let parameters = match format_specific_params { + None => None, + Some(fp) => match InternalParameters::parse_format_specific_params(fp) { + Ok(p) => Some(p), + Err(e) => { + log::warn!("Ignoring bad H.265 format-specific-params {:?}: {}", fp, e); + None + } + }, + }; + Ok(Depacketizer { + input_state: DepacketizerInputState::New, + pending: None, + pieces: Vec::new(), + nals: Vec::new(), + parameters, + }) + } + + pub(super) fn parameters(&self) -> Option { + self.parameters + .as_ref() + .map(|p| super::ParametersRef::Video(&p.generic_parameters)) + } + + pub(super) fn push(&mut self, pkt: ReceivedPacket) -> Result<(), String> { + // Push shouldn't be called until pull is exhausted. + if let Some(p) = self.pending.as_ref() { + panic!("push with data already pending: {p:?}"); + } + + let mut access_unit = + match std::mem::replace(&mut self.input_state, DepacketizerInputState::New) { + DepacketizerInputState::New => { + debug_assert!(self.nals.is_empty()); + debug_assert!(self.pieces.is_empty()); + AccessUnit::start(&pkt, 0, false) + } + DepacketizerInputState::PreMark(mut access_unit) => { + let loss = pkt.loss(); + if loss > 0 { + self.nals.clear(); + self.pieces.clear(); + if access_unit.timestamp.timestamp == pkt.timestamp().timestamp { + // Loss within this access unit. Ignore until mark or new timestamp. 
+ self.input_state = if pkt.mark() { + DepacketizerInputState::PostMark { + timestamp: pkt.timestamp(), + loss, + } + } else { + self.pieces.clear(); + self.nals.clear(); + DepacketizerInputState::Loss { + timestamp: pkt.timestamp(), + pkts: loss, + } + }; + return Ok(()); + } + // A suffix of a previous access unit was lost; discard it. + // A prefix of the new one may have been lost; try parsing. + AccessUnit::start(&pkt, 0, false) + } else if access_unit.timestamp.timestamp != pkt.timestamp().timestamp { + if access_unit.in_fu { + return Err(format!( + "Timestamp changed from {} to {} in the middle of a fragmented NAL", + access_unit.timestamp, + pkt.timestamp() + )); + } + let last_nal_hdr = self + .nals + .last() + .ok_or("nals should not be empty".to_string())? + .hdr; + if can_end_au(last_nal_hdr.unit_type()) { + access_unit.end_ctx = *pkt.ctx(); + self.pending = + Some(self.finalize_access_unit(access_unit, "ts change")?); + AccessUnit::start(&pkt, 0, false) + } else { + log::debug!( + "Bogus mid-access unit timestamp change after {:?}", + last_nal_hdr + ); + access_unit.timestamp.timestamp = pkt.timestamp().timestamp; + access_unit + } + } else { + access_unit + } + } + DepacketizerInputState::PostMark { + timestamp: state_ts, + loss, + } => { + debug_assert!(self.nals.is_empty()); + debug_assert!(self.pieces.is_empty()); + AccessUnit::start(&pkt, loss, state_ts.timestamp == pkt.timestamp().timestamp) + } + DepacketizerInputState::Loss { + timestamp, + mut pkts, + } => { + debug_assert!(self.nals.is_empty()); + debug_assert!(self.pieces.is_empty()); + if pkt.timestamp().timestamp == timestamp.timestamp { + pkts += pkt.loss(); + self.input_state = DepacketizerInputState::Loss { timestamp, pkts }; + return Ok(()); + } + AccessUnit::start(&pkt, pkts, false) + } + }; + + let ctx = *pkt.ctx(); + let mark = pkt.mark(); + let loss = pkt.loss(); + let timestamp = pkt.timestamp(); + let mut data = pkt.into_payload_bytes(); + + let hdr = take_hdr(&mut data)?; + + 
match u8::from(hdr.unit_type()) { + 1..=47 => { + // Single NAL Unit. https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.1 + if access_unit.in_fu { + return Err(format!( + "Non-fragmented NAL {hdr:?} while fragment in progress" + )); + } + let len = u32::try_from(data.len() + 2).expect("data.len() should be <= u16::MAX"); + let next_piece_idx = self.add_piece(data)?; + self.nals.push(Nal { + hdr, + next_piece_idx, + len, + }); + } + 48 => { + // Aggregation Packet. https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.2 + loop { + if data.remaining() < 2 { + return Err(format!( + "AP has {} remaining bytes; expecting 2-byte length", + data.remaining() + )); + } + let len = data.get_u16(); + match data.remaining().cmp(&usize::from(len)) { + std::cmp::Ordering::Less => { + return Err(format!( + "AP too short: {} bytes remaining, expecting {}-byte NAL", + data.remaining(), + len + )) + } + std::cmp::Ordering::Equal => { + let hdr = take_hdr(&mut data)?; + let next_piece_idx = self.add_piece(data)?; + self.nals.push(Nal { + hdr, + next_piece_idx, + len: u32::from(len), + }); + break; + } + std::cmp::Ordering::Greater => { + let mut piece = data.split_to(usize::from(len)); + let hdr = take_hdr(&mut piece)?; + let next_piece_idx = self.add_piece(piece)?; + self.nals.push(Nal { + hdr, + next_piece_idx, + len: u32::from(len), + }); + } + } + } + } + 49 => { + // Fragmentation Unit. https://datatracker.ietf.org/doc/html/rfc7798#section-4.4.3 + if data.len() < 2 { + return Err(format!("FU len {} too short", data.len())); + } + let fu_header = data.get_u8(); + let start = (fu_header & 0b10000000) != 0; + let end = (fu_header & 0b01000000) != 0; + let fu_type = nal::UnitType::try_from(fu_header & 0b00111111) + .expect("all 6-bit ints should be valid UnitTypes"); + let hdr = hdr.with_unit_type(fu_type); + + // Note: as only `tx-mode` `SRST` is supported, there is no DONL + // field to decode. 
+ + if start && end { + return Err(format!("Invalid FU header {fu_header:02x}")); + } + if !end && mark { + return Err("FU pkt with MARK && !END".into()); + } + let u32_len = u32::try_from(data.len()) + .map_err(|_| "RTP packet len must be < u16::MAX".to_string())?; + match (start, access_unit.in_fu) { + (true, true) => return Err("FU with start bit while frag in progress".into()), + (true, false) => { + self.add_piece(data)?; + self.nals.push(Nal { + hdr, + next_piece_idx: u32::MAX, // should be overwritten later. + len: 2 + u32_len, + }); + access_unit.in_fu = true; + } + (false, true) => { + let pieces = self.add_piece(data)?; + let nal = self + .nals + .last_mut() + .ok_or("nals non-empty while in fu".to_string())?; + if hdr != nal.hdr { + return Err(format!( + "FU has inconsistent NAL type: {:?} then {:?}", + nal.hdr, hdr, + )); + } + nal.len += u32_len; + if end { + nal.next_piece_idx = pieces; + access_unit.in_fu = false; + } else if mark { + return Err("FU has MARK and no END".into()); + } + } + (false, false) => { + if loss > 0 { + self.pieces.clear(); + self.nals.clear(); + self.input_state = DepacketizerInputState::Loss { + timestamp, + pkts: loss, + }; + return Ok(()); + } + return Err("FU has start bit unset while no frag in progress".into()); + } + } + } + _ => return Err(format!("unexpected/bad nal header {hdr:?}")), + } + + self.input_state = if mark { + let last_nal_hdr = self + .nals + .last() + .ok_or("nals should not be empty after mark".to_string())? 
+ .hdr; + if can_end_au(last_nal_hdr.unit_type()) { + access_unit.end_ctx = ctx; + self.pending = Some(self.finalize_access_unit(access_unit, "mark")?); + DepacketizerInputState::PostMark { timestamp, loss: 0 } + } else { + log::debug!( + "Bogus mid-access unit timestamp change after {:?}", + last_nal_hdr + ); + access_unit.timestamp.timestamp = timestamp.timestamp; + DepacketizerInputState::PreMark(access_unit) + } + } else { + DepacketizerInputState::PreMark(access_unit) + }; + Ok(()) + } + + pub(super) fn pull(&mut self) -> Option { + self.pending.take().map(super::CodecItem::VideoFrame) + } + + /// Adds a piece to `self.pieces`, erroring if it becomes absurdly large. + fn add_piece(&mut self, piece: Bytes) -> Result { + self.pieces.push(piece); + u32::try_from(self.pieces.len()).map_err(|_| "more than u32::MAX pieces!".to_string()) + } + + /// Logs information about each access unit. + /// Currently, "bad" access units (violating certain specification rules) + /// are logged at debug priority, and others are logged at trace priority. 
+ fn log_access_unit(&self, au: &AccessUnit, reason: &str) { + let mut errs = String::new(); + if au.same_ts_as_prev { + errs.push_str("\n* same timestamp as previous access unit"); + } + validate_order(&self.nals, &mut errs); + if !errs.is_empty() { + let mut nals = String::new(); + for (i, nal) in self.nals.iter().enumerate() { + let _ = write!(&mut nals, "\n {}: {:?}", i, nal.hdr); + } + debug!( + "bad access unit (ended by {}) at ts {}\nerrors are:{}\nNALs are:{}", + reason, au.timestamp, errs, nals + ); + } else if log_enabled!(log::Level::Trace) { + let mut nals = String::new(); + for (i, nal) in self.nals.iter().enumerate() { + let _ = write!(&mut nals, "\n {}: {:?}", i, nal.hdr); + } + trace!( + "access unit (ended by {}) at ts {}; NALS are:{}", + reason, + au.timestamp, + nals + ); + } + } + + fn finalize_access_unit(&mut self, au: AccessUnit, reason: &str) -> Result { + let mut piece_idx = 0; + let mut retained_len = 0usize; + + // In H.265 terms, this is an IRAP. The coded picture with + // `nuh_layer_id == 0` must have only VCLs with `nal_unit_type` in + // the range `[BLA_W_LP, RSV_IRAP_VCL23]`. 
+ let mut is_random_access_point = false; + let is_disposable = false; + let mut new_vps = None::; + let mut new_sps = None::; + let mut new_pps = None::; + + if log_enabled!(log::Level::Debug) { + self.log_access_unit(&au, reason); + } + for nal in &self.nals { + let next_piece_idx = usize::try_from(nal.next_piece_idx).expect("u32 fits in usize"); + let nal_pieces = &self.pieces[piece_idx..next_piece_idx]; + match nal.hdr.unit_type() { + nal::UnitType::VpsNut => { + if self + .parameters + .as_ref() + .map(|p| !nal_matches(&p.vps_nal[..], nal.hdr, nal_pieces)) + .unwrap_or(true) + { + new_vps = Some(to_bytes(nal.hdr, nal.len, nal_pieces)); + } + } + nal::UnitType::SpsNut => { + if self + .parameters + .as_ref() + .map(|p| !nal_matches(&p.sps_nal[..], nal.hdr, nal_pieces)) + .unwrap_or(true) + { + new_sps = Some(to_bytes(nal.hdr, nal.len, nal_pieces)); + } + } + nal::UnitType::PpsNut => { + if self + .parameters + .as_ref() + .map(|p| !nal_matches(&p.pps_nal[..], nal.hdr, nal_pieces)) + .unwrap_or(true) + { + new_pps = Some(to_bytes(nal.hdr, nal.len, nal_pieces)); + } + } + // TODO: invert; start with rap, set to false on non-IDR. 
+ nal::UnitType::IdrNLp // IDR_N_LP + | nal::UnitType::IdrWRadl // IDR_W_RADL + | nal::UnitType::CraNut // CRA_NUT + | nal::UnitType::BlaNLp // BLA_N_LP + | nal::UnitType::BlaWLp // BLA_W_LP + | nal::UnitType::BlaWRadl => { // BLA_W_RADL + is_random_access_point = true; + } + _ => {} + } + retained_len += 4usize + usize::try_from(nal.len).expect("u32 fits in usize"); + piece_idx = next_piece_idx; + } + let mut data = Vec::with_capacity(retained_len); + piece_idx = 0; + for nal in &self.nals { + let next_piece_idx = usize::try_from(nal.next_piece_idx).expect("u32 fits in usize"); + let nal_pieces = &self.pieces[piece_idx..next_piece_idx]; + + data.extend_from_slice(&nal.len.to_be_bytes()); + data.extend_from_slice(&nal.hdr[..]); + + let mut actual_len = 2; + for piece in nal_pieces { + data.extend_from_slice(&piece[..]); + actual_len += piece.len(); + } + debug_assert_eq!( + usize::try_from(nal.len).expect("u32 fits in usize"), + actual_len + ); + piece_idx = next_piece_idx; + } + debug_assert_eq!(retained_len, data.len()); + + self.nals.clear(); + self.pieces.clear(); + + // TODO: simpler if we require all or none to be set? + // although only one could be different. 
+ let all_new_params = new_vps.is_some() && new_sps.is_some() && new_pps.is_some(); + let some_new_params = new_vps.is_some() || new_sps.is_some() || new_pps.is_some(); + let has_new_parameters = if all_new_params || (some_new_params && self.parameters.is_some()) + { + let old_ip = self.parameters.as_ref(); + let vps_nal = new_vps + .as_deref() + .unwrap_or_else(|| &old_ip.unwrap().vps_nal); + let sps_nal = new_sps + .as_deref() + .unwrap_or_else(|| &old_ip.unwrap().sps_nal); + let pps_nal = new_pps + .as_deref() + .unwrap_or_else(|| &old_ip.unwrap().pps_nal); + let seen_extra_trailing_data = + old_ip.map(|o| o.seen_extra_trailing_data).unwrap_or(false); + self.parameters = Some(InternalParameters::parse_vps_sps_pps( + vps_nal, + sps_nal, + pps_nal, + seen_extra_trailing_data, + )?); + true + } else { + false + }; + + Ok(VideoFrame { + has_new_parameters, + loss: au.loss, + start_ctx: au.start_ctx, + end_ctx: au.end_ctx, + timestamp: au.timestamp, + stream_id: au.stream_id, + is_random_access_point, + is_disposable, + data, + }) + } +} + +/// Returns true if we allow the given NAL unit type to end an access unit. +fn can_end_au(nal_unit_type: nal::UnitType) -> bool { + // H.265 section 7.4.2.4.4 "Order of NAL units and coded pictures and their + // association to access units" says "When any VPS NAL units, SPS NAL units, + // PPS NAL units, prefix SEI NAL units, NAL units with nal_unit_type in the + // range of RSV_NVCL41..RSV_NVCL44, or NAL units with nal_unit_type in the + // range of UNSPEC48..UNSPEC55 are present, they shall not follow the last + // VCL NAL unit of the access unit." 
+ !matches!( + nal_unit_type, + nal::UnitType::VpsNut + | nal::UnitType::SpsNut + | nal::UnitType::PpsNut + | nal::UnitType::RsvNvcl41 + | nal::UnitType::RsvNvcl42 + | nal::UnitType::RsvNvcl43 + | nal::UnitType::RsvNvcl44 + | nal::UnitType::Unspec48 + | nal::UnitType::Unspec49 + | nal::UnitType::Unspec50 + | nal::UnitType::Unspec51 + | nal::UnitType::Unspec52 + | nal::UnitType::Unspec53 + | nal::UnitType::Unspec54 + | nal::UnitType::Unspec55 + ) +} + +impl AccessUnit { + fn start( + pkt: &crate::rtp::ReceivedPacket, + additional_loss: u16, + same_ts_as_prev: bool, + ) -> Self { + AccessUnit { + start_ctx: *pkt.ctx(), + end_ctx: *pkt.ctx(), + timestamp: pkt.timestamp(), + stream_id: pkt.stream_id(), + in_fu: false, + + // TODO: overflow? + loss: pkt.loss() + additional_loss, + same_ts_as_prev, + } + } +} + +/// Checks NAL unit type ordering against rules of H.265 section 7.4.2.4. +fn validate_order(_nals: &[Nal], _errs: &mut String) { + // TODO! +} + +#[derive(Clone, Debug)] +struct InternalParameters { + generic_parameters: super::VideoParameters, + + /// The (single) VPS NAL. + vps_nal: Bytes, + + /// The (single) SPS NAL. + sps_nal: Bytes, + + /// The (single) PPS NAL. + pps_nal: Bytes, + + seen_extra_trailing_data: bool, +} + +/// Writes an `hvc1` / `HEVCSampleEntry` as in ... +fn make_video_sample_entry(pixel_dimensions: (u32, u32), extra_data: &[u8]) -> Option> { + let pixel_dimensions = ( + u16::try_from(pixel_dimensions.0).ok()?, + u16::try_from(pixel_dimensions.1).ok()?, + ); + let mut buf = Vec::new(); + write_mp4_box!(&mut buf, b"hvc1", { + write_visual_sample_entry_body(&mut buf, pixel_dimensions); + write_mp4_box!(&mut buf, b"hvcC", { + buf.extend_from_slice(extra_data); + }); + }); + Some(buf) +} + +impl InternalParameters { + /// Parses metadata from the `format-specific-params` of a SDP `fmtp` media attribute. 
+ fn parse_format_specific_params(format_specific_params: &str) -> Result { + let mut sps_nal = None; + let mut pps_nal = None; + let mut vps_nal = None; + for p in format_specific_params.split(';') { + match p.trim().split_once('=') { + Some(("tx-mode", "SRST")) => {} + Some(("tx-mode", v)) => { + return Err(format!("unsupported/unexpected tx-mode {v}; expected SRST")); + } + Some(("sprop-vps", v)) => Self::store_sprop_nal("sprop-vps", v, &mut vps_nal)?, + Some(("sprop-sps", v)) => Self::store_sprop_nal("sprop-sps", v, &mut sps_nal)?, + Some(("sprop-pps", v)) => Self::store_sprop_nal("sprop-pps", v, &mut pps_nal)?, + Some((_, _)) => {} + None => return Err(format!("key {p} without value")), + } + } + let vps_nal = vps_nal.ok_or_else(|| "no vps".to_string())?; + let sps_nal = sps_nal.ok_or_else(|| "no sps".to_string())?; + let pps_nal = pps_nal.ok_or_else(|| "no pps".to_string())?; + Self::parse_vps_sps_pps(&vps_nal, &sps_nal, &pps_nal, false) + } + + fn store_sprop_nal(key: &str, value: &str, out: &mut Option>) -> Result<(), String> { + let nal = base64::engine::general_purpose::STANDARD + .decode(value) + .map_err(|e| format!("bad parameter {key}: NAL has invalid base64 encoding: {e}"))?; + if nal.is_empty() { + return Err(format!("bad parameter {key}: empty NAL")); + } + if out.is_some() { + return Err(format!("multiple {key} parameters")); + } + *out = Some(nal); + Ok(()) + } + + fn parse_vps_sps_pps( + vps_nal: &[u8], + sps_nal: &[u8], + pps_nal: &[u8], + mut seen_extra_trailing_data: bool, + ) -> Result { + let (vps_h, _vps_bits) = + nal::split(vps_nal).map_err(|e| format!("failed to parse VPS: {e}"))?; + if vps_h.unit_type() != nal::UnitType::VpsNut { + return Err("VPS NAL is not VPS".into()); + } + + let (sps_h, sps_bits) = + nal::split(sps_nal).map_err(|e| format!("failed to parse SPS: {e}"))?; + if sps_h.unit_type() != nal::UnitType::SpsNut { + return Err("SPS NAL is not SPS".into()); + } + let mut sps_has_extra_trailing_data = false; + let sps_hex = 
crate::hex::LimitedHex::new(sps_nal, 256); + let sps_bits = TolerantBitReader { + inner: sps_bits, + has_extra_trailing_data: &mut sps_has_extra_trailing_data, + }; + let sps = nal::Sps::from_bits(sps_bits).map_err(|e| format!("failed to parse SPS: {e}"))?; + if sps_has_extra_trailing_data && !seen_extra_trailing_data { + log::warn!("Ignoring trailing data in SPS {sps_hex}; will not log about trailing data again for this stream."); + seen_extra_trailing_data = true; + } + + let (pps_h, pps_bits) = + nal::split(pps_nal).map_err(|e| format!("failed to parse PPS: {e}"))?; + if pps_h.unit_type() != nal::UnitType::PpsNut { + return Err("PPS NAL is not PPS".into()); + } + let mut pps_has_extra_trailing_data = false; + let pps_hex = crate::hex::LimitedHex::new(pps_nal, 256); + let pps_bits = TolerantBitReader { + inner: pps_bits, + has_extra_trailing_data: &mut pps_has_extra_trailing_data, + }; + let pps = nal::Pps::from_bits(pps_bits).map_err(|e| format!("failed to parse PPS: {e}"))?; + if pps_has_extra_trailing_data && !seen_extra_trailing_data { + log::warn!("Ignoring trailing data in PPS {pps_hex}; will not log about trailing data again for this stream."); + seen_extra_trailing_data = true; + } + + let rfc6381_codec = sps.rfc6381_codec(); + + let pixel_dimensions = sps.pixel_dimensions()?; + let (pixel_aspect_ratio, frame_rate); + if let Some(v) = sps.vui() { + pixel_aspect_ratio = v + .aspect_ratio() + .and_then(nal::AspectRatioInfo::get) + .map(|(v, h)| (u32::from(v), u32::from(h))); + frame_rate = v + .timing_info() + .map(|t| (t.num_units_in_tick(), t.time_scale())) + } else { + pixel_aspect_ratio = None; + frame_rate = None; + } + + let hevc_decoder_config = + record::decoder_configuration_record(pps_nal, &pps, sps_nal, &sps, vps_nal); + let sample_entry = make_video_sample_entry(pixel_dimensions, &hevc_decoder_config.record); + Ok(InternalParameters { + generic_parameters: super::VideoParameters { + rfc6381_codec, + pixel_dimensions, + pixel_aspect_ratio, + 
frame_rate, + extra_data: hevc_decoder_config.record, + sample_entry, + }, + vps_nal: hevc_decoder_config.vps, + sps_nal: hevc_decoder_config.sps, + pps_nal: hevc_decoder_config.pps, + seen_extra_trailing_data, + }) + } +} + +/// Returns true iff the bytes of `nal` equal the bytes of `[hdr, ..data]`. +fn nal_matches(nal: &[u8], hdr: nal::Header, pieces: &[Bytes]) -> bool { + if nal.first_chunk() != Some(&*hdr) { + return false; + } + let mut nal_pos = 2; + for piece in pieces { + let new_pos = nal_pos + piece.len(); + if nal.len() < new_pos { + return false; + } + if piece[..] != nal[nal_pos..new_pos] { + return false; + } + nal_pos = new_pos; + } + nal_pos == nal.len() +} + +/// Saves the given NAL to a contiguous `Bytes`. +fn to_bytes(hdr: nal::Header, len: u32, pieces: &[Bytes]) -> Bytes { + let len = usize::try_from(len).expect("u32 fits in usize"); + let mut out = Vec::with_capacity(len); + out.extend(&*hdr); + for piece in pieces { + out.extend_from_slice(&piece[..]); + } + debug_assert_eq!(len, out.len()); + out.into() +} + +#[cfg(test)] +mod tests { + use std::num::NonZeroU32; + + use crate::{ + codec::CodecItem, rtp::ReceivedPacketBuilder, testutil::init_logging, PacketContext, + }; + + #[test] + fn depacketize() { + init_logging(); + let mut d = super::Depacketizer::new(90_000, Some("profile-id=1;sprop-sps=QgEBAWAAAAMAsAAAAwAAAwBaoAWCAeFja5JFL83BQYFBAAADAAEAAAMADKE=;sprop-pps=RAHA8saNA7NA;sprop-vps=QAEMAf//AWAAAAMAsAAAAwAAAwBarAwAAAMABAAAAwAyqA==")).unwrap(); + let timestamp = crate::Timestamp { + timestamp: 0, + clock_rate: NonZeroU32::new(90_000).unwrap(), + start: 0, + }; + d.push( + ReceivedPacketBuilder { + // plain PREFIX_SEI packet. + ctx: PacketContext::dummy(), + stream_id: 0, + timestamp, + ssrc: 0, + sequence_number: 0, + loss: 0, + mark: false, + payload_type: 0, + } + .build(*b"\x4e\x01plain") + .unwrap(), + ) + .unwrap(); + assert!(d.pull().is_none()); + d.push( + ReceivedPacketBuilder { + // aggregation packet. 
+ ctx: crate::PacketContext::dummy(), + stream_id: 0, + timestamp, + ssrc: 0, + sequence_number: 1, + loss: 0, + mark: false, + payload_type: 0, + } + // .build(*b"\x18\x00\x09\x06stap-a 1\x00\x09\x06stap-a 2") + .build(*b"\x60\x01\x00\x0a\x4e\x01stap-a 1\x00\x0a\x4e\x01stap-a 2") + .unwrap(), + ) + .unwrap(); + assert!(d.pull().is_none()); + d.push( + ReceivedPacketBuilder { + // FU packet, start. + ctx: crate::PacketContext::dummy(), + stream_id: 0, + timestamp, + ssrc: 0, + sequence_number: 2, + loss: 0, + mark: false, + payload_type: 0, + } + .build(*b"\x62\x01\x94fu start, ") + .unwrap(), + ) + .unwrap(); + assert!(d.pull().is_none()); + d.push( + ReceivedPacketBuilder { + // FU-A packet, middle. + ctx: crate::PacketContext::dummy(), + stream_id: 0, + timestamp, + ssrc: 0, + sequence_number: 3, + loss: 0, + mark: false, + payload_type: 0, + } + .build(*b"\x62\x01\x14fu middle, ") + .unwrap(), + ) + .unwrap(); + assert!(d.pull().is_none()); + d.push( + ReceivedPacketBuilder { + // FU-A packet, end. + ctx: crate::PacketContext::dummy(), + stream_id: 0, + timestamp, + ssrc: 0, + sequence_number: 4, + loss: 0, + mark: true, + payload_type: 0, + } + .build(*b"\x62\x01\x54fu end") + .unwrap(), + ) + .unwrap(); + let frame = match d.pull() { + Some(CodecItem::VideoFrame(frame)) => frame, + _ => panic!(), + }; + assert_eq!( + frame.data(), + b"\x00\x00\x00\x07\x4e\x01plain\ + \x00\x00\x00\x0a\x4e\x01stap-a 1\ + \x00\x00\x00\x0a\x4e\x01stap-a 2\ + \x00\x00\x00\x1d\x28\x01fu start, fu middle, fu end" + ); + } +} diff --git a/src/codec/h265/nal.rs b/src/codec/h265/nal.rs new file mode 100644 index 0000000..df38367 --- /dev/null +++ b/src/codec/h265/nal.rs @@ -0,0 +1,1187 @@ +// Copyright (C) 2024 Scott Lamb +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! H.265 NAL unit parsing. +//! +//! This is an internal module, `pub` only for the benefit of fuzz testing. +//! +//! Relevant specifications: +//! +//! 
* [ITU-T H.265 "High efficiency video coding"](https://www.itu.int/rec/T-REC-H.265) is the +//! main H.265 specification, including all the RBSP layouts described here. +//! * [ISO/IEC 14496-15 "Carriage of network abstraction layer (NAL) unit structured video in the ISO base media file format"](https://www.iso.org/standard/68933.html) +//! defines the format of the RFC 6381 codec ID. I have been unable to +//! find a legal, free copy of the finalized document. However, I believe the relevant parts are +//! unchanged since [this working draft](https://mpeg.chiariglione.org/standards/mpeg-4/carriage-nal-unit-structured-video-iso-base-media-file-format/wd-isoiec-14496). + +use h264_reader::rbsp::{BitRead, BitReaderError}; + +/// Whether a unit type is VCL or non-VCL, as defined in T.REC H.265 Table 7-1. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum UnitTypeClass { + Vcl { intra_coded: bool }, + NonVcl, +} + +/// NAL unit type, as in T.REC H.265 Table 7-1. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] +#[repr(u8)] +pub enum UnitType { + TrailN = 0, + TrailR = 1, + TsaN = 2, + TsaR = 3, + StsaN = 4, + StsaR = 5, + RadlN = 6, + RadlR = 7, + RaslN = 8, + RaslR = 9, + RsvVclN10 = 10, + RsvVclR11 = 11, + RsvVclN12 = 12, + RsvVclR13 = 13, + RsvVclN14 = 14, + RsvVclR15 = 15, + BlaWLp = 16, + BlaWRadl = 17, + BlaNLp = 18, + IdrWRadl = 19, + IdrNLp = 20, + CraNut = 21, + RsvIrapVcl22 = 22, + RsvIrapVcl23 = 23, + RsvVcl24 = 24, + RsvVcl25 = 25, + RsvVcl26 = 26, + RsvVcl27 = 27, + RsvVcl28 = 28, + RsvVcl29 = 29, + RsvVcl30 = 30, + RsvVcl31 = 31, + VpsNut = 32, + SpsNut = 33, + PpsNut = 34, + AudNut = 35, + EosNut = 36, + EobNut = 37, + FdNut = 38, + PrefixSeiNut = 39, + SuffixSeiNut = 40, + RsvNvcl41 = 41, + RsvNvcl42 = 42, + RsvNvcl43 = 43, + RsvNvcl44 = 44, + RsvNvcl45 = 45, + RsvNvcl46 = 46, + RsvNvcl47 = 47, + Unspec48 = 48, + Unspec49 = 49, + Unspec50 = 50, + Unspec51 = 51, + Unspec52 = 52, + Unspec53 = 53, + Unspec54 = 54, + Unspec55 = 55, + 
Unspec56 = 56, + Unspec57 = 57, + Unspec58 = 58, + Unspec59 = 59, + Unspec60 = 60, + Unspec61 = 61, + Unspec62 = 62, + Unspec63 = 63, +} + +impl UnitType { + pub fn unit_type_class(self) -> UnitTypeClass { + match self { + UnitType::TrailN + | UnitType::TrailR + | UnitType::TsaN + | UnitType::TsaR + | UnitType::StsaN + | UnitType::StsaR + | UnitType::RadlN + | UnitType::RadlR + | UnitType::RaslN + | UnitType::RaslR + | UnitType::RsvVclN10 + | UnitType::RsvVclR11 + | UnitType::RsvVclN12 + | UnitType::RsvVclR13 + | UnitType::RsvVclN14 + | UnitType::RsvVclR15 + | UnitType::BlaWLp + | UnitType::BlaWRadl + | UnitType::BlaNLp + | UnitType::CraNut + | UnitType::RsvIrapVcl22 + | UnitType::RsvIrapVcl23 + | UnitType::RsvVcl24 + | UnitType::RsvVcl25 + | UnitType::RsvVcl26 + | UnitType::RsvVcl27 + | UnitType::RsvVcl28 + | UnitType::RsvVcl29 + | UnitType::RsvVcl30 + | UnitType::RsvVcl31 => UnitTypeClass::Vcl { intra_coded: false }, + UnitType::IdrWRadl | UnitType::IdrNLp => UnitTypeClass::Vcl { intra_coded: true }, + _ => UnitTypeClass::NonVcl, + } + } +} + +impl TryFrom for UnitType { + type Error = Error; + + fn try_from(value: u8) -> Result { + if value > 63 { + return Err(Error(format!("NAL 0x{:02X} is out of range", value))); + } + + // SAFETY: `UnitType` is `repr(u8)` and C-like; `value` is in range. + Ok(unsafe { std::mem::transmute(value) }) + } +} + +impl From for u8 { + fn from(t: UnitType) -> u8 { + // SAFETY: `UnitType` is `repr(u8)` and C-like. + unsafe { std::mem::transmute(t) } + } +} + +/// `nal_unit_header` as in T.REC H.265 section 7.3.1.2. 
+#[derive(Copy, Clone, Eq, PartialEq)] +pub struct Header([u8; 2]); + +impl std::fmt::Debug for Header { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Header") + .field("unit_type", &self.unit_type()) + .field("nuh_layer_id", &self.nuh_layer_id()) + .field("nuh_temporal_id_plus1", &self.nuh_temporal_id_plus1()) + .finish() + } +} + +impl TryFrom<[u8; 2]> for Header { + type Error = Error; + + fn try_from(value: [u8; 2]) -> Result { + if (value[0] & 0b1000_0000) != 0 { + return Err(Error(format!( + "forbidden zero bit is set in NAL header 0x{:02X}{:02X}", + value[0], value[1] + ))); + } + if (value[1] & 0b111) == 0 { + return Err(Error(format!( + "zero temporal_id_plus1 in NAL header 0x{:02X}{:02X}", + value[0], value[1] + ))); + } + Ok(Self(value)) + } +} + +impl std::ops::Deref for Header { + type Target = [u8; 2]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Header { + /// Returns a new header of the given unit type. + pub fn with_unit_type(self, t: UnitType) -> Self { + let mut out = self.0; + out[0] = (out[0] & 0b1000_0001) | (u8::from(t) << 1); + Self(out) + } + + /// The NAL unit type. + pub fn unit_type(self) -> UnitType { + UnitType::try_from(self.0[0] >> 1).expect("6-bit value must be valid NAL type") + } + + /// The `nul_layer_id`, as a 6-bit value. + pub fn nuh_layer_id(self) -> u8 { + (self.0[0] & 0b1) << 5 | (self.0[1] >> 3) + } + + /// The `num_temporal_id_plus1`, as a non-zero 3-bit value. + pub fn nuh_temporal_id_plus1(self) -> u8 { + self.0[1] & 0b111 + } +} + +/// Splits a NAL unit into the header and a `BitReader` that can be used with +/// the respective NAL type's `from_bits` method. 
pub fn split<'n>(nal: &'n [u8]) -> Result<(Header, impl BitRead + 'n), Error> {
    let Some((hdr_bytes, rest)) = nal.split_first_chunk::<2>() else {
        return Err(Error("NAL unit too short".to_owned()));
    };
    let header = Header::try_from(*hdr_bytes)?;
    let bytes = h264_reader::rbsp::ByteReader::without_skip(rest);
    let bits = h264_reader::rbsp::BitReader::new(bytes);
    Ok((header, bits))
}

/// Error produced while parsing a NAL unit; wraps a human-readable message.
#[derive(Debug)]
pub struct Error(pub(crate) String);

impl From<BitReaderError> for Error {
    fn from(e: BitReaderError) -> Self {
        Error(format!("{:?}", e))
    }
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.fmt(f)
    }
}

impl std::error::Error for Error {}

// T.REC H.265 section 7.3.2.2
#[derive(Debug)]
pub struct Sps {
    sps_max_sub_layers_minus1: u8,
    sps_temporal_id_nesting_flag: bool,
    profile_tier_level: ProfileTierLevel,
    chroma_format_idc: u8,
    pic_width_in_luma_samples: u32,
    pic_height_in_luma_samples: u32,
    conformance_window: Option<ConformanceWindow>,
    bit_depth_luma_minus8: u8,
    bit_depth_chroma_minus8: u8,
    vui: Option<VuiParameters>,
}

impl Sps {
    /// Parses a sequence parameter set from the bits following the NAL header.
    ///
    /// Most syntax elements are read only to advance the reader; the retained
    /// ones are the fields of [`Sps`].
    ///
    /// # Errors
    ///
    /// Returns an error if the reader fails, if a checked syntax element is
    /// outside its permitted range, or if any of `sps_extension_4bits` is set.
    pub fn from_bits<R: BitRead>(mut r: R) -> Result<Self, Error> {
        // See T.REC H.265 section 7.3.2.2.1, seq_parameter_set_rbsp.
        r.skip(4, "sps_video_parameter_set_id")?;
        let sps_max_sub_layers_minus1: u8 = r.read(3, "sps_max_sub_layers_minus1")?;
        if sps_max_sub_layers_minus1 > 6 {
            return Err(Error(
                "sps_max_sub_layers_minus1 must be in [0, 6]".to_owned(),
            ));
        }
        let sps_temporal_id_nesting_flag = r.read_bool("sps_temporal_id_nesting_flag")?;
        let profile_tier_level =
            ProfileTierLevel::from_bits(&mut r, true, sps_max_sub_layers_minus1)?;
        let _ = r.read_ue("sps_seq_parameter_set_id")?;
        let chroma_format_idc = r.read_ue("chroma_format_idc")?;
        if chroma_format_idc > 3 {
            return Err(Error("chroma_format_idc must be in [0, 3]".to_owned()));
        }
        let chroma_format_idc = chroma_format_idc as u8;
        // `separate_colour_plane_flag` is only present for 4:4:4.
        let _ = chroma_format_idc == 3 && r.read_bool("separate_colour_plane_flag")?;
        let pic_width_in_luma_samples = r.read_ue("pic_width_in_luma_samples")?;
        let pic_height_in_luma_samples = r.read_ue("pic_height_in_luma_samples")?;
        let conformance_window = if r.read_bool("conformance_window_flag")? {
            Some(ConformanceWindow::from_bits(&mut r)?)
        } else {
            None
        };
        let bit_depth_luma_minus8 = r.read_ue("bit_depth_luma_minus8")?;
        if bit_depth_luma_minus8 > 8 {
            return Err(Error("bit_depth_luma_minus8 must be in [0, 8]".to_owned()));
        }
        let bit_depth_luma_minus8 = bit_depth_luma_minus8 as u8;
        let bit_depth_chroma_minus8 = r.read_ue("bit_depth_chroma_minus8")?;
        if bit_depth_chroma_minus8 > 8 {
            return Err(Error(
                "bit_depth_chroma_minus8 must be in [0, 8]".to_owned(),
            ));
        }
        let bit_depth_chroma_minus8 = bit_depth_chroma_minus8 as u8;
        let log2_max_pic_order_cnt_lsb_minus4 = r.read_ue("log2_max_pic_order_cnt_lsb_minus4")?;
        // Range-check now: this value plus 4 is used as a bit count below, and
        // an unchecked addition could overflow on hostile input.
        if log2_max_pic_order_cnt_lsb_minus4 > 12 {
            return Err(Error(
                "log2_max_pic_order_cnt_lsb_minus4 must be in [0, 12]".to_owned(),
            ));
        }
        let sps_sub_layer_ordering_info_present_flag =
            r.read_bool("sps_sub_layer_ordering_info_present_flag")?;
        // Per the syntax table, the loop runs from 0 when the flag is set and
        // from `sps_max_sub_layers_minus1` otherwise, so exactly one entry is
        // coded even when the flag is clear.
        let first_sub_layer = if sps_sub_layer_ordering_info_present_flag {
            0
        } else {
            sps_max_sub_layers_minus1
        };
        for _ in first_sub_layer..=sps_max_sub_layers_minus1 {
            let _sps_max_dec_pic_buffering_minus1 =
                r.read_ue("sps_max_dec_pic_buffering_minus1")?;
            let _sps_max_num_reorder_pics = r.read_ue("sps_max_num_reorder_pics")?;
            let _sps_max_latency_increase_plus1 = r.read_ue("sps_max_latency_increase_plus1")?;
        }
        let _ = r.read_ue("log2_min_luma_coding_block_size_minus3")?;
        let _ = r.read_ue("log2_diff_max_min_luma_coding_block_size")?;
        let _ = r.read_ue("log2_min_luma_transform_block_size_minus2")?;
        let _ = r.read_ue("log2_diff_max_min_luma_transform_block_size")?;
        let _ = r.read_ue("max_transform_hierarchy_depth_inter")?;
        let _ = r.read_ue("max_transform_hierarchy_depth_intra")?;
        let scaling_list_enabled_flag = r.read_bool("scaling_list_enabled_flag")?;
        if scaling_list_enabled_flag {
            let sps_scaling_list_data_present_flag =
                r.read_bool("sps_scaling_list_data_present_flag")?;
            if sps_scaling_list_data_present_flag {
                let _scaling_list_data = ScalingListData::from_bits(&mut r)?;
            }
        }
        let _ = r.read_bool("amp_enabled_flag")?;
        let _ = r.read_bool("sample_adaptive_offset_enabled_flag")?;
        let pcm_enabled_flag = r.read_bool("pcm_enabled_flag")?;
        if pcm_enabled_flag {
            r.skip(4, "pcm_sample_bit_depth_luma_minus1")?;
            r.skip(4, "pcm_sample_bit_depth_chroma_minus1")?;
            let _log2_min_pcm_luma_coding_block_size_minus3 =
                r.read_ue("log2_min_pcm_luma_coding_block_size_minus3")?;
            let _log2_diff_max_min_pcm_luma_coding_block_size =
                r.read_ue("log2_diff_max_min_pcm_luma_coding_block_size")?;
            let _pcm_loop_filter_disabled_flag = r.read_bool("pcm_loop_filter_disabled_flag")?;
        }
        let num_short_term_ref_pic_sets = r.read_ue("num_short_term_ref_pic_sets")?;
        for i in 0..num_short_term_ref_pic_sets as usize {
            let _short_term_ref_pic_set = ShortTermRefPicSet::from_bits(&mut r, i)?;
        }
        let long_term_ref_pics_present_flag = r.read_bool("long_term_ref_pics_present_flag")?;
        if long_term_ref_pics_present_flag {
            let num_long_term_ref_pics_sps = r.read_ue("num_long_term_ref_pics_sps")?;
            for _i in 0..num_long_term_ref_pics_sps {
                // `lt_ref_pic_poc_lsb_sps` is `u(v)` with
                // `log2_max_pic_order_cnt_lsb_minus4 + 4` bits; the addition
                // can't overflow thanks to the range check above.
                r.skip(
                    log2_max_pic_order_cnt_lsb_minus4 + 4,
                    "lt_ref_pic_poc_lsb_sps",
                )?;
                let _used_by_curr_pic_lt_sps_flag = r.read_bool("used_by_curr_pic_lt_sps_flag")?;
            }
        }
        let _ = r.read_bool("sps_temporal_mvp_enabled_flag")?;
        let _ = r.read_bool("strong_intra_smoothing_enabled_flag")?;
        let vui = if r.read_bool("vui_parameters_present_flag")? {
            Some(VuiParameters::from_bits(&mut r)?)
        } else {
            None
        };
        let sps_extension_flag = r.read_bool("sps_extension_flag")?;
        if sps_extension_flag {
            let sps_range_extension_flag = r.read_bool("sps_range_extension_flag")?;
            let sps_multilayer_extension_flag = r.read_bool("sps_multilayer_extension_flag")?;
            let sps_3d_extension_flag = r.read_bool("sps_3d_extension_flag")?;
            let sps_scc_extension_flag = r.read_bool("sps_scc_extension_flag")?;
            let sps_extension_4bits: u8 = r.read(4, "sps_extension_4bits")?;
            if sps_range_extension_flag {
                // H.265 section 7.3.2.2.2, `sps_range_extension`.
                r.skip(9, "sps_range_extension")?;
            }
            if sps_multilayer_extension_flag {
                // H.265 section F.7.3.2.2.4, `sps_multilayer_extension`.
                r.skip(1, "inter_view_mv_vert_constraint_flag")?;
            }
            if sps_3d_extension_flag {
                // NOTE(review): confirm this element list against the
                // `sps_3d_extension` syntax table in H.265 Annex I.

                // d == 0
                r.skip(1, "iv_di_mc_enabled_flag")?;
                r.skip(1, "iv_mv_scal_enabled_flag")?;
                let _ = r.read_ue("log2_ivmc_sub_pb_size_minus3")?;
                r.skip(1, "iv_res_pred_enabled_flag")?;
                r.skip(1, "depth_ref_enabled_flag")?;
                r.skip(1, "vsp_mc_enabled_flag")?;
                r.skip(1, "dbbp_enabled_flag")?;

                // d == 1
                r.skip(1, "tex_mc_enabled_flag")?;
                let _ = r.read_ue("log2_texmc_sub_pb_size_minus3")?;
                r.skip(1, "intra_contour_enabled_flag")?;
                r.skip(1, "intra_dc_only_wedge_enabled_flag")?;
                r.skip(1, "cqt_cu_part_pred_enabled_flag")?;
                r.skip(1, "inter_dc_only_enabled_flag")?;
                r.skip(1, "skip_intra_enabled_flag")?;
            }
            if sps_scc_extension_flag {
                // H.265 section 7.3.2.2.3, `sps_scc_extension`.
                // NOTE(review): this appears to stop short of the full
                // `sps_scc_extension` syntax — confirm against the spec.
                r.skip(1, "sps_curr_pic_ref_enabled_flag")?;
                if r.read_bool("palette_mode_enabled_flag")? {
                    // Propagate read failures like every other element does.
                    let _ = r.read_ue("palette_max_size")?;
                    let _ = r.read_ue("delta_palette_max_predictor_size")?;
                    if r.read_bool("sps_palette_predictor_initializers_present_flag")? {
                        let _ = r.read_ue("sps_num_palette_predictor_initializers_minus1")?;
                    }
                }
            }
            if sps_extension_4bits != 0 {
                return Err(Error("sps_extension_4bits unimplemented".to_owned()));
            }
        }
        r.finish_rbsp()?;
        Ok(Self {
            sps_max_sub_layers_minus1,
            sps_temporal_id_nesting_flag,
            profile_tier_level,
            chroma_format_idc,
            pic_width_in_luma_samples,
            pic_height_in_luma_samples,
            conformance_window,
            bit_depth_luma_minus8,
            bit_depth_chroma_minus8,
            vui,
        })
    }

    /// The general profile; always present because `from_bits` parses the
    /// `profile_tier_level` with the profile-present flag set.
    pub(crate) fn profile(&self) -> &Profile {
        self.profile_tier_level
            .profile
            .as_ref()
            .expect("profile must be set on sps")
    }

    /// The `general_level_idc` from the `profile_tier_level`.
    pub(crate) fn general_level_idc(&self) -> u8 {
        self.profile_tier_level.general_level_idc
    }

    /// The maximum sub layers, in the range [1, 7].
+ pub fn max_sub_layers(&self) -> u8 { + self.sps_max_sub_layers_minus1 + 1 + } + + pub fn temporal_id_nesting_flag(&self) -> bool { + self.sps_temporal_id_nesting_flag + } + + pub fn vui(&self) -> Option<&VuiParameters> { + self.vui.as_ref() + } + + /// Returns the pixel dimensions `(width, height)`, unless the conformance + /// cropping window is larger than the picture. + pub fn pixel_dimensions(&self) -> Result<(u32, u32), String> { + let mut width = self.pic_width_in_luma_samples; + let mut height = self.pic_height_in_luma_samples; + if let Some(ref c) = self.conformance_window { + // Subtract out the conformance window, which is specified in + // *chroma* samples. + let width_shift = (self.chroma_format_idc == 1 || self.chroma_format_idc == 2) as u32; + let height_shift = (self.chroma_format_idc == 1) as u32; + let sub_width = c + .left_offset + .checked_add(c.right_offset) + .and_then(|x| x.checked_shl(width_shift)) + .ok_or("bad conformance window")?; + let sub_height = c + .top_offset + .checked_add(c.bottom_offset) + .and_then(|x| x.checked_shl(height_shift)) + .ok_or("bad conformance window")?; + width = width + .checked_sub(sub_width) + .ok_or("bad conformance window")?; + height = height + .checked_sub(sub_height) + .ok_or("bad conformance window")?; + } + Ok((width, height)) + } + + pub fn rfc6381_codec(&self) -> String { + let profile = self.profile(); + + // See ISO/IEC 14496-15, or the working draft mentioned here: + // . + // Section E.3. + + // > When the first element of a value is a code indicating a codec from + // > the High Efficiency Video Coding specification (ISO/IEC 23008-2), + // > as documented in clause 8 (such as 'hev1', 'hev2', 'hvc1', 'hvc2', + // > 'shv1' or 'shc1'), the elements following are a series of values + // > from the HEVC or SHVC decoder configuration record, separated by + // > period characters (“.”). In all numeric encodings, leading zeroes + // > may be omitted, + + // > 1. 
the general_profile_space, encoded as no character + // > (general_profile_space == 0), or ‘A’, ‘B’, ‘C’ for + // > general_profile_space 1, 2, 3, followed by the general_profile_idc + // > encoded as a decimal number; + let general_profile_space = match profile.general_profile_space() { + 0 => "", + 1 => "A", + 2 => "B", + 3 => "C", + _ => unreachable!("profile_space is 2 bits"), + }; + let general_profile_idc = profile.general_profile_idc(); + + // > 2. the general_profile_compatibility_flags, encoded in hexadecimal + // > (leading zeroes may be omitted); + let general_profile_compatibility_flags = profile.general_profile_compatibility_flags(); + + // > 3. the general_tier_flag, encoded as ‘L’ (general_tier_flag==0) or + // > ‘H’ (general_tier_flag==1), followed by the general_level_idc, + // > encoded as a decimal number; + let general_tier_flag = match profile.general_tier_flag() { + true => "H", + false => "L", + }; + let general_level_idc = self.profile_tier_level.general_level_idc; + let mut out = format!("hvc1.{general_profile_space}{general_profile_idc}.{general_profile_compatibility_flags:02X}.{general_tier_flag}{general_level_idc}"); + + // > 4. each of the 6 bytes of the constraint flags, starting from the + // byte containing the general_progressive_source_flag, each encoded + // encoded as a hexadecimal number, and the encoding of each byte + // separated by a period; trailing bytes that are zero may be + // omitted. + let mut general_constraint_indicator_flags = + &profile.general_constraint_indicator_flags()[..]; + while let [head @ .., 0] = general_constraint_indicator_flags { + // XXX: this `if` is probably unnecessary? 
+ if head.is_empty() { + break; + } + general_constraint_indicator_flags = head; + } + use std::fmt::Write as _; + for b in general_constraint_indicator_flags { + write!(&mut out, ".{b:02X}").expect("write to String should succeed"); + } + out + } + + pub(crate) fn chroma_format_idc(&self) -> u8 { + self.chroma_format_idc + } + + pub(crate) fn bit_depth_luma_minus8(&self) -> u8 { + self.bit_depth_luma_minus8 + } + + pub(crate) fn bit_depth_chroma_minus8(&self) -> u8 { + self.bit_depth_chroma_minus8 + } +} + +/// Conformance cropping window, in luma samples. +#[derive(Debug)] +pub struct ConformanceWindow { + pub left_offset: u32, + pub right_offset: u32, + pub top_offset: u32, + pub bottom_offset: u32, +} + +impl ConformanceWindow { + pub fn from_bits(r: &mut R) -> Result { + let left_offset = r.read_ue("left_offset")?; + let right_offset = r.read_ue("right_offset")?; + let top_offset = r.read_ue("top_offset")?; + let bottom_offset = r.read_ue("bottom_offset")?; + Ok(Self { + left_offset, + right_offset, + top_offset, + bottom_offset, + }) + } +} + +/// H.265 section 7.3.3, `profile_tier_level`, `if( profilePresentFlag )` block. +#[derive(Debug)] +pub struct Profile(pub [u8; 11]); + +impl Profile { + pub fn from_bits(r: &mut R) -> Result { + Ok(Profile(r.read_to("profile")?)) + } + + #[inline] + pub fn general_profile_space(&self) -> u8 { + self.0[0] >> 6 + } + + /// Returns the `general_profile_compatibility_flags` as defined in ISO/IEC 14496-15 section 8.3.3.1.3: + /// "`general_profile_compatibility_flag[ i ]`` for i from 0 to 31, inclusive". + #[inline] + pub fn general_profile_compatibility_flags(&self) -> u32 { + u32::from_be_bytes([self.0[1], self.0[2], self.0[3], self.0[4]]) + } + + /// Returns the `general_constraint_indicator_flags` as defined in ISO/IEC 14496-15 section 8.3.3.1.3: + /// "the 6 bytes starting with the byte containing the `general_progressive_source_flag`". 
+    #[inline]
+    pub fn general_constraint_indicator_flags(&self) -> &[u8; 6] {
+        self.0[5..11].try_into().expect("6 bytes")
+    }
+
+    /// Returns `general_tier_flag`: bit 5 of the first profile byte.
+    #[inline]
+    pub fn general_tier_flag(&self) -> bool {
+        (self.0[0] & 0b0010_0000) != 0
+    }
+
+    /// Returns `general_profile_idc`: the low 5 bits of the first profile byte.
+    #[inline]
+    pub fn general_profile_idc(&self) -> u8 {
+        self.0[0] & 0b0001_1111
+    }
+}
+
+/// H.265 section 7.3.3.
+#[derive(Debug)]
+pub struct ProfileTierLevel {
+    profile: Option<Profile>,
+    general_level_idc: u8,
+}
+
+impl ProfileTierLevel {
+    /// Parses `profile_tier_level( profilePresentFlag, maxNumSubLayersMinus1 )`.
+    ///
+    /// Retains the general profile bytes (only when `profile_present_flag` is
+    /// set) and `general_level_idc`. Per-sub-layer profile and level fields are
+    /// consumed and discarded so the reader ends up positioned after the
+    /// structure.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from the underlying bit reader, e.g. on truncated
+    /// input.
+    pub fn from_bits<R: BitRead>(
+        r: &mut R,
+        profile_present_flag: bool,
+        sps_max_sub_layers_minus1: u8,
+    ) -> Result<Self, Error> {
+        // See H.265 section 7.3.3, profile_tier_level( 1, sps_max_sub_layers_minus1 ).
+        let profile = if profile_present_flag {
+            Some(Profile::from_bits(r)?)
+        } else {
+            None
+        };
+        let general_level_idc: u8 = r.read(8, "general_level_idc")?;
+        if sps_max_sub_layers_minus1 > 0 {
+            // When any sub-layer is signalled, exactly 16 bits follow: one
+            // (sub_layer_profile_present_flag, sub_layer_level_present_flag)
+            // pair per sub-layer, padded with `reserved_zero_2bits` up to
+            // eight pairs. Fixed-length fields are coded most significant bit
+            // first, so sub-layer `i` uses bit `15 - 2 * i` (profile) and bit
+            // `14 - 2 * i` (level) of the value read here.
+            let sub_layer_presence_flags: u16 = r.read(16, "sub_layer_presence_flags")?;
+            for i in 0..sps_max_sub_layers_minus1 {
+                // `checked_shr` yields an empty mask (flag treated as absent)
+                // instead of panicking if more sub-layers are requested than
+                // the 16-bit flag word can describe.
+                let shift = 2 * u32::from(i);
+                let profile_mask = 0x8000_u16.checked_shr(shift).unwrap_or(0);
+                let level_mask = 0x4000_u16.checked_shr(shift).unwrap_or(0);
+                let sub_layer_profile_present_flag = sub_layer_presence_flags & profile_mask != 0;
+                let sub_layer_level_present_flag = sub_layer_presence_flags & level_mask != 0;
+                if sub_layer_profile_present_flag {
+                    r.skip(2, "sub_layer_profile_space")?;
+                    r.skip(1, "sub_layer_tier_flag")?;
+                    r.skip(5, "sub_layer_profile_idc")?;
+                    r.skip(32, "sub_layer_profile_compatibility_flags")?;
+                    r.skip(1, "sub_layer_progressive_source_flag")?;
+                    r.skip(1, "sub_layer_interlaced_source_flag")?;
+                    r.skip(1, "sub_layer_non_packed_constraint_flag")?;
+                    r.skip(1, "sub_layer_frame_only_constraint_flag")?;
+                    // The remaining 44 bits of the sub-layer profile (43
+                    // constraint/reserved bits plus one trailing flag bit).
+                    // They are only coded when the profile itself is present.
+                    r.skip(44, "stuff")?;
+                }
+                if sub_layer_level_present_flag {
+                    r.skip(8, "sub_layer_level_idc")?;
+                }
+            }
+        }
+        Ok(Self {
+            profile,
+            general_level_idc,
+        })
+    }
+}
+
+// H.265 section 7.3.2.3.
+#[derive(Debug)]
+pub struct Pps {
+    tiles_enabled_flag: bool,
+    entropy_coding_sync_enabled_flag: bool,
+}
+
+impl Pps {
+    /// Parses a picture parameter set RBSP (H.265 section 7.3.2.3.1).
+    ///
+    /// Every syntax element is read in bitstream order so the reader stays in
+    /// sync, but only `tiles_enabled_flag` and
+    /// `entropy_coding_sync_enabled_flag` are retained.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error on a bit reader failure, when `finish_rbsp` rejects
+    /// the trailing bits, or when the PPS uses a feature this parser does not
+    /// implement (scaling list data or any PPS extension).
+    pub fn from_bits<R: BitRead>(mut r: R) -> Result<Self, Error> {
+        let _pps_pic_parameter_set_id = r.read_ue("pps_pic_parameter_set_id")?;
+        let _pps_seq_parameter_set_id = r.read_ue("pps_seq_parameter_set_id")?;
+        let _dependent_slice_segments_enabled_flag =
+            r.read_bool("dependent_slice_segments_enabled_flag")?;
+        let _output_flag_present_flag = r.read_bool("output_flag_present_flag")?;
+        let _num_extra_slice_header_bits: u8 = r.read(3, "num_extra_slice_header_bits")?;
+        let _sign_data_hiding_enabled_flag = r.read_bool("sign_data_hiding_enabled_flag")?;
+        let _cabac_init_present_flag = r.read_bool("cabac_init_present_flag")?;
+        let _num_ref_idx_l0_default_active_minus1 =
+            r.read_ue("num_ref_idx_l0_default_active_minus1")?;
+        let _num_ref_idx_l1_default_active_minus1 =
+            r.read_ue("num_ref_idx_l1_default_active_minus1")?;
+        let _init_qp_minus26 = r.read_se("init_qp_minus26")?;
+        let _constrained_intra_pred_flag = r.read_bool("constrained_intra_pred_flag")?;
+        let _transform_skip_enabled_flag = r.read_bool("transform_skip_enabled_flag")?;
+        let cu_qp_delta_enabled_flag = r.read_bool("cu_qp_delta_enabled_flag")?;
+        if cu_qp_delta_enabled_flag {
+            let _diff_cu_qp_delta_depth = r.read_ue("diff_cu_qp_delta_depth")?;
+        }
+        let _pps_cb_qp_offset = r.read_se("pps_cb_qp_offset")?;
+        let _pps_cr_qp_offset = r.read_se("pps_cr_qp_offset")?;
+        let _pps_slice_chroma_qp_offsets_present_flag =
+            r.read_bool("pps_slice_chroma_qp_offsets_present_flag")?;
+        let _weighted_pred_flag = r.read_bool("weighted_pred_flag")?;
+        let _weighted_bipred_flag = r.read_bool("weighted_bipred_flag")?;
+        let _transquant_bypass_enabled_flag = r.read_bool("transquant_bypass_enabled_flag")?;
+        let tiles_enabled_flag = r.read_bool("tiles_enabled_flag")?;
+        let entropy_coding_sync_enabled_flag = r.read_bool("entropy_coding_sync_enabled_flag")?;
+        if tiles_enabled_flag {
+            let num_tile_columns_minus1 = r.read_ue("num_tile_columns_minus1")?;
+            let num_tile_rows_minus1 = r.read_ue("num_tile_rows_minus1")?;
+            let uniform_spacing_flag = r.read_bool("uniform_spacing_flag")?;
+            if !uniform_spacing_flag {
+                // Only `num_tile_columns_minus1` widths and
+                // `num_tile_rows_minus1` heights are coded; the size of the
+                // last column/row is inferred, so these ranges are exclusive.
+                for _ in 0..num_tile_columns_minus1 {
+                    let _column_width_minus1 = r.read_ue("column_width_minus1")?;
+                }
+                for _ in 0..num_tile_rows_minus1 {
+                    let _row_height_minus1 = r.read_ue("row_height_minus1")?;
+                }
+            }
+            let _loop_filter_across_tiles_enabled_flag =
+                r.read_bool("loop_filter_across_tiles_enabled_flag")?;
+        }
+        let _pps_loop_filter_across_slices_enabled_flag =
+            r.read_bool("pps_loop_filter_across_slices_enabled_flag")?;
+        let deblocking_filter_control_present_flag =
+            r.read_bool("deblocking_filter_control_present_flag")?;
+        if deblocking_filter_control_present_flag {
+            let _deblocking_filter_override_enabled_flag =
+                r.read_bool("deblocking_filter_override_enabled_flag")?;
+            let pps_deblocking_filter_disabled_flag =
+                r.read_bool("pps_deblocking_filter_disabled_flag")?;
+            if !pps_deblocking_filter_disabled_flag {
+                let _pps_beta_offset_div2 = r.read_se("pps_beta_offset_div2")?;
+                let _pps_tc_offset_div2 = r.read_se("pps_tc_offset_div2")?;
+            }
+        }
+        let pps_scaling_list_data_present_flag =
+            r.read_bool("pps_scaling_list_data_present_flag")?;
+        if pps_scaling_list_data_present_flag {
+            return Err(Error(
+                "pps_scaling_list_data_present unimplemented".to_owned(),
+            ));
+        }
+        let _lists_modification_present_flag = r.read_bool("lists_modification_present_flag")?;
+        let _log2_parallel_merge_level_minus2 = r.read_ue("log2_parallel_merge_level_minus2")?;
+        let _slice_segment_header_extension_present_flag =
+            r.read_bool("slice_segment_header_extension_present_flag")?;
+        let pps_extension_present_flag = r.read_bool("pps_extension_present_flag")?;
+        if pps_extension_present_flag {
+            // Read all extension flags first so the error reported does not
+            // depend on a partially consumed flag group.
+            let pps_range_extension_flag = r.read_bool("pps_range_extension_flag")?;
+            let pps_multilayer_extension_flag = r.read_bool("pps_multilayer_extension_flag")?;
+            let pps_3d_extension_flag = r.read_bool("pps_3d_extension_flag")?;
+            let pps_scc_extension_flag = r.read_bool("pps_scc_extension_flag")?;
+            let pps_extension_4bits: u8 = r.read(4, "pps_extension_4bits")?;
+            if pps_range_extension_flag {
+                return Err(Error("pps_range_extension_flag unimplemented".to_owned()));
+            }
+            if pps_multilayer_extension_flag {
+                return Err(Error(
+                    "pps_multilayer_extension_flag unimplemented".to_owned(),
+                ));
+            }
+            if pps_3d_extension_flag {
+                return Err(Error("pps_3d_extension_flag unimplemented".to_owned()));
+            }
+            if pps_scc_extension_flag {
+                return Err(Error("pps_scc_extension_flag unimplemented".to_owned()));
+            }
+            if pps_extension_4bits != 0 {
+                return Err(Error("pps_extension_4bits unimplemented".to_owned()));
+            }
+        }
+        r.finish_rbsp()?;
+        Ok(Self {
+            tiles_enabled_flag,
+            entropy_coding_sync_enabled_flag,
+        })
+    }
+
+    pub(crate) fn entropy_coding_sync_enabled_flag(&self) -> bool {
+        self.entropy_coding_sync_enabled_flag
+    }
+
+    pub(crate) fn tiles_enabled_flag(&self) -> bool {
+        self.tiles_enabled_flag
+    }
+}
+
+/// T.REC H.265 section 7.3.4, `scaling_list_data`.
+#[derive(Debug)]
+pub struct ScalingListData {}
+
+impl ScalingListData {
+    /// Consumes a `scaling_list_data( )` structure without retaining any of it.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from the underlying bit reader.
+    pub fn from_bits<R: BitRead>(r: &mut R) -> Result<Self, Error> {
+        for size_id in 0..4 {
+            // matrixId advances by 3 when sizeId == 3, so only two matrices
+            // are coded for the largest size.
+            let num_matrices = if size_id == 3 { 2 } else { 6 };
+            for _ in 0..num_matrices {
+                if !r.read_bool("scaling_list_pred_mode_flag")? {
+                    let _ = r.read_ue("scaling_list_pred_matrix_id_delta")?;
+                } else {
+                    // coefNum = Min( 64, ( 1 << ( 4 + ( sizeId << 1 ) ) ) ).
+                    // The inner parentheses are required: in Rust `+` binds
+                    // tighter than `<<`.
+                    let coef_num = std::cmp::min(64, 1 << (4 + (size_id << 1)));
+                    if size_id > 1 {
+                        let _ = r.read_se("scaling_list_dc_coef_minus8")?;
+                    }
+                    for _ in 0..coef_num {
+                        let _ = r.read_se("scaling_list_delta_coef")?;
+                    }
+                }
+            }
+        }
+        Ok(Self {})
+    }
+}
+
+/// T.REC H.265 section 7.3.7.
+#[derive(Debug)]
+pub struct ShortTermRefPicSet {}
+
+impl ShortTermRefPicSet {
+    /// Consumes an `st_ref_pic_set( stRpsIdx )` structure without retaining it.
+    ///
+    /// `inter_ref_pic_set_prediction_flag` is only coded when `st_rps_idx` is
+    /// non-zero.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error on a bit reader failure or when the set uses inter
+    /// RPS prediction, which is not implemented.
+    pub fn from_bits<R: BitRead>(r: &mut R, st_rps_idx: usize) -> Result<Self, Error> {
+        // See T.REC H.265 section 7.3.7, st_ref_pic_set.
+        let inter_ref_pic_set_prediction_flag =
+            st_rps_idx != 0 && r.read_bool("inter_ref_pic_set_prediction_flag")?;
+        if inter_ref_pic_set_prediction_flag {
+            return Err(Error(
+                "inter_ref_pic_set_prediction_flag unimplemented".to_owned(),
+            ));
+        }
+        let num_negative_pics = r.read_ue("num_negative_pics")?;
+        let num_positive_pics = r.read_ue("num_positive_pics")?;
+        for _ in 0..num_negative_pics {
+            let _delta_poc_s0_minus1 = r.read_ue("delta_poc_s0_minus1")?;
+            let _used_by_curr_pic_s0_flag = r.read_bool("used_by_curr_pic_s0_flag")?;
+        }
+        for _ in 0..num_positive_pics {
+            let _delta_poc_s1_minus1 = r.read_ue("delta_poc_s1_minus1")?;
+            let _used_by_curr_pic_s1_flag = r.read_bool("used_by_curr_pic_s1_flag")?;
+        }
+        Ok(Self {})
+    }
+}
+
+/// Aspect ratio information.
+// This is copied from `h264_reader`; the H.264 and H.265 formats are
+// apparently identical. Licenses are compatible. Copying seems safer in case
+// the formats diverge in future specifications, and in any case
+// `h264_reader::nal::sps::AspectRatioInfo::read` is private at present.
+#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] +pub enum AspectRatioInfo { + #[default] + Unspecified, + Ratio1_1, + Ratio12_11, + Ratio10_11, + Ratio16_11, + Ratio40_33, + Ratio24_11, + Ratio20_11, + Ratio32_11, + Ratio80_33, + Ratio18_11, + Ratio15_11, + Ratio64_33, + Ratio160_99, + Ratio4_3, + Ratio3_2, + Ratio2_1, + Reserved(u8), + Extended(u16, u16), +} +impl AspectRatioInfo { + fn from_bits(r: &mut R) -> Result, BitReaderError> { + let aspect_ratio_info_present_flag = r.read_bool("aspect_ratio_info_present_flag")?; + Ok(if aspect_ratio_info_present_flag { + let aspect_ratio_idc = r.read(8, "aspect_ratio_idc")?; + Some(match aspect_ratio_idc { + 0 => AspectRatioInfo::Unspecified, + 1 => AspectRatioInfo::Ratio1_1, + 2 => AspectRatioInfo::Ratio12_11, + 3 => AspectRatioInfo::Ratio10_11, + 4 => AspectRatioInfo::Ratio16_11, + 5 => AspectRatioInfo::Ratio40_33, + 6 => AspectRatioInfo::Ratio24_11, + 7 => AspectRatioInfo::Ratio20_11, + 8 => AspectRatioInfo::Ratio32_11, + 9 => AspectRatioInfo::Ratio80_33, + 10 => AspectRatioInfo::Ratio18_11, + 11 => AspectRatioInfo::Ratio15_11, + 12 => AspectRatioInfo::Ratio64_33, + 13 => AspectRatioInfo::Ratio160_99, + 14 => AspectRatioInfo::Ratio4_3, + 15 => AspectRatioInfo::Ratio3_2, + 16 => AspectRatioInfo::Ratio2_1, + 255 => { + AspectRatioInfo::Extended(r.read(16, "sar_width")?, r.read(16, "sar_height")?) + } + _ => AspectRatioInfo::Reserved(aspect_ratio_idc), + }) + } else { + None + }) + } + + /// Returns the aspect ratio as `(width, height)`, if specified. 
+ pub fn get(self) -> Option<(u16, u16)> { + match self { + AspectRatioInfo::Unspecified => None, + AspectRatioInfo::Ratio1_1 => Some((1, 1)), + AspectRatioInfo::Ratio12_11 => Some((12, 11)), + AspectRatioInfo::Ratio10_11 => Some((10, 11)), + AspectRatioInfo::Ratio16_11 => Some((16, 11)), + AspectRatioInfo::Ratio40_33 => Some((40, 33)), + AspectRatioInfo::Ratio24_11 => Some((24, 11)), + AspectRatioInfo::Ratio20_11 => Some((20, 11)), + AspectRatioInfo::Ratio32_11 => Some((32, 11)), + AspectRatioInfo::Ratio80_33 => Some((80, 33)), + AspectRatioInfo::Ratio18_11 => Some((18, 11)), + AspectRatioInfo::Ratio15_11 => Some((15, 11)), + AspectRatioInfo::Ratio64_33 => Some((64, 33)), + AspectRatioInfo::Ratio160_99 => Some((160, 99)), + AspectRatioInfo::Ratio4_3 => Some((4, 3)), + AspectRatioInfo::Ratio3_2 => Some((3, 2)), + AspectRatioInfo::Ratio2_1 => Some((2, 1)), + AspectRatioInfo::Reserved(_) => None, + AspectRatioInfo::Extended(width, height) => { + // ISO/IEC 14496-10 section E.2.1: "When ... sar_width is equal to 0 or sar_height + // is equal to 0, the sample aspect ratio shall be considered unspecified by this + // Recommendation | International Standard." + if width == 0 || height == 0 { + None + } else { + Some((width, height)) + } + } + } + } +} + +/// T.REC H.265 section E.2.1 `vui_parameters`. +#[derive(Debug)] +pub struct VuiParameters { + aspect_ratio: Option, + timing_info: Option, + bitstream_restriction: Option, +} + +impl VuiParameters { + pub fn from_bits(r: &mut R) -> Result { + // See T.REC H.265 section E.2.1, vui_parameters. 
+ let aspect_ratio = AspectRatioInfo::from_bits(r)?; + let overscan_info_present_flag = r.read_bool("overscan_info_present_flag")?; + if overscan_info_present_flag { + let _overscan_appropriate_flag = r.read_bool("overscan_appropriate_flag")?; + } + let video_signal_type_present_flag = r.read_bool("video_signal_type_present_flag")?; + if video_signal_type_present_flag { + r.skip(3, "video_format")?; + let _video_full_range_flag = r.read_bool("video_full_range_flag")?; + let colour_description_present_flag = r.read_bool("colour_description_present_flag")?; + if colour_description_present_flag { + r.skip(8, "colour_primaries")?; + r.skip(8, "transfer_characteristics")?; + r.skip(8, "matrix_coeffs")?; + } + } + let chroma_loc_info_present_flag = r.read_bool("chroma_loc_info_present_flag")?; + if chroma_loc_info_present_flag { + let _chroma_sample_loc_type_top_field = + r.read_ue("chroma_sample_loc_type_top_field")?; + let _chroma_sample_loc_type_bottom_field = + r.read_ue("chroma_sample_loc_type_bottom_field")?; + } + let _neutral_chroma_indication_flag = r.read_bool("neutral_chroma_indication_flag")?; + let _field_seq_flag = r.read_bool("field_seq_flag")?; + let _frame_field_info_present_flag = r.read_bool("frame_field_info_present_flag")?; + let default_display_window_flag = r.read_bool("default_display_window_flag")?; + if default_display_window_flag { + let _def_disp_win_left_offset = r.read_ue("def_disp_win_left_offset")?; + let _def_disp_win_right_offset = r.read_ue("def_disp_win_right_offset")?; + let _def_disp_win_top_offset = r.read_ue("def_disp_win_top_offset")?; + let _def_disp_win_bottom_offset = r.read_ue("def_disp_win_bottom_offset")?; + } + let timing_info = if r.read_bool("vui_timing_info_present_flag")? { + Some(VuiTimingInfo::from_bits(r)?) + } else { + None + }; + let bitstream_restriction = if r.read_bool("bitstream_restriction_flag")? { + Some(BitstreamRestriction::from_bits(r)?) 
+ } else { + None + }; + Ok(Self { + aspect_ratio, + timing_info, + bitstream_restriction, + }) + } + + pub fn aspect_ratio(&self) -> Option { + self.aspect_ratio + } + + pub fn timing_info(&self) -> Option<&VuiTimingInfo> { + self.timing_info.as_ref() + } + + pub fn min_spatial_segmentation_idc(&self) -> Option { + self.bitstream_restriction + .as_ref() + .map(|b| b.min_spatial_segmentation_idc) + } +} + +#[derive(Debug)] +struct BitstreamRestriction { + min_spatial_segmentation_idc: u16, +} + +impl BitstreamRestriction { + fn from_bits(r: &mut R) -> Result { + let _tiles_fixed_structure_flag = r.read_bool("tiles_fixed_structure_flag")?; + let _motion_vectors_over_pic_boundaries_flag = + r.read_bool("motion_vectors_over_pic_boundaries_flag")?; + let _restricted_ref_pic_lists_flag = r.read_bool("restricted_ref_pic_lists_flag")?; + let min_spatial_segmentation_idc = r.read_ue("min_spatial_segmentation_idc")?; + if min_spatial_segmentation_idc >= 4096 { + return Err(Error( + "min_spatial_segmentation_idc must be less than 4096".into(), + )); + } + let min_spatial_segmentation_idc = min_spatial_segmentation_idc as u16; + let _max_bytes_per_pic_denom = r.read_ue("max_bytes_per_pic_denom")?; + let _max_bits_per_min_cu_denom = r.read_ue("max_bits_per_min_cu_denom")?; + let _log2_max_mv_length_horizontal = r.read_ue("log2_max_mv_length_horizontal")?; + let _log2_max_mv_length_vertical = r.read_ue("log2_max_mv_length_vertical")?; + Ok(Self { + min_spatial_segmentation_idc, + }) + } +} + +/// T.REC H.265 section E.2.1 `vui_parameters`, `if( vui_timing_info_present_flag )` block. +#[derive(Debug)] +pub struct VuiTimingInfo { + num_units_in_tick: u32, + time_scale: u32, +} + +impl VuiTimingInfo { + pub fn from_bits(r: &mut R) -> Result { + let num_units_in_tick = r.read(32, "vui_num_units_in_tick")?; + let time_scale = r.read(32, "vui_time_scale")?; + if r.read_bool("vui_poc_proportional_to_timing_flag")? 
{ + let _ = r.read_ue("vui_num_ticks_poc_diff_one_minus1")?; + } + let hrd_parameters_present_flag = r.read_bool("vui_hrd_parameters_present_flag")?; + if hrd_parameters_present_flag { + return Err(Error( + "hrd_parameters_present_flag unimplemented".to_owned(), + )); + } + Ok(Self { + num_units_in_tick, + time_scale, + }) + } + + pub fn num_units_in_tick(&self) -> u32 { + self.num_units_in_tick + } + + pub fn time_scale(&self) -> u32 { + self.time_scale + } +} + +#[cfg(test)] +mod tests { + use crate::testutil::init_logging; + + use super::*; + + struct LoggingBitReader(R); + + impl h264_reader::rbsp::BitRead for LoggingBitReader { + fn read_ue(&mut self, name: &'static str) -> Result { + let res = self.0.read_ue(name)?; + log::debug!("read_ue: {} -> {}", name, res); + Ok(res) + } + + fn read_se(&mut self, name: &'static str) -> Result { + let res = self.0.read_se(name)?; + log::debug!("read_se: {} -> {}", name, res); + Ok(res) + } + + fn read_bool(&mut self, name: &'static str) -> Result { + let res = self.0.read_bool(name)?; + log::debug!("read_bool: {} -> {}", name, res); + Ok(res) + } + + fn read( + &mut self, + bit_count: u32, + name: &'static str, + ) -> Result { + let res = self.0.read(bit_count, name)?; + log::debug!("read: {}({}) -> {:?}", name, bit_count, res); + Ok(res) + } + + fn read_to( + &mut self, + name: &'static str, + ) -> Result { + let res = self.0.read_to(name)?; + log::debug!("read_to: {}({})", name, std::mem::size_of::() * 8); + Ok(res) + } + + fn skip(&mut self, bit_count: u32, name: &'static str) -> Result<(), BitReaderError> { + self.0.skip(bit_count, name)?; + log::debug!("skip: {}({})", name, bit_count); + Ok(()) + } + + fn has_more_rbsp_data(&mut self, name: &'static str) -> Result { + let res = self.0.has_more_rbsp_data(name)?; + log::debug!("has_more_rbsp_data: {} -> {}", name, res); + Ok(res) + } + + fn finish_rbsp(self) -> Result<(), BitReaderError> { + self.0.finish_rbsp()?; + log::debug!("finish_rbsp"); + Ok(()) + } + + fn 
finish_sei_payload(self) -> Result<(), BitReaderError> { + self.0.finish_sei_payload()?; + log::debug!("finish_sei_payload"); + Ok(()) + } + } + + #[test] + fn parse_sps_own() { + init_logging(); + let data = &b"\x42\x01\x01\x01\x60\x00\x00\x03\x00\xb0\x00\x00\x03\x00\x00\x03\x00\x5a\xa0\x05\x82\x01\xe1\x63\x6b\x92\x45\x2f\xcd\xc1\x41\x81\x41\x00\x00\x03\x00\x01\x00\x00\x03\x00\x0c\xa1"[..]; + let (h, bits) = split(data).unwrap(); + assert_eq!(h.unit_type(), UnitType::SpsNut); + let bits = LoggingBitReader(bits); + let sps = dbg!(Sps::from_bits(bits).unwrap()); + let rfc6381_codec = sps.rfc6381_codec(); + assert_eq!(rfc6381_codec, "hvc1.1.60000000.L90.B0"); + assert_eq!(sps.pixel_dimensions().unwrap(), (704, 480)); + let vui = sps.vui().unwrap(); + let timing = vui.timing_info().unwrap(); + assert_eq!(timing.num_units_in_tick(), 1); + assert_eq!(timing.time_scale(), 12); + } + + #[test] + fn parse_pps() { + init_logging(); + let data = &b"D\x01\xc0\xf2\xc6\x8d\x03\xb3@"[..]; + let (h, bits) = split(data).unwrap(); + assert_eq!(h.unit_type(), UnitType::PpsNut); + let bits = LoggingBitReader(bits); + let _pps = dbg!(Pps::from_bits(bits).unwrap()); + // panic!("pps: {pps:#?}"); + } + + #[test] + fn unit_type_roundtrip() { + init_logging(); + for raw in 0..64 { + let unit_type = UnitType::try_from(raw).unwrap(); + assert_eq!(raw, u8::from(unit_type)); + } + } +} diff --git a/src/codec/h265/record.rs b/src/codec/h265/record.rs new file mode 100644 index 0000000..c09a330 --- /dev/null +++ b/src/codec/h265/record.rs @@ -0,0 +1,214 @@ +// Copyright (C) 2024 Scott Lamb +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! Creates a `HEVCDecoderConfigurationRecord`. + +use std::ops::Range; + +use bytes::Bytes; + +use super::nal::{Pps, Sps, UnitType}; + +/// A constructed record and parameter sets, all sharing the same underlying +/// allocation by reference count. 
+pub struct Out { + pub record: Bytes, + pub sps: Bytes, + pub pps: Bytes, + pub vps: Bytes, +} + +/// Creates a `HEVCDecoderConfigurationRecord` for the active PPS, SPS, and VPS. +/// +/// Only a single of each of may be active at a time according to H.265, so this +/// should be sufficient. If the active parameter set changes, +/// `retina::VideoFrame::has_new_parameters` will return true. +/// +/// Always declares `lengthSizeMinusOne` of 3, meaning that NAL units are +/// prefixed with a 4-byte length. +pub(crate) fn decoder_configuration_record( + raw_pps: &[u8], + pps: &Pps, + raw_sps: &[u8], + sps: &Sps, + raw_vps: &[u8], +) -> Out { + let mut record = Vec::new(); + + // unsigned int(8) configurationVersion = 1; + record.push(1); + + // All 11 bytes of Profile: + // unsigned int(2) general_profile_space; + // unsigned int(1) general_tier_flag; + // unsigned int(5) general_profile_idc; + // unsigned int(32) general_profile_compatibility_flags; + // unsigned int(48) general_constraint_indicator_flags; + let profile = sps.profile(); + record.extend(&profile.0[..]); + + // unsigned int(8) general_level_idc; + record.push(sps.general_level_idc()); + + // bit(4) reserved = ‘1111’b; + // unsigned int(12) min_spatial_segmentation_idc; + let min_spatial_segmentation_idc = sps + .vui() + .and_then(|v| v.min_spatial_segmentation_idc()) + .unwrap_or(0); + record.extend(&(0b1111_0000_0000_0000 | min_spatial_segmentation_idc).to_be_bytes()[..]); + let parallelism_type: u8 = if min_spatial_segmentation_idc == 0 { + 0 + } else { + match ( + pps.entropy_coding_sync_enabled_flag(), + pps.tiles_enabled_flag(), + ) { + (true, true) => 0, + (true, false) => 3, + (false, true) => 2, + (false, false) => 1, + } + }; + + // bit(6) reserved = ‘111111’b; + // unsigned int(2) parallelismType; + record.push(0b1111_1100 | parallelism_type); + + // bit(6) reserved = ‘111111’b; + // unsigned int(2) chromaFormat; + record.push(0b1111_1100 | sps.chroma_format_idc()); + + // bit(5) reserved = 
‘11111’b; + // unsigned int(3) bitDepthLumaMinus8; + // bit(5) reserved = ‘11111’b; + // unsigned int(3) bitDepthChromaMinus8; + record.push(0b1111_1000 | sps.bit_depth_luma_minus8()); + record.push(0b1111_1000 | sps.bit_depth_chroma_minus8()); + + // bit(16) avgFrameRate; + record.extend([0, 0]); + + // bit(2) constantFrameRate; + // bit(3) numTemporalLayers; + // bit(1) temporalIdNested; + // unsigned int(2) lengthSizeMinusOne; + + // Note: H.265 section 7.4.3.2.1 states + // `sps_max_sub_layers_minus1 <= vps_max_sub_layers_minus1`. Declare + // the more constrained value. + record.push( + (sps.max_sub_layers() << 3) | (u8::from(sps.temporal_id_nesting_flag()) << 2) | 0b0011, + ); + + // unsigned int(8) numOfArrays; + // for (j=0; j < numOfArrays; j++) { + // bit(1) array_completeness; + // unsigned int(1) reserved = 0; + // unsigned int(6) NAL_unit_type; + // unsigned int(16) numNalus; + // for (i=0; i< numNalus; i++) { + // unsigned int(16) nalUnitLength; + // bit(8*nalUnitLength) nalUnit; + // } + // } + record.push(3); // 3 arrays: VPS, SPS, PPS + let vps_range = append_array(raw_vps, UnitType::VpsNut, &mut record); + let sps_range = append_array(raw_sps, UnitType::SpsNut, &mut record); + let pps_range = append_array(raw_pps, UnitType::PpsNut, &mut record); + let record = Bytes::from(record); + let vps = record.slice(vps_range); + let sps = record.slice(sps_range); + let pps = record.slice(pps_range); + + Out { + record, + vps, + sps, + pps, + } +} + +fn append_array(nal: &[u8], unit_type: UnitType, record: &mut Vec) -> Range { + record.extend([0b1000_0000 | u8::from(unit_type), 0, 1]); + record.extend( + &u16::try_from(nal.len()) + .expect("nalUnitLength must fit in u16") + .to_be_bytes()[..], + ); + let start = record.len(); + record.extend_from_slice(nal); + start..record.len() +} + +#[cfg(test)] +mod tests { + use base64::Engine; + + use super::super::nal; + use super::*; + use crate::testutil::{assert_eq_hex, init_logging}; + + #[test] + fn simple() { + 
init_logging(); + let raw_pps = base64::engine::general_purpose::STANDARD + .decode("RAHA8saNA7NA") + .unwrap(); + let raw_sps = base64::engine::general_purpose::STANDARD + .decode("QgEBAWAAAAMAsAAAAwAAAwBaoAWCAeFja5JFL83BQYFBAAADAAEAAAMADKE=") + .unwrap(); + let raw_vps = base64::engine::general_purpose::STANDARD + .decode("QAEMAf//AWAAAAMAsAAAAwAAAwBarAwAAAMABAAAAwAyqA==") + .unwrap(); + let (pps_h, pps_bits) = nal::split(&raw_pps).unwrap(); + assert_eq!(pps_h.unit_type(), nal::UnitType::PpsNut); + let pps = nal::Pps::from_bits(pps_bits).unwrap(); + let (sps_h, sps_bits) = nal::split(&raw_sps).unwrap(); + assert_eq!(sps_h.unit_type(), nal::UnitType::SpsNut); + let sps = nal::Sps::from_bits(sps_bits).unwrap(); + let (vps_h, _vps_bits) = nal::split(&raw_vps).unwrap(); + assert_eq!(vps_h.unit_type(), nal::UnitType::VpsNut); + let record = decoder_configuration_record(&raw_pps, &pps, &raw_sps, &sps, &raw_vps); + assert_eq_hex!(record.pps, raw_pps); + assert_eq_hex!(record.sps, raw_sps); + assert_eq_hex!(record.vps, raw_vps); + const EXPECTED: &[u8; 125] = b"\ + \x01\x01\x60\x00\x00\x00\xb0\x00\x00\x00\x00\x00\x5a\xf0\x00\xfc\ + \xfd\xf8\xf8\x00\x00\x0f\x03\xa0\x00\x01\x00\x22\x40\x01\x0c\x01\ + \xff\xff\x01\x60\x00\x00\x03\x00\xb0\x00\x00\x03\x00\x00\x03\x00\ + \x5a\xac\x0c\x00\x00\x03\x00\x04\x00\x00\x03\x00\x32\xa8\xa1\x00\ + \x01\x00\x2c\x42\x01\x01\x01\x60\x00\x00\x03\x00\xb0\x00\x00\x03\ + \x00\x00\x03\x00\x5a\xa0\x05\x82\x01\xe1\x63\x6b\x92\x45\x2f\xcd\ + \xc1\x41\x81\x41\x00\x00\x03\x00\x01\x00\x00\x03\x00\x0c\xa1\xa2\ + \x00\x01\x00\x09\x44\x01\xc0\xf2\xc6\x8d\x03\xb3\x40\ + "; + assert_eq_hex!(&*record.record, EXPECTED); + } + + #[test] + fn geovision() { + init_logging(); + let raw_vps = &b"\x40\x01\x0c\x01\xff\xff\x01\x40\x00\x00\x03\x00\x00\x03\x00\x00\x03\x00\x00\x03\x00\x99\xac\x09"[..]; + let raw_sps = 
&b"\x42\x01\x01\x01\x40\x00\x00\x03\x00\x00\x03\x00\x00\x03\x00\x00\x03\x00\x99\xa0\x01\x50\x20\x06\x01\xf1\x39\x6b\xb9\x1b\x06\xb9\x54\x4d\xc0\x40\x40\x41\x00\x00\x03\x00\x01\x00\x00\x03\x00\x1e\x08"[..]; + let raw_pps = &b"\x44\x01\xc0\x73\xc0\x4c\x90"[..]; + + // rfc6381_codec: hvc1.1.40000000.L153.00 + // hevc_decoder_config: 01 01 40 00 00 00 00 00 00 00 00 00 99 f0 00 fc fd f8 f8 00 00 0f 03 a0 00 01 00 18 40 01 0c 01 ff ff 01 40 00 00 03 00 00 03 00 00 03 00 00 03 00 99 ac 09 a1 00 01 00 31 42 01 01 01 40 00 00 03 00 00 03 00 00 03 00 00 03 00 99 a0 01 50 20 06 01 f1 39 6b b9 1b 06 b9 54 4d c0 40 40 41 00 00 03 00 01 00 00 03 00 1e 08 a2 00 01 00 07 44 01 c0 73 c0 4c 90 + let (pps_h, pps_bits) = nal::split(&raw_pps).unwrap(); + assert_eq!(pps_h.unit_type(), nal::UnitType::PpsNut); + let pps = nal::Pps::from_bits(pps_bits).unwrap(); + let (sps_h, sps_bits) = nal::split(&raw_sps).unwrap(); + assert_eq!(sps_h.unit_type(), nal::UnitType::SpsNut); + let sps = nal::Sps::from_bits(sps_bits).unwrap(); + assert_eq!(sps.rfc6381_codec(), "hvc1.1.40000000.L153.00"); + let (vps_h, _vps_bits) = nal::split(&raw_vps).unwrap(); + assert_eq!(vps_h.unit_type(), nal::UnitType::VpsNut); + let record = decoder_configuration_record(&raw_pps, &pps, &raw_sps, &sps, &raw_vps); + assert_eq_hex!(record.pps, raw_pps); + assert_eq_hex!(record.sps, raw_sps); + assert_eq_hex!(record.vps, raw_vps); + let expected = &b"\x01\x01\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x99\xf0\x00\xfc\xfd\xf8\xf8\x00\x00\x0f\x03\xa0\x00\x01\x00\x18\x40\x01\x0c\x01\xff\xff\x01\x40\x00\x00\x03\x00\x00\x03\x00\x00\x03\x00\x00\x03\x00\x99\xac\x09\xa1\x00\x01\x00\x31\x42\x01\x01\x01\x40\x00\x00\x03\x00\x00\x03\x00\x00\x03\x00\x00\x03\x00\x99\xa0\x01\x50\x20\x06\x01\xf1\x39\x6b\xb9\x1b\x06\xb9\x54\x4d\xc0\x40\x40\x41\x00\x00\x03\x00\x01\x00\x00\x03\x00\x1e\x08\xa2\x00\x01\x00\x07\x44\x01\xc0\x73\xc0\x4c\x90"[..]; + assert_eq_hex!(&*record.record, expected); + } +} diff --git a/src/codec/h26x.rs b/src/codec/h26x.rs 
new file mode 100644 index 0000000..8d2e590 --- /dev/null +++ b/src/codec/h26x.rs @@ -0,0 +1,73 @@ +// Copyright (C) 2024 Scott Lamb +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! Common logic between H.264 and H.265. + +/// `h264_reader::rbsp::BitRead` impl that *notes* extra trailing data rather than failing on it. +/// +/// Some (Reolink) cameras appear to have a stray extra byte at the end. Follow the lead of most +/// other RTSP implementations in tolerating this. +#[derive(Debug)] +pub(super) struct TolerantBitReader<'a, R> { + pub(super) inner: R, + pub(super) has_extra_trailing_data: &'a mut bool, +} + +impl<'a, R: h264_reader::rbsp::BitRead> h264_reader::rbsp::BitRead for TolerantBitReader<'a, R> { + fn read_ue(&mut self, name: &'static str) -> Result { + self.inner.read_ue(name) + } + + fn read_se(&mut self, name: &'static str) -> Result { + self.inner.read_se(name) + } + + fn read_bool(&mut self, name: &'static str) -> Result { + self.inner.read_bool(name) + } + + fn skip( + &mut self, + bit_count: u32, + name: &'static str, + ) -> Result<(), h264_reader::rbsp::BitReaderError> { + self.inner.skip(bit_count, name) + } + + fn read( + &mut self, + bit_count: u32, + name: &'static str, + ) -> Result { + self.inner.read(bit_count, name) + } + + fn read_to( + &mut self, + name: &'static str, + ) -> Result { + self.inner.read_to(name) + } + + fn has_more_rbsp_data( + &mut self, + name: &'static str, + ) -> Result { + self.inner.has_more_rbsp_data(name) + } + + fn finish_rbsp(self) -> Result<(), h264_reader::rbsp::BitReaderError> { + match self.inner.finish_rbsp() { + Ok(()) => Ok(()), + Err(h264_reader::rbsp::BitReaderError::RemainingData) => { + *self.has_extra_trailing_data = true; + Ok(()) + } + Err(e) => Err(e), + } + } + + fn finish_sei_payload(self) -> Result<(), h264_reader::rbsp::BitReaderError> { + self.inner.finish_sei_payload() + } +} diff --git a/src/codec/jpeg.rs b/src/codec/jpeg.rs index bed89e0..ae5dbf4 100644 --- a/src/codec/jpeg.rs +++ 
b/src/codec/jpeg.rs @@ -581,7 +581,7 @@ impl Default for Depacketizer { mod tests { use std::num::NonZeroU32; - use crate::testutil::init_logging; + use crate::testutil::{assert_eq_hex, init_logging}; use crate::{codec::CodecItem, rtp::ReceivedPacketBuilder}; // Raw RTP payload from a MJPEG encoded Big Buck Bunny stream @@ -881,6 +881,6 @@ mod tests { Some(CodecItem::VideoFrame(frame)) => frame, _ => panic!(), }; - assert_eq!(frame.data(), VALID_JPEG_IMAGE) + assert_eq_hex!(frame.data(), VALID_JPEG_IMAGE) } } diff --git a/src/codec/mod.rs b/src/codec/mod.rs index 07b9619..062b887 100644 --- a/src/codec/mod.rs +++ b/src/codec/mod.rs @@ -130,11 +130,16 @@ fn write_visual_sample_entry_body(buf: &mut Vec, pixel_dimensions: (u16, u16 pub(crate) mod aac; pub(crate) mod g723; +mod h26x; pub(crate) mod jpeg; #[doc(hidden)] pub mod h264; +#[cfg(feature = "unstable-h265")] +#[doc(hidden)] +pub mod h265; + pub(crate) mod onvif; pub(crate) mod simple_audio; @@ -560,6 +565,8 @@ enum DepacketizerInner { SimpleAudio(Box), G723(Box), H264(Box), + #[cfg(feature = "unstable-h265")] + H265(Box), Onvif(Box), Jpeg(Box), } @@ -581,6 +588,11 @@ impl Depacketizer { clock_rate, format_specific_params, )?)), + #[cfg(feature = "unstable-h265")] + ("video", "h265") => DepacketizerInner::H265(Box::new(h265::Depacketizer::new( + clock_rate, + format_specific_params, + )?)), ("image" | "video", "jpeg") => { DepacketizerInner::Jpeg(Box::new(jpeg::Depacketizer::new())) } @@ -652,6 +664,8 @@ impl Depacketizer { DepacketizerInner::Aac(d) => d.parameters(), DepacketizerInner::G723(d) => d.parameters(), DepacketizerInner::H264(d) => d.parameters(), + #[cfg(feature = "unstable-h265")] + DepacketizerInner::H265(d) => d.parameters(), DepacketizerInner::Onvif(d) => d.parameters(), DepacketizerInner::SimpleAudio(d) => d.parameters(), DepacketizerInner::Jpeg(d) => d.parameters(), @@ -668,6 +682,8 @@ impl Depacketizer { DepacketizerInner::Aac(d) => d.push(input), DepacketizerInner::G723(d) => d.push(input), 
DepacketizerInner::H264(d) => d.push(input), + #[cfg(feature = "unstable-h265")] + DepacketizerInner::H265(d) => d.push(input), DepacketizerInner::Onvif(d) => d.push(input), DepacketizerInner::SimpleAudio(d) => d.push(input), DepacketizerInner::Jpeg(d) => d.push(input), @@ -687,6 +703,8 @@ impl Depacketizer { DepacketizerInner::Aac(d) => d.pull(conn_ctx, stream_ctx), DepacketizerInner::G723(d) => Ok(d.pull()), DepacketizerInner::H264(d) => Ok(d.pull()), + #[cfg(feature = "unstable-h265")] + DepacketizerInner::H265(d) => Ok(d.pull()), DepacketizerInner::Onvif(d) => Ok(d.pull()), DepacketizerInner::SimpleAudio(d) => Ok(d.pull()), DepacketizerInner::Jpeg(d) => Ok(d.pull()), @@ -716,6 +734,11 @@ mod tests { "h264::Depacketizer", std::mem::size_of::(), ), + #[cfg(feature = "unstable-h265")] + ( + "h265::Depacketizer", + std::mem::size_of::(), + ), ( "onvif::Depacketizer", std::mem::size_of::(), diff --git a/src/testutil.rs b/src/testutil.rs index 4ae76c5..9a5ed31 100644 --- a/src/testutil.rs +++ b/src/testutil.rs @@ -5,6 +5,26 @@ use std::str::FromStr; use bytes::Bytes; +macro_rules! assert_eq_hex { + ($left:expr, $right:expr) => {{ + use pretty_hex::config_hex; + let left: &[u8] = &*$left; + let right: &[u8] = &*$right; + let cfg = pretty_hex::HexConfig { + ..Default::default() + }; + if left != right { + panic!( + "hex strings are not equal.\n\nleft: {}\n\nright: {}", + config_hex(&left, cfg), + config_hex(&right, cfg), + ); + } + }}; +} + +pub(crate) use assert_eq_hex; + pub(crate) fn init_logging() { let h = mylog::Builder::new() .set_format(