diff --git a/examples/client/src/mp4.rs b/examples/client/src/mp4.rs index a49e637..2922fb6 100644 --- a/examples/client/src/mp4.rs +++ b/examples/client/src/mp4.rs @@ -382,13 +382,14 @@ impl Mp4Writer { buf.put_u32(0); // version buf.put_u32(u32::try_from(self.video_params.len())?); // entry_count for p in &self.video_params { - let e = p.sample_entry().ok_or_else(|| { + let e = p.sample_entry().build().map_err(|e| { anyhow!( - "unable to produce VisualSampleEntry for {} stream", - p.rfc6381_codec() + "unable to produce VisualSampleEntry for {} stream: {}", + p.rfc6381_codec(), + e, ) })?; - buf.extend_from_slice(e); + buf.extend_from_slice(&e); } }); self.video_trak.write_common_stbl_parts(buf)?; diff --git a/src/codec/aac.rs b/src/codec/aac.rs index dab8116..65d5aff 100644 --- a/src/codec/aac.rs +++ b/src/codec/aac.rs @@ -186,7 +186,7 @@ fn make_sample_entry( // Write an MP4AudioSampleEntry (`mp4a`), as in ISO/IEC 14496-14 section 5.6.1. // It's based on AudioSampleEntry, ISO/IEC 14496-12 section 12.2.3.2, // in turn based on SampleEntry, ISO/IEC 14496-12 section 8.5.2.2. - write_mp4_box!(&mut buf, b"mp4a", { + write_mp4_box!(&mut buf, *b"mp4a", { buf.extend_from_slice(&[ 0, 0, 0, 0, // SampleEntry.reserved 0, 0, 0, 1, // SampleEntry.reserved, SampleEntry.data_reference_index (1) @@ -209,7 +209,7 @@ fn make_sample_entry( buf.put_u32(u32::from(sampling_frequency) << 16); // Write the embedded ESDBox (`esds`), as in ISO/IEC 14496-14 section 5.6.1. - write_mp4_box!(&mut buf, b"esds", { + write_mp4_box!(&mut buf, *b"esds", { buf.put_u32(0); // version write_mpeg4_descriptor!(&mut buf, 0x03 /* ES_DescrTag */, { diff --git a/src/codec/h264.rs b/src/codec/h264.rs index d5f6956..f43f76f 100644 --- a/src/codec/h264.rs +++ b/src/codec/h264.rs @@ -12,7 +12,6 @@ use h264_reader::nal::{NalHeader, UnitType}; use log::{debug, log_enabled, trace}; use crate::{ - codec::write_visual_sample_entry_body, rtp::{ReceivedPacket, ReceivedPacketBuilder}, Error, Timestamp, }; @@ -706,22 +705,6 @@ impl<'a, R: h264_reader::rbsp::BitRead> h264_reader::rbsp::BitRead for TolerantB } } -/// Writes an `avc1` / `AVCSampleEntry` as in ISO/IEC 14496-15 section 5.4.2.1. -fn make_video_sample_entry(pixel_dimensions: (u32, u32), extra_data: &[u8]) -> Option> { - let pixel_dimensions = ( - u16::try_from(pixel_dimensions.0).ok()?, - u16::try_from(pixel_dimensions.1).ok()?, - ); - let mut buf = Vec::new(); - write_mp4_box!(&mut buf, b"avc1", { - write_visual_sample_entry_body(&mut buf, pixel_dimensions); - write_mp4_box!(&mut buf, b"avcC", { - buf.extend_from_slice(extra_data); - }); - }); - Some(buf) -} - impl InternalParameters { /// Parses metadata from the `format-specific-params` of a SDP `fmtp` media attribute. fn parse_format_specific_params(format_specific_params: &str) -> Result { @@ -800,6 +783,16 @@ impl InternalParameters { let pixel_dimensions = sps .pixel_dimensions() .map_err(|e| format!("SPS has invalid pixel dimensions: {e:?}"))?; + let e = |_| { + format!( + "SPS has invalid pixel dimensions: {}x{} is too large", + pixel_dimensions.0, pixel_dimensions.1 + ) + }; + let pixel_dimensions = ( + u16::try_from(pixel_dimensions.0).map_err(e)?, + u16::try_from(pixel_dimensions.1).map_err(e)?, + ); // Create the AVCDecoderConfiguration, ISO/IEC 14496-15 section 5.2.4.1. // The beginning of the AVCDecoderConfiguration takes a few values from @@ -861,7 +854,6 @@ impl InternalParameters { let avc_decoder_config = avc_decoder_config.freeze(); let sps_nal = avc_decoder_config.slice(sps_nal_start..sps_nal_end); let pps_nal = avc_decoder_config.slice(pps_nal_start..pps_nal_end); - let sample_entry = make_video_sample_entry(pixel_dimensions, &avc_decoder_config); Ok(InternalParameters { generic_parameters: super::VideoParameters { rfc6381_codec, @@ -869,7 +861,7 @@ impl InternalParameters { pixel_aspect_ratio, frame_rate, extra_data: avc_decoder_config, - sample_entry, + codec: super::VideoCodec::H264, }, sps_nal, pps_nal, diff --git a/src/codec/jpeg.rs b/src/codec/jpeg.rs index bed89e0..c8f5363 100644 --- a/src/codec/jpeg.rs +++ b/src/codec/jpeg.rs @@ -19,7 +19,7 @@ use bytes::{Buf, Bytes}; -use crate::{codec::write_visual_sample_entry_body, rtp::ReceivedPacket, PacketContext, Timestamp}; +use crate::{rtp::ReceivedPacket, PacketContext, Timestamp}; use super::{VideoFrame, VideoParameters}; @@ -446,12 +446,12 @@ impl Depacketizer { start_ctx: ctx, timestamp, parameters: Some(VideoParameters { - pixel_dimensions: (u32::from(width), u32::from(height)), + pixel_dimensions: (width, height), rfc6381_codec: "mp4v.6C".to_owned(), pixel_aspect_ratio: None, frame_rate: None, extra_data: Bytes::new(), - sample_entry: Some(make_video_sample_entry(width, height)), + codec: super::VideoCodec::Jpeg, }), }); } @@ -524,51 +524,45 @@ impl Depacketizer { } } -fn make_video_sample_entry(width: u16, height: u16) -> Vec { - let mut buf = Vec::new(); - - // Write an MP4VisualSampleEntry (`mp4v`), as in ISO/IEC 14496-14 section 5.6.1. - // It's based on VisualSampleEntry, ISO/IEC 14496-12 section 12.1.3. - // in turn based on SampleEntry, ISO/IEC 14496-12 section 8.5.2.2. - write_mp4_box!(&mut buf, b"mp4v", { - write_visual_sample_entry_body(&mut buf, (width, height)); - - // Write the embedded ESDBox (`esds`), as in ISO/IEC 14496-14 section 5.6.1. - write_mp4_box!(&mut buf, b"esds", { - buf.extend_from_slice(&0u32.to_be_bytes()[..]); // version - write_mpeg4_descriptor!(&mut buf, 0x03 /* ES_DescrTag */, { - // The ESDBox contains an ES_Descriptor, defined in ISO/IEC 14496-1 section 8.3.3. - // ISO/IEC 14496-14 section 3.1.2 has advice on how to set its - // fields within the scope of a .mp4 file. +/// Writes the embedded ESDBox (`esds`), as in ISO/IEC 14496-14 section 5.6.1. +/// +/// This is actually entirely static, but we construct it at runtime with the +/// `write_mp4_box!` and `write_mpeg4_descriptor!` macros for readability. +#[cfg(feature = "unstable-sample-entry")] +pub(super) fn append_esds(buf: &mut Vec) { + write_mp4_box!(buf, *b"esds", { + buf.extend_from_slice(&0u32.to_be_bytes()[..]); // version + write_mpeg4_descriptor!(buf, 0x03 /* ES_DescrTag */, { + // The ESDBox contains an ES_Descriptor, defined in ISO/IEC 14496-1 section 8.3.3. + // ISO/IEC 14496-14 section 3.1.2 has advice on how to set its + // fields within the scope of a .mp4 file. + buf.extend_from_slice(&[ + 0, 0, // ES_ID=0 + 0x00, // streamDependenceFlag, URL_Flag, OCRStreamFlag, streamPriority. + ]); + + // DecoderConfigDescriptor, defined in ISO/IEC 14496-1 section 7.2.6.6. + write_mpeg4_descriptor!(buf, 0x04 /* DecoderConfigDescrTag */, { buf.extend_from_slice(&[ - 0, 0, // ES_ID=0 - 0x00, // streamDependenceFlag, URL_Flag, OCRStreamFlag, streamPriority. + 0x6C, // objectTypeIndication = Visual ISO/IEC 10918-1 (aka JPEG) + 0x11, // streamType = visual, upstream = false, reserved = 1 + // XXX: does any reader expect valid values here? They wouldn't be + // trivial to calculate ahead of time. + 0x00, 0x00, 0x00, // bufferSizeDB + 0x00, 0x00, 0x00, 0x00, // maxBitrate + 0x00, 0x00, 0x00, 0x00, // avgBitrate ]); + // No DecoderSpecificInfo. + // DecoderSpecificInfo, 2 of them? + // No profileLevelIndicatorIndexDescr. + }); - // DecoderConfigDescriptor, defined in ISO/IEC 14496-1 section 7.2.6.6. - write_mpeg4_descriptor!(&mut buf, 0x04 /* DecoderConfigDescrTag */, { - buf.extend_from_slice(&[ - 0x6C, // objectTypeIndication = Visual ISO/IEC 10918-1 (aka JPEG) - 0x11, // streamType = visual, upstream = false, reserved = 1 - // XXX: does any reader expect valid values here? They wouldn't be - // trivial to calculate ahead of time. - 0x00, 0x00, 0x00, // bufferSizeDB - 0x00, 0x00, 0x00, 0x00, // maxBitrate - 0x00, 0x00, 0x00, 0x00, // avgBitrate - ]); - // No DecoderSpecificInfo. - // DecoderSpecificInfo, 2 of them? - // No profileLevelIndicatorIndexDescr. - }); - - // SLConfigDescriptor, ISO/IEC 14496-1 section 7.3.2.3.1. - write_mpeg4_descriptor!(&mut buf, 0x06 /* SLConfigDescrTag */, { - buf.push(2); // predefined = reserved for use in MP4 files - }); + // SLConfigDescriptor, ISO/IEC 14496-1 section 7.3.2.3.1. + write_mpeg4_descriptor!(buf, 0x06 /* SLConfigDescrTag */, { + buf.push(2); // predefined = reserved for use in MP4 files }); }); }); - buf } impl Default for Depacketizer { diff --git a/src/codec/mod.rs b/src/codec/mod.rs index 07b9619..14b9cf9 100644 --- a/src/codec/mod.rs +++ b/src/codec/mod.rs @@ -28,7 +28,7 @@ macro_rules! write_mp4_box { ($buf:expr, $fourcc:expr, $b:block) => {{ let _: &mut Vec = $buf; // type-check. let pos_start = $buf.len(); - let fourcc: &[u8; 4] = $fourcc; + let fourcc: [u8; 4] = $fourcc; $buf.extend_from_slice(&[0, 0, 0, 0, fourcc[0], fourcc[1], fourcc[2], fourcc[3]]); let r = { $b; @@ -101,33 +101,6 @@ macro_rules! write_mpeg4_descriptor { }}; } -/// Writes the boilerplate of a ISO/IEC 14496-12 `VisualSampleEntry`. -fn write_visual_sample_entry_body(buf: &mut Vec, pixel_dimensions: (u16, u16)) { - // SampleEntry, section 8.5.2.2. - buf.extend_from_slice(&0u32.to_be_bytes()[..]); // pre_defined + reserved - buf.extend_from_slice(&1u32.to_be_bytes()[..]); // data_reference_index = 1 - - // VisualSampleEntry, section 12.1.3.2. - buf.extend_from_slice(&[0; 16]); - buf.extend_from_slice(&pixel_dimensions.0.to_be_bytes()[..]); - buf.extend_from_slice(&pixel_dimensions.1.to_be_bytes()[..]); - buf.extend_from_slice(&[ - 0x00, 0x48, 0x00, 0x00, // horizresolution - 0x00, 0x48, 0x00, 0x00, // vertresolution - 0x00, 0x00, 0x00, 0x00, // reserved - 0x00, 0x01, // frame count - 0x00, 0x00, 0x00, 0x00, // compressorname - 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x18, 0xff, 0xff, // depth + pre_defined - ]); -} - pub(crate) mod aac; pub(crate) mod g723; pub(crate) mod jpeg; @@ -177,14 +150,17 @@ pub enum ParametersRef<'a> { /// calls to [`crate::client::Stream::parameters`] will return the new value. #[derive(Clone, PartialEq, Eq, Hash)] pub struct VideoParameters { - pixel_dimensions: (u32, u32), + pixel_dimensions: (u16, u16), rfc6381_codec: String, pixel_aspect_ratio: Option<(u32, u32)>, frame_rate: Option<(u32, u32)>, extra_data: Bytes, + /// The codec, for internal use in sample entry construction. + /// + /// This is more straightforward than reparsing the RFC 6381 codec string. #[cfg_attr(not(feature = "unstable-sample-entry"), allow(unused))] - sample_entry: Option>, + codec: VideoCodec, } impl VideoParameters { @@ -195,19 +171,21 @@ impl VideoParameters { &self.rfc6381_codec } - /// An `.mp4` `VideoSampleEntry` box (as defined in ISO/IEC 14496-12), if possible. - /// - /// Not all codecs can be placed into a `.mp4` file, and even for supported codecs there - /// may be unsupported edge cases. + /// Returns a builder for an `.mp4` `VideoSampleEntry` box (as defined in + /// ISO/IEC 14496-12). #[cfg(feature = "unstable-sample-entry")] #[cfg_attr(docsrs, doc(cfg(feature = "unstable-sample-entry")))] - pub fn sample_entry(&self) -> Option<&[u8]> { - self.sample_entry.as_deref() + pub fn sample_entry(&self) -> VideoSampleEntryBuilder { + VideoSampleEntryBuilder { + params: self, + aspect_ratio_override: None, + } } /// Returns the overall dimensions of the video frame in pixels, as `(width, height)`. pub fn pixel_dimensions(&self) -> (u32, u32) { - self.pixel_dimensions + let (width, height) = self.pixel_dimensions; + (width.into(), height.into()) } /// Returns the displayed size of a pixel, if known, as a dimensionless ratio `(h_spacing, v_spacing)`. @@ -259,6 +237,97 @@ impl std::fmt::Debug for VideoParameters { } } +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +enum VideoCodec { + H264, + Jpeg, +} + +impl VideoCodec { + #[cfg(feature = "unstable-sample-entry")] + fn visual_sample_entry_box_type(self) -> [u8; 4] { + match self { + VideoCodec::H264 => *b"avc1", + VideoCodec::Jpeg => *b"mp4v", + } + } +} + +#[cfg(feature = "unstable-sample-entry")] +#[cfg_attr(docsrs, doc(cfg(feature = "unstable-sample-entry")))] +pub struct VideoSampleEntryBuilder<'p> { + params: &'p VideoParameters, + aspect_ratio_override: Option<(u16, u16)>, +} + +#[cfg(feature = "unstable-sample-entry")] +#[cfg_attr(docsrs, doc(cfg(feature = "unstable-sample-entry")))] +impl VideoSampleEntryBuilder<'_> { + /// Overrides the codec-level pixel aspect ratio via a `pasp` box. + #[inline] + pub fn with_aspect_ratio(self, aspect_ratio: (u16, u16)) -> Self { + Self { + aspect_ratio_override: Some(aspect_ratio), + ..self + } + } + + /// Builds the `.mp4` `VisualSampleEntry` box, if possible. + pub fn build(self) -> Result, Error> { + let mut buf = Vec::new(); + write_mp4_box!( + &mut buf, + self.params.codec.visual_sample_entry_box_type(), + { + // SampleEntry, section 8.5.2.2. + buf.extend_from_slice(&0u32.to_be_bytes()[..]); // pre_defined + reserved + buf.extend_from_slice(&1u32.to_be_bytes()[..]); // data_reference_index = 1 + + // VisualSampleEntry, section 12.1.3.2. + buf.extend_from_slice(&[0; 16]); + buf.extend_from_slice(&self.params.pixel_dimensions.0.to_be_bytes()[..]); + buf.extend_from_slice(&self.params.pixel_dimensions.1.to_be_bytes()[..]); + buf.extend_from_slice(&[ + 0x00, 0x48, 0x00, 0x00, // horizresolution + 0x00, 0x48, 0x00, 0x00, // vertresolution + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, // frame count + 0x00, 0x00, 0x00, 0x00, // compressorname + 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x18, 0xff, 0xff, // depth + pre_defined + ]); + + // Codec-specific portion. + match self.params.codec { + VideoCodec::H264 => { + write_mp4_box!(&mut buf, *b"avcC", { + buf.extend_from_slice(&self.params.extra_data); + }); + } + VideoCodec::Jpeg => { + jpeg::append_esds(&mut buf); + } + } + + // pasp box, if requested. + if let Some(aspect_ratio) = self.aspect_ratio_override { + write_mp4_box!(&mut buf, *b"pasp", { + buf.extend_from_slice(&u32::from(aspect_ratio.0).to_be_bytes()[..]); + buf.extend_from_slice(&u32::from(aspect_ratio.1).to_be_bytes()[..]); + }); + } + } + ); + Ok(buf) + } +} + /// Parameters which describe an audio stream. #[derive(Clone, PartialEq, Eq, Hash)] pub struct AudioParameters {