From 0ace44292821935b745abadf813faf07919ecc1e Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Wed, 5 Jun 2024 16:25:48 +0300 Subject: [PATCH 01/12] Preliminary implementation of MIME type sniffing --- Cargo.lock | 35 +++- Cargo.toml | 4 + src/filesystem/drive/directory_entry.rs | 15 ++ src/filesystem/drive/directory_handle.rs | 17 +- src/filesystem/nodes/metadata/mime_type.rs | 212 +++++++++++++++++++++ src/filesystem/nodes/metadata/mod.rs | 41 ++++ src/filesystem/nodes/mod.rs | 24 ++- src/filesystem/nodes/node_builder.rs | 3 +- 8 files changed, 338 insertions(+), 13 deletions(-) create mode 100644 src/filesystem/nodes/metadata/mime_type.rs create mode 100644 src/filesystem/nodes/metadata/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 8881738..94f8f47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -258,6 +258,8 @@ dependencies = [ "futures", "getrandom", "js-sys", + "mime 0.4.0-a.0", + "mime_guess", "p384", "rand", "rand_chacha", @@ -1093,13 +1095,27 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mime" +version = "0.4.0-a.0" +source = "git+https://github.com/PlamenHristov/mime.git?rev=c30e3db#c30e3db1b4414036baaac50e0914355385f56ec1" +dependencies = [ + "mime-parse", + "quoted-string", +] + +[[package]] +name = "mime-parse" +version = "0.0.0" +source = "git+https://github.com/PlamenHristov/mime.git?rev=c30e3db#c30e3db1b4414036baaac50e0914355385f56ec1" + [[package]] name = "mime_guess" version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" dependencies = [ - "mime", + "mime 0.3.17", "unicase", ] @@ -1433,6 +1449,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.36" @@ -1442,6 +1464,15 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "quoted-string" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9586f8867f39941d8e796c18340a9cb5221a018df021169dc3e61c87d9f5f567" +dependencies = [ + "quick-error", +] + [[package]] name = "rand" version = "0.8.5" @@ -1544,7 +1575,7 @@ dependencies = [ "ipnet", "js-sys", "log", - "mime", + "mime 0.3.17", "mime_guess", "once_cell", "percent-encoding", diff --git a/Cargo.toml b/Cargo.toml index 414fc07..021207d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,6 +61,10 @@ reqwest = { version = "^0.12", default-features = false, optional = true, featur serde = { version = "^1", features = ["derive"], optional = true } serde_json = { version = "^1", optional = true } url = { version = "^2", optional = true } +#mime = "0.3.17" +mime = {git = "https://github.com/PlamenHristov/mime.git", rev = "c30e3db"} +mime_guess = "2.0.4" + [[example]] name = "full_fs_exercise" diff --git a/src/filesystem/drive/directory_entry.rs b/src/filesystem/drive/directory_entry.rs index 2bcbd87..ed03f47 100644 --- a/src/filesystem/drive/directory_entry.rs +++ b/src/filesystem/drive/directory_entry.rs @@ -14,6 +14,8 @@ pub struct DirectoryEntry { name: NodeName, kind: NodeKind, + mime_type: Option, + size: u64, } @@ -48,6 +50,17 @@ impl DirectoryEntry { pub fn size(&self) -> u64 { self.size } + + pub fn mime_type(&self) -> Option { + match self.kind { + NodeKind::File => self.mime_type.clone(), + NodeKind::Directory => None, + NodeKind::AssociatedData => None, + NodeKind::InternalLink => None, + NodeKind::NativeMount => None, + NodeKind::Unknown(_) => None, + } + } } impl TryFrom<&Node> for DirectoryEntry { @@ -63,6 +76,8 @@ impl TryFrom<&Node> for DirectoryEntry { name: node.name().clone(), kind: node.kind().clone(), + mime_type: node.mime_type(), + size: 
node.size(), }) } diff --git a/src/filesystem/drive/directory_handle.rs b/src/filesystem/drive/directory_handle.rs index 1dc00db..0db16bb 100644 --- a/src/filesystem/drive/directory_handle.rs +++ b/src/filesystem/drive/directory_handle.rs @@ -13,7 +13,7 @@ use crate::codec::crypto::{AccessKey, SigningKey}; use crate::codec::data_storage::{data_chunk::DataChunk, DataBlock}; use crate::codec::filesystem::BlockKind; use crate::filesystem::drive::{DirectoryEntry, InnerDrive, OperationError, WalkState}; -use crate::filesystem::nodes::{Node, NodeData, NodeId, NodeName}; +use crate::filesystem::nodes::{MetadataKey, MimeGuesser, Node, NodeData, NodeId, NodeName}; use crate::filesystem::{ContentLocation, ContentReference, FileContent, NodeBuilder}; use crate::stores::DataStore; @@ -65,7 +65,7 @@ impl DirectoryHandle { } /// Changes the permission on the target node. Currently not implemented and changes are - /// expected to combine the [`FilePermissions`] with the [`crate::codec::filesystem::DirectoryPermissions`] all at once. + /// expected to combine the [`FilePermissions`] with the [`crate::codec::filesystem::DirectoryPermissions`] all at once. 
pub async fn chmod( &self, _path: &[&str], @@ -693,8 +693,19 @@ impl DirectoryHandle { let mut inner_write = self.inner.write().await; let node = inner_write.by_perm_id_mut(&new_permanent_id).await?; - let node_data = node.data_mut().await; + let mime_type = { + let node_name = node.name().clone(); + MimeGuesser::default() + .with_name(node_name) + // .with_data(content_references.map()) + .guess_mime_type() + }; + if let Some(mime_type) = mime_type { + node.set_attribute(MetadataKey::MimeType, mime_type.to_string().into()) + .await; + } + let node_data = node.data_mut().await; let file_content = FileContent::encrypted(locked_key, plaintext_cid, data_size, content_references); *node_data = NodeData::full_file(file_content); diff --git a/src/filesystem/nodes/metadata/mime_type.rs b/src/filesystem/nodes/metadata/mime_type.rs new file mode 100644 index 0000000..53c34dd --- /dev/null +++ b/src/filesystem/nodes/metadata/mime_type.rs @@ -0,0 +1,212 @@ +use crate::prelude::nodes::NodeName; + + +#[derive(Default)] +pub struct MimeGuesser { + name: Option, + data: Vec, +} + +impl MimeGuesser { + const MP3_RATES: [u32; 15] = [ + 0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000, + 256000, 320000, + ]; + + const MP25_RATES: [u32; 15] = [ + 0, 8000, 16000, 24000, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, + 144000, 160000, + ]; + + const SAMPLE_RATES: [u32; 3] = [44100, 48000, 32000]; + + pub fn with_name(mut self, name: NodeName) -> Self { + match name { + NodeName::Named(name) => self.name = Some(name.clone()), + NodeName::Root => {} + } + self + } + + pub fn with_data(mut self, data: &[u8]) -> Self { + self.data.extend_from_slice(data); + self + } + + pub fn guess_mime_type(&self) -> Option { + self.pattern_match() + .or_else(|| self.algorithm_match()) + .or_else(|| self.extension_match()) + } + + fn extension_match(&self) -> Option { + let guess = mime_guess::get_mime_extensions_str( + 
self.name.as_ref().map_or("", |name| name.as_str()), + ); + if let Some(guess) = guess { + return mime::MediaType::parse(*guess.first()?).ok(); + } + None + } + + fn pattern_match(&self) -> Option { + let magic_bytes = &self.data.get(0..34)?; + + // Taken from https://mimesniff.spec.whatwg.org/ + match magic_bytes { + [0xFF, 0xD8, 0xFF, ..] => Some(mime::IMAGE_JPEG), + [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] => Some(mime::IMAGE_PNG), + [0x47, 0x49, 0x46, 0x38, 0x37, 0x61, ..] | [0x47, 0x49, 0x46, 0x38, 0x39, 0x61, ..] => { + Some(mime::IMAGE_GIF) + } + [0x42, 0x4D, ..] => Some(mime::IMAGE_BMP), + [0x3C, 0x3F, 0x78, 0x6D, 0x6C, ..] => Some(mime::TEXT_XML), + [0x3C, 0x73, 0x76, 0x67, ..] => Some(mime::IMAGE_SVG), + [0x77, 0x4F, 0x46, 0x46, ..] => Some(mime::FONT_WOFF), + [0x77, 0x4F, 0x46, 0x32, ..] => Some(mime::FONT_WOFF2), + [0x25, 0x50, 0x44, 0x46, 0x2D, ..] => Some(mime::APPLICATION_PDF), + [0x7B, ..] => Some(mime::APPLICATION_JSON), + [0x46, 0x4F, 0x52, 0x4D, _, _, _, _, 0x41, 0x49, 0x46, 0x46, ..] => { + Some(mime::AUDIO_AIFF) + } + [0x49, 0x44, 0x33, ..] => Some(mime::AUDIO_MPEG), + [0x4F, 0x67, 0x67, 0x53, 0x00, ..] => Some(mime::AUDIO_OGG), + [0x4D, 0x54, 0x68, 0x64, 0x00, 0x00, 0x00, 0x06, ..] => Some(mime::AUDIO_MIDI), + [0x52, 0x49, 0x46, 0x46, _, _, _, _, 0x41, 0x56, 0x49, 0x20, ..] => { + Some(mime::VIDEO_AVI) + } + [0x52, 0x49, 0x46, 0x46, _, _, _, _, 0x57, 0x41, 0x56, 0x45, ..] => { + Some(mime::AUDIO_WAVE) + } + [0x1F, 0x8B, 0x08, ..] => Some(mime::APPLICATION_GZIP), + [0x50, 0x4B, 0x03, 0x04, ..] => Some(mime::APPLICATION_ZIP), + [0x52, 0x61, 0x72, 0x20, 0x1A, 0x07, 0x00, ..] => Some(mime::APPLICATION_RAR), + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4C, 0x50, ..] => { + Some(mime::APPLICATION_VND_MS_FONTOBJECT) + } + [0x00, 0x01, 0x00, 0x00, ..] 
=> Some(mime::FONT_TTF), + [0x4F, 0x54, 0x54, 0x4F, ..] => Some(mime::FONT_OTF), + [0x74, 0x74, 0x63, 0x66, ..] => Some(mime::FONT_COLLECTION), + [0x25, 0x21, 0x50, 0x53, 0x2D, 0x41, 0x64, 0x6F, 0x62, 0x65, 0x2D, ..] => { + Some(mime::APPLICATION_POSTSCRIPT) + } + [0xFE, 0xFF, 0x00, 0x00, ..] + | [0xFF, 0xFE, 0x00, 0x00, ..] + | [0xEF, 0xBB, 0xBF, 0x00, ..] => Some(mime::TEXT_PLAIN), + // TODO: And the mask + [0x3C, 0x21, 0x44, 0x4F, 0x43, 0x54, 0x59, 0x50, 0x45, 0x20, 0x48, 0x54, 0x4D, 0x4C, ..] + | [0x3C, 0x53, 0x43, 0x52, 0x49, 0x50, 0x54, ..] + | [0x3C, 0x49, 0x46, 0x52, 0x41, 0x4D, 0x45, ..] + | [0x3C, 0x54, 0x41, 0x42, 0x4C, 0x45, ..] + | [0x3C, 0x53, 0x54, 0x59, 0x4C, 0x45, ..] + | [0x3C, 0x54, 0x49, 0x54, 0x4C, 0x45, ..] + | [0x3C, 0x48, 0x45, 0x41, 0x44, ..] + | [0x3C, 0x48, 0x54, 0x4D, 0x4C, ..] + | [0x3C, 0x46, 0x4F, 0x4E, 0x54, ..] + | [0x3C, 0x42, 0x4F, 0x44, 0x59, ..] + | [0x3C, 0x44, 0x49, 0x56, ..] + | [0x3C, 0x21, 0x2D, 0x2D, ..] + | [0x3C, 0x48, 0x31, ..] + | [0x3C, 0x42, 0x52, ..] + | [0x3C, 0x41, ..] + | [0x3C, 0x42, ..] + | [0x3C, 0x50, ..] 
=> Some(mime::TEXT_HTML), + _ => None, + } + } + + fn algorithm_match(&self) -> Option { + if self.is_mp4() == Some(mime::AUDIO_MP4) { + return Some(mime::AUDIO_MP4); + } + if self.is_mp3() { + return Some(mime::AUDIO_MPEG); + } + None + } + fn is_mp4(&self) -> Option { + let length = self.data.len(); + if length < 12 { + return None; + } + let box_size = u32::from_be_bytes([self.data[0], self.data[1], self.data[2], self.data[3]]); + if length < box_size as usize || box_size % 4 != 0 { + return None; + } + if self.data[4..8] != [0x66, 0x74, 0x79, 0x70] { + return None; + } + if self.data[8..11] == [0x6D, 0x70, 0x34] { + return Some(mime::AUDIO_MP4); + } + let mut bytes_read = 16; + while bytes_read < box_size as usize { + if self.data[bytes_read..bytes_read + 3] == [0x6D, 0x70, 0x34] { + return Some(mime::AUDIO_MP4); + } + bytes_read += 4; + } + None + } + + fn is_mp3(&self) -> bool { + let sequence = &self.data; + let length = sequence.len(); + let mut s = 0; + + if !match_mp3_header(sequence, s) { + return false; + } + + let (version, bitrate_index, samplerate_index, pad) = parse_mp3_frame(sequence, s); + let bitrate = if version & 0x01 != 0 { + MimeGuesser::MP25_RATES[bitrate_index as usize] + } else { + MimeGuesser::MP3_RATES[bitrate_index as usize] + }; + let sample_rate = MimeGuesser::SAMPLE_RATES[samplerate_index as usize]; + let skipped_bytes = compute_mp3_frame_size(version, bitrate, sample_rate, pad); + + if skipped_bytes < 4 || skipped_bytes > length - s { + return false; + } + s += skipped_bytes; + + if !match_mp3_header(sequence, s) { + return false; + } + + true + } +} + +fn match_mp3_header(sequence: &[u8], s: usize) -> bool { + let length = sequence.len(); + if length - s < 4 { + return false; + } + + sequence[s] == 0xff + && sequence[s + 1] & 0xe0 == 0xe0 + && (sequence[s + 1] & 0x06 >> 1) != 0 + && (sequence[s + 2] & 0xf0 >> 4) != 15 + && (sequence[s + 2] & 0x0c >> 2) != 3 + && (4 - (sequence[s + 1] & 0x06 >> 1)) == 3 +} + +fn 
parse_mp3_frame(sequence: &[u8], s: usize) -> (u8, u8, u8, u8) { + let version = sequence[s + 1] & 0x18 >> 3; + let bitrate_index = sequence[s + 2] & 0xf0 >> 4; + let samplerate_index = sequence[s + 2] & 0x0c >> 2; + let pad = sequence[s + 2] & 0x02 >> 1; + (version, bitrate_index, samplerate_index, pad) +} + +fn compute_mp3_frame_size(version: u8, bitrate: u32, samplerate: u32, pad: u8) -> usize { + let scale = if version == 1 { 72 } else { 144 }; + let mut size = (bitrate as usize * scale / samplerate as usize) as usize; + if pad != 0 { + size += 1; + } + size +} diff --git a/src/filesystem/nodes/metadata/mod.rs b/src/filesystem/nodes/metadata/mod.rs new file mode 100644 index 0000000..29bb6e9 --- /dev/null +++ b/src/filesystem/nodes/metadata/mod.rs @@ -0,0 +1,41 @@ +use std::str::FromStr; +mod mime_type; + +pub use mime_type::MimeGuesser; + +#[derive(Hash, Eq, PartialEq)] +pub enum MetadataKey { + MimeType, +} + +impl MetadataKey { + pub fn as_str(&self) -> &'static str { + match self { + MetadataKey::MimeType => "mime", + } + } + + pub fn as_bytes(&self) -> &'static [u8] { + match self { + MetadataKey::MimeType => b"mime", + } + } + + pub fn from_bytes(key: &[u8]) -> Option { + match key { + b"mime" => Some(MetadataKey::MimeType), + _ => None, + } + } +} + +impl FromStr for MetadataKey { + type Err = winnow::error::ErrorKind; + + fn from_str(s: &str) -> Result { + match s { + "mime" => Ok(MetadataKey::MimeType), + _ => Err(winnow::error::ErrorKind::Token), + } + } +} diff --git a/src/filesystem/nodes/mod.rs b/src/filesystem/nodes/mod.rs index 93bb1fb..98fd983 100644 --- a/src/filesystem/nodes/mod.rs +++ b/src/filesystem/nodes/mod.rs @@ -13,6 +13,7 @@ //! guarantee the major version will be increased when a breaking change is made). 
mod cid_cache; +mod metadata; mod node_builder; mod node_data; mod node_name; @@ -27,6 +28,7 @@ use std::collections::HashMap; use std::io::{Error as StdError, ErrorKind as StdErrorKind}; use futures::{AsyncWrite, AsyncWriteExt}; +use mime; use winnow::binary::{le_i64, le_u32, le_u8}; use winnow::stream::Offset; use winnow::token::take; @@ -36,6 +38,7 @@ use crate::codec::filesystem::NodeKind; use crate::codec::meta::{ActorId, Cid, PermanentId}; use crate::codec::{ParserResult, Stream, VectorClock}; use crate::filesystem::drive::OperationError; +pub use crate::prelude::nodes::metadata::{MetadataKey, MimeGuesser}; pub(crate) type NodeId = usize; @@ -75,7 +78,7 @@ pub struct Node { modified_at: i64, name: NodeName, - metadata: HashMap>, + metadata: HashMap>, inner: NodeData, } @@ -250,7 +253,7 @@ impl Node { self.inner.kind() } - pub fn metadata(&self) -> &HashMap> { + pub fn metadata(&self) -> &HashMap> { &self.metadata } @@ -293,7 +296,7 @@ impl Node { encoded_size += self .metadata() .iter() - .map(|(k, v)| (2 + k.as_bytes().len() + v.len()) as u64) + .map(|(k, v)| (2 + k.as_str().len() + v.len()) as u64) .sum::(); encoded_size @@ -355,18 +358,17 @@ impl Node { for _ in 0..metadata_entries { let (meta_buf, key_len) = le_u8.parse_peek(input)?; let (meta_buf, key) = take(key_len).parse_peek(meta_buf)?; - let key_str = String::from_utf8(key.to_vec()).map_err(|_| { + let key_metadata = MetadataKey::from_bytes(key).ok_or_else(|| { winnow::error::ErrMode::Cut(winnow::error::ParserError::from_error_kind( &input, winnow::error::ErrorKind::Token, )) })?; - let (meta_buf, val_len) = le_u8.parse_peek(meta_buf)?; let (meta_buf, val) = take(val_len).parse_peek(meta_buf)?; let val = val.to_vec(); - metadata.insert(key_str, val); + metadata.insert(key_metadata, val); input = meta_buf; } @@ -440,7 +442,15 @@ impl Node { self.permanent_id } - pub async fn set_attribute(&mut self, key: String, value: Vec) -> Option> { + pub fn mime_type(&self) -> Option { + self.metadata + 
.get(&MetadataKey::MimeType) + .and_then(|mime_str| { + mime::MediaType::parse(std::str::from_utf8(mime_str).unwrap()).ok() + }) + } + + pub async fn set_attribute(&mut self, key: MetadataKey, value: Vec) -> Option> { let old_value = self.metadata.insert(key, value); self.notify_of_change().await; old_value diff --git a/src/filesystem/nodes/node_builder.rs b/src/filesystem/nodes/node_builder.rs index 181a222..43c95e4 100644 --- a/src/filesystem/nodes/node_builder.rs +++ b/src/filesystem/nodes/node_builder.rs @@ -7,6 +7,7 @@ use crate::codec::meta::{ActorId, VectorClock}; use crate::filesystem::nodes::{ CidCache, Node, NodeData, NodeId, NodeName, NodeNameError, PermanentId, }; +use crate::prelude::nodes::metadata::MetadataKey; pub(crate) struct NodeBuilder { id: Option, @@ -17,7 +18,7 @@ pub(crate) struct NodeBuilder { size_hint: Option, kind: NodeKind, - metadata: HashMap>, + metadata: HashMap>, } impl NodeBuilder { From 8bc6ab87e766745a05904e17eaa81cf39ae712af Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Thu, 6 Jun 2024 16:52:40 +0300 Subject: [PATCH 02/12] Some refactoring and improvements. 
--- src/codec/crypto/authentication_tag.rs | 2 +- src/codec/crypto/nonce.rs | 2 +- src/codec/data_storage/data_block.rs | 4 +- .../data_storage/encrypted_data_chunk.rs | 4 +- src/filesystem/drive/directory_handle.rs | 81 ++++++-- src/filesystem/drive/inner.rs | 14 +- src/filesystem/nodes/metadata/mime_type.rs | 188 +++++++++--------- src/filesystem/nodes/metadata/mod.rs | 2 +- 8 files changed, 169 insertions(+), 128 deletions(-) diff --git a/src/codec/crypto/authentication_tag.rs b/src/codec/crypto/authentication_tag.rs index bf14288..3bb374d 100644 --- a/src/codec/crypto/authentication_tag.rs +++ b/src/codec/crypto/authentication_tag.rs @@ -18,7 +18,7 @@ impl AuthenticationTag { } pub fn from_bytes(data: &[u8; TAG_LENGTH]) -> Self { - Self(data.clone()) + Self(*data) } pub async fn encode( diff --git a/src/codec/crypto/nonce.rs b/src/codec/crypto/nonce.rs index dc7bb7c..9a8d978 100644 --- a/src/codec/crypto/nonce.rs +++ b/src/codec/crypto/nonce.rs @@ -18,7 +18,7 @@ impl Nonce { } pub fn from_bytes(data: &[u8; NONCE_LENGTH]) -> Self { - Self(data.clone()) + Self(*data) } pub(crate) async fn encode( diff --git a/src/codec/data_storage/data_block.rs b/src/codec/data_storage/data_block.rs index 565e319..ab63e8d 100644 --- a/src/codec/data_storage/data_block.rs +++ b/src/codec/data_storage/data_block.rs @@ -122,7 +122,7 @@ impl DataBlock { self.contents.len() >= self.data_options.chunk_count().into() } - pub fn parse<'a>(input: Stream<'a>) -> ParserResult<'a, Self> { + pub fn parse(input: Stream<'_>) -> ParserResult<'_, Self> { let (input, version) = le_u8.parse_peek(input)?; if version != 0x01 { @@ -172,7 +172,7 @@ impl DataBlock { Ok((input, block)) } - pub fn parse_with_magic<'a>(input: Stream<'a>) -> ParserResult<'a, Self> { + pub fn parse_with_magic(input: Stream<'_>) -> ParserResult<'_, Self> { let (input, _magic) = banyan_data_magic_tag(input)?; Self::parse(input) } diff --git a/src/codec/data_storage/encrypted_data_chunk.rs 
b/src/codec/data_storage/encrypted_data_chunk.rs index bf36a04..462c708 100644 --- a/src/codec/data_storage/encrypted_data_chunk.rs +++ b/src/codec/data_storage/encrypted_data_chunk.rs @@ -47,8 +47,8 @@ impl EncryptedDataChunk { Ok((self.0.len(), cid)) } - pub fn decrypt<'a>( - &'a self, + pub fn decrypt( + &self, options: &DataOptions, access_key: &AccessKey, ) -> Result { diff --git a/src/filesystem/drive/directory_handle.rs b/src/filesystem/drive/directory_handle.rs index 0db16bb..bbed2ea 100644 --- a/src/filesystem/drive/directory_handle.rs +++ b/src/filesystem/drive/directory_handle.rs @@ -693,18 +693,14 @@ impl DirectoryHandle { let mut inner_write = self.inner.write().await; let node = inner_write.by_perm_id_mut(&new_permanent_id).await?; - let mime_type = { - let node_name = node.name().clone(); - MimeGuesser::default() - .with_name(node_name) - // .with_data(content_references.map()) - .guess_mime_type() - }; - if let Some(mime_type) = mime_type { + if let Some(mime_type) = MimeGuesser::default() + .with_name(node.name().clone()) + .with_data(data) + .guess_mime_type() + { node.set_attribute(MetadataKey::MimeType, mime_type.to_string().into()) .await; } - let node_data = node.data_mut().await; let file_content = FileContent::encrypted(locked_key, plaintext_cid, data_size, content_references); @@ -778,12 +774,13 @@ fn walk_path<'a>( mod test { use super::*; use crate::filesystem::drive::inner::test::build_interesting_inner; + use crate::prelude::MemoryDataStore; #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn mv_dir_from_dir_to_cwd_specify_name() { let mut rng = crate::utils::crypto_rng(); - let mut handle = interesting_handle().await; + let mut handle = interesting_handle(None).await; handle .mv(&mut rng, &["dir_1", "dir_2"], &["dir_2_new"]) .await @@ -803,7 +800,7 @@ mod test { #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn 
mv_dir_from_dir_to_dir_specify_name() { let mut rng = crate::utils::crypto_rng(); - let mut handle = interesting_handle().await; + let mut handle = interesting_handle(None).await; handle .mv( &mut rng, @@ -827,7 +824,7 @@ mod test { #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn mv_file_from_dir_to_cwd_specify_name() { let mut rng = crate::utils::crypto_rng(); - let mut handle = interesting_handle().await; + let mut handle = interesting_handle(None).await; handle .mv( &mut rng, @@ -851,7 +848,7 @@ mod test { #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn mv_file_from_dir_to_dir_specify_name() { let mut rng = crate::utils::crypto_rng(); - let mut handle = interesting_handle().await; + let mut handle = interesting_handle(None).await; handle .mv( &mut rng, @@ -875,7 +872,7 @@ mod test { #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn mv_dir_from_dir_to_cwd_no_name() { let mut rng = crate::utils::crypto_rng(); - let mut handle = interesting_handle().await; + let mut handle = interesting_handle(None).await; handle.mv(&mut rng, &["dir_1", "dir_2"], &[]).await.unwrap(); let cwd_ls = handle.ls(&[]).await.unwrap(); @@ -892,7 +889,7 @@ mod test { #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn mv_dir_from_dir_to_dir_no_name() { let mut rng = crate::utils::crypto_rng(); - let mut handle = interesting_handle().await; + let mut handle = interesting_handle(None).await; handle .mv(&mut rng, &["dir_1", "dir_2", "dir_3"], &["dir_1"]) .await @@ -912,7 +909,7 @@ mod test { #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn mv_file_from_dir_to_cwd_no_name() { let mut rng = crate::utils::crypto_rng(); - let mut handle = interesting_handle().await; + let mut handle = interesting_handle(None).await; handle .mv(&mut rng, &["dir_1", "dir_2", "dir_3", "file_3"], &[]) .await @@ -932,7 +929,7 @@ mod test { #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn mv_file_from_dir_to_dir_no_name() { let mut rng = 
crate::utils::crypto_rng(); - let mut handle = interesting_handle().await; + let mut handle = interesting_handle(None).await; handle .mv(&mut rng, &["dir_1", "dir_2", "dir_3", "file_3"], &["dir_1"]) .await @@ -948,7 +945,7 @@ mod test { ); } - async fn interesting_handle() -> DirectoryHandle { + async fn interesting_handle(current_key: Option) -> DirectoryHandle { // -----file_1 // / // root ---------file_2 @@ -959,13 +956,57 @@ mod test { // \ // ----file_5 let mut rng = crate::utils::crypto_rng(); - let inner = build_interesting_inner().await; + let inner = build_interesting_inner(current_key.clone()).await; let root_id = inner.root_node().unwrap().id(); let inner = Arc::new(RwLock::new(inner)); DirectoryHandle { - current_key: Arc::new(SigningKey::generate(&mut rng)), + current_key: Arc::new(current_key.unwrap_or_else(|| SigningKey::generate(&mut rng))), inner, cwd_id: root_id, } } + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] + async fn write_file_with_html_tags() { + let mut rng = crate::utils::crypto_rng(); + let current_key = SigningKey::generate(&mut rng); + let mut handle = interesting_handle(Some(current_key)).await; + let mut store = MemoryDataStore::default(); + + let test_cases = vec![ + // (b"Test File

Hello World!

".to_vec(), "test.html"), + // (b"Test File

Hello World!

".to_vec(), "TEST.HTML"), + // (b"

Heading

Paragraph

".to_vec(), "file.htm"), + (b"
Some text
".to_vec(), "page.php"), + // (b"Content".to_vec(), "invalid_file_name"), + ]; + for (data, file_name) in test_cases { + handle + .write(&mut rng, &mut store, &[file_name], &data) + .await + .unwrap(); + + let cwd_ls = handle.ls(&[]).await.unwrap(); + assert_eq!( + cwd_ls + .iter() + .filter(|entry| entry.name() == NodeName::try_from(file_name).unwrap()) + .count(), + 1 + ); + + let file_entry = cwd_ls + .iter() + .find(|entry| entry.name() == NodeName::try_from(file_name).unwrap()) + .unwrap(); + + assert_eq!(file_entry.kind(), NodeKind::File); + + let file_data = handle.read(&mut store, &[file_name]).await.unwrap(); + assert_eq!(file_data.as_slice(), data); + + let mime_type = file_entry.mime_type().unwrap(); + assert_eq!(mime_type, "text/html"); + } + } } diff --git a/src/filesystem/drive/inner.rs b/src/filesystem/drive/inner.rs index 205fd90..34b830c 100644 --- a/src/filesystem/drive/inner.rs +++ b/src/filesystem/drive/inner.rs @@ -494,10 +494,10 @@ pub(crate) mod test { use super::*; - fn initialize_inner_drive() -> (ActorId, InnerDrive) { + fn initialize_inner_drive(signing_key: Option) -> (ActorId, InnerDrive) { let mut rng = crate::utils::crypto_rng(); - let signing_key = SigningKey::generate(&mut rng); + let signing_key = signing_key.unwrap_or_else(|| SigningKey::generate(&mut rng)); let verifying_key = signing_key.verifying_key(); let actor_id = verifying_key.actor_id(); @@ -511,7 +511,7 @@ pub(crate) mod test { #[test] fn test_drive_initialization() { - let (_, inner) = initialize_inner_drive(); + let (_, inner) = initialize_inner_drive(None); assert!(inner.nodes.capacity() == 32); assert!(inner.nodes.len() == 1); } @@ -520,7 +520,7 @@ pub(crate) mod test { #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn test_node_creation() { let mut rng = crate::utils::crypto_rng(); - let (actor_id, mut inner) = initialize_inner_drive(); + let (actor_id, mut inner) = initialize_inner_drive(None); let create_node_res = inner .create_node( @@ -549,7 +549,7 
@@ pub(crate) mod test { #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn test_drive_round_tripping() { - let inner = build_interesting_inner().await; + let inner = build_interesting_inner(None).await; let access = inner.access(); let journal = inner.journal_start(); @@ -570,9 +570,9 @@ pub(crate) mod test { } // A fixture to make a relatively interesting inner - pub(crate) async fn build_interesting_inner() -> InnerDrive { + pub(crate) async fn build_interesting_inner(current_key: Option) -> InnerDrive { let mut rng = crate::utils::crypto_rng(); - let (actor_id, mut inner) = initialize_inner_drive(); + let (actor_id, mut inner) = initialize_inner_drive(current_key); // -----file_1 // / diff --git a/src/filesystem/nodes/metadata/mime_type.rs b/src/filesystem/nodes/metadata/mime_type.rs index 53c34dd..db3c569 100644 --- a/src/filesystem/nodes/metadata/mime_type.rs +++ b/src/filesystem/nodes/metadata/mime_type.rs @@ -1,6 +1,5 @@ use crate::prelude::nodes::NodeName; - #[derive(Default)] pub struct MimeGuesser { name: Option, @@ -40,83 +39,91 @@ impl MimeGuesser { } fn extension_match(&self) -> Option { - let guess = mime_guess::get_mime_extensions_str( - self.name.as_ref().map_or("", |name| name.as_str()), - ); - if let Some(guess) = guess { - return mime::MediaType::parse(*guess.first()?).ok(); + if let Some(name) = self.name.as_ref() { + let guess = mime_guess::from_path(name); + if !guess.is_empty() { + return mime::MediaType::parse(guess.first()?.as_ref()).ok(); + } } None } fn pattern_match(&self) -> Option { - let magic_bytes = &self.data.get(0..34)?; + let magic_bytes = &self.data[..]; // Taken from https://mimesniff.spec.whatwg.org/ match magic_bytes { - [0xFF, 0xD8, 0xFF, ..] => Some(mime::IMAGE_JPEG), - [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] => Some(mime::IMAGE_PNG), - [0x47, 0x49, 0x46, 0x38, 0x37, 0x61, ..] | [0x47, 0x49, 0x46, 0x38, 0x39, 0x61, ..] 
=> { - Some(mime::IMAGE_GIF) - } - [0x42, 0x4D, ..] => Some(mime::IMAGE_BMP), - [0x3C, 0x3F, 0x78, 0x6D, 0x6C, ..] => Some(mime::TEXT_XML), - [0x3C, 0x73, 0x76, 0x67, ..] => Some(mime::IMAGE_SVG), - [0x77, 0x4F, 0x46, 0x46, ..] => Some(mime::FONT_WOFF), - [0x77, 0x4F, 0x46, 0x32, ..] => Some(mime::FONT_WOFF2), - [0x25, 0x50, 0x44, 0x46, 0x2D, ..] => Some(mime::APPLICATION_PDF), - [0x7B, ..] => Some(mime::APPLICATION_JSON), - [0x46, 0x4F, 0x52, 0x4D, _, _, _, _, 0x41, 0x49, 0x46, 0x46, ..] => { + &[0xFF, 0xD8, 0xFF, ..] => Some(mime::IMAGE_JPEG), + &[0x89, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A, ..] => Some(mime::IMAGE_PNG), + &[b'G', b'I', b'F', b'8', b'7', b'a', ..] + | &[b'G', b'I', b'F', b'8', b'9', b'a', ..] => Some(mime::IMAGE_GIF), + &[b'B', b'M', ..] => Some(mime::IMAGE_BMP), + &[b'<', b'?', b'x', b'm', b'l', ..] => Some(mime::TEXT_XML), + &[b'<', b's', b'v', b'g', ..] => Some(mime::IMAGE_SVG), + &[b'w', b'O', b'F', b'F', ..] => Some(mime::FONT_WOFF), + &[b'w', b'O', b'F', b'2', ..] => Some(mime::FONT_WOFF2), + &[b'%', b'P', b'D', b'F', b'-', ..] => Some(mime::APPLICATION_PDF), + &[b'{', ..] => Some(mime::APPLICATION_JSON), + &[b'F', b'O', b'R', b'M', _, _, _, _, b'A', b'I', b'F', b'F', ..] => { Some(mime::AUDIO_AIFF) } - [0x49, 0x44, 0x33, ..] => Some(mime::AUDIO_MPEG), - [0x4F, 0x67, 0x67, 0x53, 0x00, ..] => Some(mime::AUDIO_OGG), - [0x4D, 0x54, 0x68, 0x64, 0x00, 0x00, 0x00, 0x06, ..] => Some(mime::AUDIO_MIDI), - [0x52, 0x49, 0x46, 0x46, _, _, _, _, 0x41, 0x56, 0x49, 0x20, ..] => { + &[b'I', b'D', b'3', ..] => Some(mime::AUDIO_MPEG), + &[b'O', b'g', b'g', b'S', 0, ..] => Some(mime::AUDIO_OGG), + &[b'M', b'T', b'h', b'd', 0, 0, 0, 0x06, ..] => Some(mime::AUDIO_MIDI), + &[b'R', b'I', b'F', b'F', _, _, _, _, b'A', b'V', b'I', b' ', ..] => { Some(mime::VIDEO_AVI) } - [0x52, 0x49, 0x46, 0x46, _, _, _, _, 0x57, 0x41, 0x56, 0x45, ..] => { + &[b'R', b'I', b'F', b'F', _, _, _, _, b'W', b'A', b'V', b'E', ..] => { Some(mime::AUDIO_WAVE) } - [0x1F, 0x8B, 0x08, ..] 
=> Some(mime::APPLICATION_GZIP), - [0x50, 0x4B, 0x03, 0x04, ..] => Some(mime::APPLICATION_ZIP), - [0x52, 0x61, 0x72, 0x20, 0x1A, 0x07, 0x00, ..] => Some(mime::APPLICATION_RAR), - [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4C, 0x50, ..] => { + &[0x1F, 0x8B, 0x08, ..] => Some(mime::APPLICATION_GZIP), + &[b'P', b'K', 0x03, 0x04, ..] => Some(mime::APPLICATION_ZIP), + &[b'R', b'a', b'r', b' ', 0x1A, 0x07, 0, ..] => Some(mime::APPLICATION_RAR), + &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, b'L', b'P'] => { Some(mime::APPLICATION_VND_MS_FONTOBJECT) } - [0x00, 0x01, 0x00, 0x00, ..] => Some(mime::FONT_TTF), - [0x4F, 0x54, 0x54, 0x4F, ..] => Some(mime::FONT_OTF), - [0x74, 0x74, 0x63, 0x66, ..] => Some(mime::FONT_COLLECTION), - [0x25, 0x21, 0x50, 0x53, 0x2D, 0x41, 0x64, 0x6F, 0x62, 0x65, 0x2D, ..] => { + &[0, 0x01, 0, 0, ..] => Some(mime::FONT_TTF), + &[b'O', b'T', b'T', b'O', ..] => Some(mime::FONT_OTF), + &[b't', b't', b'c', b'f', ..] => Some(mime::FONT_COLLECTION), + &[b'%', b'!', b'P', b'S', b'-', b'A', b'd', b'o', b'b', b'e', b'-', ..] => { Some(mime::APPLICATION_POSTSCRIPT) } - [0xFE, 0xFF, 0x00, 0x00, ..] - | [0xFF, 0xFE, 0x00, 0x00, ..] - | [0xEF, 0xBB, 0xBF, 0x00, ..] => Some(mime::TEXT_PLAIN), - // TODO: And the mask - [0x3C, 0x21, 0x44, 0x4F, 0x43, 0x54, 0x59, 0x50, 0x45, 0x20, 0x48, 0x54, 0x4D, 0x4C, ..] - | [0x3C, 0x53, 0x43, 0x52, 0x49, 0x50, 0x54, ..] - | [0x3C, 0x49, 0x46, 0x52, 0x41, 0x4D, 0x45, ..] - | [0x3C, 0x54, 0x41, 0x42, 0x4C, 0x45, ..] - | [0x3C, 0x53, 0x54, 0x59, 0x4C, 0x45, ..] - | [0x3C, 0x54, 0x49, 0x54, 0x4C, 0x45, ..] - | [0x3C, 0x48, 0x45, 0x41, 0x44, ..] - | [0x3C, 0x48, 0x54, 0x4D, 0x4C, ..] - | [0x3C, 0x46, 0x4F, 0x4E, 0x54, ..] - | [0x3C, 0x42, 0x4F, 0x44, 0x59, ..] - | [0x3C, 0x44, 0x49, 0x56, ..] - | [0x3C, 0x21, 0x2D, 0x2D, ..] 
- | [0x3C, 0x48, 0x31, ..] - | [0x3C, 0x42, 0x52, ..] - | [0x3C, 0x41, ..] - | [0x3C, 0x42, ..] - | [0x3C, 0x50, ..] => Some(mime::TEXT_HTML), + &[0xFE, 0xFF, 0, 0, ..] | &[0xFF, 0xFE, 0, 0, ..] | &[0xEF, 0xBB, 0xBF, 0, ..] => { + Some(mime::TEXT_PLAIN) + } + [b'<', _, _, _, _, ..] => { + // case-insensitive match + match &magic_bytes[1..5] + .iter() + .map(|b| b & 0xDF) + .collect::>()[..] + { + b"!DOC" => Some(mime::TEXT_HTML), + b"SCRI" => Some(mime::TEXT_HTML), + b"IFRA" => Some(mime::TEXT_HTML), + b"TABL" => Some(mime::TEXT_HTML), + b"STYL" => Some(mime::TEXT_HTML), + b"TITL" => Some(mime::TEXT_HTML), + b"HEAD" => Some(mime::TEXT_HTML), + b"HTML" => Some(mime::TEXT_HTML), + b"FONT" => Some(mime::TEXT_HTML), + b"BODY" => Some(mime::TEXT_HTML), + &[b'D', b'I', b'V', ..] => Some(mime::TEXT_HTML), + &[b'!', b'-', b'-', ..] => Some(mime::TEXT_HTML), + &[b'H', b'1', ..] => Some(mime::TEXT_HTML), + &[b'B', b'R', ..] => Some(mime::TEXT_HTML), + &[b'A', ..] => Some(mime::TEXT_HTML), + &[b'B', ..] => Some(mime::TEXT_HTML), + &[b'P', ..] 
=> Some(mime::TEXT_HTML), + _ => None, + } + } _ => None, } } fn algorithm_match(&self) -> Option { - if self.is_mp4() == Some(mime::AUDIO_MP4) { + if self.is_mp4() { return Some(mime::AUDIO_MP4); } if self.is_mp3() { @@ -124,59 +131,52 @@ impl MimeGuesser { } None } - fn is_mp4(&self) -> Option { - let length = self.data.len(); - if length < 12 { - return None; - } - let box_size = u32::from_be_bytes([self.data[0], self.data[1], self.data[2], self.data[3]]); - if length < box_size as usize || box_size % 4 != 0 { - return None; + + fn is_mp4(&self) -> bool { + let data = &self.data; + if data.len() < 12 { + return false; } - if self.data[4..8] != [0x66, 0x74, 0x79, 0x70] { - return None; + let box_size = u32::from_be_bytes([data[0], data[1], data[2], data[3]]); + if data.len() < box_size as usize || box_size % 4 != 0 { + return false; } - if self.data[8..11] == [0x6D, 0x70, 0x34] { - return Some(mime::AUDIO_MP4); + if &data[4..8] != b"ftyp" { + return false; } - let mut bytes_read = 16; - while bytes_read < box_size as usize { - if self.data[bytes_read..bytes_read + 3] == [0x6D, 0x70, 0x34] { - return Some(mime::AUDIO_MP4); - } - bytes_read += 4; + if &data[8..11] == b"mp4" { + return true; } - None + data[16..] 
+ .chunks_exact(4) + .take_while(|chunk| &chunk[..3] != b"mp4") + .last() + .map_or(false, |chunk| &chunk[..3] == b"mp4") } fn is_mp3(&self) -> bool { - let sequence = &self.data; - let length = sequence.len(); - let mut s = 0; + let data = &self.data; + let mut offset = 0; - if !match_mp3_header(sequence, s) { + if !match_mp3_header(data, offset) { return false; } - let (version, bitrate_index, samplerate_index, pad) = parse_mp3_frame(sequence, s); + let (version, bitrate_index, sample_rate_index, pad) = parse_mp3_frame(data, offset); let bitrate = if version & 0x01 != 0 { - MimeGuesser::MP25_RATES[bitrate_index as usize] + Self::MP25_RATES[bitrate_index as usize] } else { - MimeGuesser::MP3_RATES[bitrate_index as usize] + Self::MP3_RATES[bitrate_index as usize] }; - let sample_rate = MimeGuesser::SAMPLE_RATES[samplerate_index as usize]; + let sample_rate = Self::SAMPLE_RATES[sample_rate_index as usize]; let skipped_bytes = compute_mp3_frame_size(version, bitrate, sample_rate, pad); - if skipped_bytes < 4 || skipped_bytes > length - s { - return false; - } - s += skipped_bytes; - - if !match_mp3_header(sequence, s) { + if skipped_bytes < 4 || skipped_bytes > data.len() - offset { return false; } + offset += skipped_bytes; - true + match_mp3_header(data, offset) } } @@ -197,16 +197,16 @@ fn match_mp3_header(sequence: &[u8], s: usize) -> bool { fn parse_mp3_frame(sequence: &[u8], s: usize) -> (u8, u8, u8, u8) { let version = sequence[s + 1] & 0x18 >> 3; let bitrate_index = sequence[s + 2] & 0xf0 >> 4; - let samplerate_index = sequence[s + 2] & 0x0c >> 2; + let sample_rate_index = sequence[s + 2] & 0x0c >> 2; let pad = sequence[s + 2] & 0x02 >> 1; - (version, bitrate_index, samplerate_index, pad) + (version, bitrate_index, sample_rate_index, pad) } -fn compute_mp3_frame_size(version: u8, bitrate: u32, samplerate: u32, pad: u8) -> usize { +fn compute_mp3_frame_size(version: u8, bitrate: u32, sample_rate: u32, pad: u8) -> usize { let scale = if version == 1 { 72 } else 
{ 144 }; - let mut size = (bitrate as usize * scale / samplerate as usize) as usize; + let mut size = bitrate * scale / sample_rate; if pad != 0 { size += 1; } - size + size as usize } diff --git a/src/filesystem/nodes/metadata/mod.rs b/src/filesystem/nodes/metadata/mod.rs index 29bb6e9..07ad8bb 100644 --- a/src/filesystem/nodes/metadata/mod.rs +++ b/src/filesystem/nodes/metadata/mod.rs @@ -3,7 +3,7 @@ mod mime_type; pub use mime_type::MimeGuesser; -#[derive(Hash, Eq, PartialEq)] +#[derive(Hash, Eq, PartialEq, Debug)] pub enum MetadataKey { MimeType, } From 812d13c811df82b9a6ae520cedeaecad55f90f67 Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Thu, 6 Jun 2024 16:54:12 +0300 Subject: [PATCH 03/12] Fixing erroneous commit --- src/filesystem/drive/directory_handle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/filesystem/drive/directory_handle.rs b/src/filesystem/drive/directory_handle.rs index bbed2ea..7d6f821 100644 --- a/src/filesystem/drive/directory_handle.rs +++ b/src/filesystem/drive/directory_handle.rs @@ -65,7 +65,7 @@ impl DirectoryHandle { } /// Changes the permission on the target node. Currently not implemented and changes are - /// expected to combine the [`FilePermissions`] with the ::new(crate::codec::filesystem::DirectoryPermissions`] all at once. + /// expected to combine the [`FilePermissions`] with the [`crate::codec::filesystem::DirectoryPermissions`] all at once. 
pub async fn chmod( &self, _path: &[&str], From 32a3dc0683f1ae85dee08d2e96811d71093880e0 Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Thu, 6 Jun 2024 17:46:02 +0300 Subject: [PATCH 04/12] Made all tests pass --- src/filesystem/drive/directory_handle.rs | 11 +++-- src/filesystem/nodes/metadata/mime_type.rs | 49 +++++++++++----------- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/src/filesystem/drive/directory_handle.rs b/src/filesystem/drive/directory_handle.rs index 7d6f821..f396b49 100644 --- a/src/filesystem/drive/directory_handle.rs +++ b/src/filesystem/drive/directory_handle.rs @@ -974,11 +974,14 @@ mod test { let mut store = MemoryDataStore::default(); let test_cases = vec![ - // (b"Test File

Hello World!

".to_vec(), "test.html"), - // (b"Test File

Hello World!

".to_vec(), "TEST.HTML"), - // (b"

Heading

Paragraph

".to_vec(), "file.htm"), + (b"Test File

Hello World!

".to_vec(), "test.html"), + (b"Test File

Hello World!

".to_vec(), "TEST.HTML"), + (b"

Heading

Paragraph

".to_vec(), "file.htm"), (b"
Some text
".to_vec(), "page.php"), - // (b"Content".to_vec(), "invalid_file_name"), + ( + b"Content".to_vec(), + "invalid_file_name", + ), ]; for (data, file_name) in test_cases { handle diff --git a/src/filesystem/nodes/metadata/mime_type.rs b/src/filesystem/nodes/metadata/mime_type.rs index db3c569..5b16c38 100644 --- a/src/filesystem/nodes/metadata/mime_type.rs +++ b/src/filesystem/nodes/metadata/mime_type.rs @@ -91,30 +91,28 @@ impl MimeGuesser { &[0xFE, 0xFF, 0, 0, ..] | &[0xFF, 0xFE, 0, 0, ..] | &[0xEF, 0xBB, 0xBF, 0, ..] => { Some(mime::TEXT_PLAIN) } - [b'<', _, _, _, _, ..] => { - // case-insensitive match - match &magic_bytes[1..5] - .iter() - .map(|b| b & 0xDF) - .collect::>()[..] - { - b"!DOC" => Some(mime::TEXT_HTML), - b"SCRI" => Some(mime::TEXT_HTML), - b"IFRA" => Some(mime::TEXT_HTML), - b"TABL" => Some(mime::TEXT_HTML), - b"STYL" => Some(mime::TEXT_HTML), - b"TITL" => Some(mime::TEXT_HTML), - b"HEAD" => Some(mime::TEXT_HTML), - b"HTML" => Some(mime::TEXT_HTML), - b"FONT" => Some(mime::TEXT_HTML), - b"BODY" => Some(mime::TEXT_HTML), - &[b'D', b'I', b'V', ..] => Some(mime::TEXT_HTML), - &[b'!', b'-', b'-', ..] => Some(mime::TEXT_HTML), - &[b'H', b'1', ..] => Some(mime::TEXT_HTML), - &[b'B', b'R', ..] => Some(mime::TEXT_HTML), - &[b'A', ..] => Some(mime::TEXT_HTML), - &[b'B', ..] => Some(mime::TEXT_HTML), - &[b'P', ..] => Some(mime::TEXT_HTML), + [b'<', ..] => { + match &magic_bytes[1..] + .iter() + // check if it's [a-zA-Z] and only then apply the case-insensitive conversion + .map(|&b| b.to_ascii_uppercase()) + .collect::>().as_slice() { + [b'!', b'D', b'O', b'C', b'T', b'Y', b'P', b'E', b' ', b'H', b'T', b'M', b'L', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'H', b'T', b'M', b'L', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'H', b'E', b'A', b'D', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'S', b'C', b'R', b'I', b'P', b'T', tt, ..] 
if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'I', b'F', b'R', b'A', b'M', b'E', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'H', b'1', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'D', b'I', b'V', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'F', b'O', b'N', b'T', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'T', b'A', b'B', b'L', b'E', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'A', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'S', b'T', b'Y', b'L', b'E', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'T', b'I', b'T', b'L', b'E', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'B', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'B', b'O', b'D', b'Y', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'B', b'R', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'P', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), _ => None, } } @@ -210,3 +208,6 @@ fn compute_mp3_frame_size(version: u8, bitrate: u32, sample_rate: u32, pad: u8) } size as usize } +fn is_whitespace_or_tag_terminator(byte: u8) -> bool { + byte == b' ' || byte == b'>' +} From c1a49145519c6ea16248cd87ee1a8fbca3d766dc Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Thu, 6 Jun 2024 17:46:18 +0300 Subject: [PATCH 05/12] Removed comment --- src/filesystem/nodes/metadata/mime_type.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/filesystem/nodes/metadata/mime_type.rs b/src/filesystem/nodes/metadata/mime_type.rs index 5b16c38..f1bc241 100644 --- a/src/filesystem/nodes/metadata/mime_type.rs +++ b/src/filesystem/nodes/metadata/mime_type.rs @@ -94,7 +94,6 @@ impl MimeGuesser { [b'<', ..] 
=> { match &magic_bytes[1..] .iter() - // check if it's [a-zA-Z] and only then apply the case-insensitive conversion .map(|&b| b.to_ascii_uppercase()) .collect::>().as_slice() { [b'!', b'D', b'O', b'C', b'T', b'Y', b'P', b'E', b' ', b'H', b'T', b'M', b'L', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), From a956ee0a3a0cd5e61d6add731c637d627737ecd7 Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Thu, 6 Jun 2024 17:46:56 +0300 Subject: [PATCH 06/12] Applying sad formatting --- src/filesystem/nodes/metadata/mime_type.rs | 72 +++++++++++++++++----- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/src/filesystem/nodes/metadata/mime_type.rs b/src/filesystem/nodes/metadata/mime_type.rs index f1bc241..ddf5332 100644 --- a/src/filesystem/nodes/metadata/mime_type.rs +++ b/src/filesystem/nodes/metadata/mime_type.rs @@ -93,24 +93,64 @@ impl MimeGuesser { } [b'<', ..] => { match &magic_bytes[1..] - .iter() - .map(|&b| b.to_ascii_uppercase()) - .collect::>().as_slice() { - [b'!', b'D', b'O', b'C', b'T', b'Y', b'P', b'E', b' ', b'H', b'T', b'M', b'L', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'H', b'T', b'M', b'L', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'H', b'E', b'A', b'D', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'S', b'C', b'R', b'I', b'P', b'T', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'I', b'F', b'R', b'A', b'M', b'E', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'H', b'1', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'D', b'I', b'V', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'F', b'O', b'N', b'T', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'T', b'A', b'B', b'L', b'E', tt, ..] 
if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + .iter() + .map(|&b| b.to_ascii_uppercase()) + .collect::>() + .as_slice() + { + [b'!', b'D', b'O', b'C', b'T', b'Y', b'P', b'E', b' ', b'H', b'T', b'M', b'L', tt, ..] + if is_whitespace_or_tag_terminator(*tt) => + { + Some(mime::TEXT_HTML) + } + [b'H', b'T', b'M', b'L', tt, ..] if is_whitespace_or_tag_terminator(*tt) => { + Some(mime::TEXT_HTML) + } + [b'H', b'E', b'A', b'D', tt, ..] if is_whitespace_or_tag_terminator(*tt) => { + Some(mime::TEXT_HTML) + } + [b'S', b'C', b'R', b'I', b'P', b'T', tt, ..] + if is_whitespace_or_tag_terminator(*tt) => + { + Some(mime::TEXT_HTML) + } + [b'I', b'F', b'R', b'A', b'M', b'E', tt, ..] + if is_whitespace_or_tag_terminator(*tt) => + { + Some(mime::TEXT_HTML) + } + [b'H', b'1', tt, ..] if is_whitespace_or_tag_terminator(*tt) => { + Some(mime::TEXT_HTML) + } + [b'D', b'I', b'V', tt, ..] if is_whitespace_or_tag_terminator(*tt) => { + Some(mime::TEXT_HTML) + } + [b'F', b'O', b'N', b'T', tt, ..] if is_whitespace_or_tag_terminator(*tt) => { + Some(mime::TEXT_HTML) + } + [b'T', b'A', b'B', b'L', b'E', tt, ..] + if is_whitespace_or_tag_terminator(*tt) => + { + Some(mime::TEXT_HTML) + } [b'A', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'S', b'T', b'Y', b'L', b'E', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'T', b'I', b'T', b'L', b'E', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'S', b'T', b'Y', b'L', b'E', tt, ..] + if is_whitespace_or_tag_terminator(*tt) => + { + Some(mime::TEXT_HTML) + } + [b'T', b'I', b'T', b'L', b'E', tt, ..] + if is_whitespace_or_tag_terminator(*tt) => + { + Some(mime::TEXT_HTML) + } [b'B', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'B', b'O', b'D', b'Y', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), - [b'B', b'R', tt, ..] 
if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), + [b'B', b'O', b'D', b'Y', tt, ..] if is_whitespace_or_tag_terminator(*tt) => { + Some(mime::TEXT_HTML) + } + [b'B', b'R', tt, ..] if is_whitespace_or_tag_terminator(*tt) => { + Some(mime::TEXT_HTML) + } [b'P', tt, ..] if is_whitespace_or_tag_terminator(*tt) => Some(mime::TEXT_HTML), _ => None, } From 238d51b4a1178e18291d4d6fdc796da577a6e0a6 Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Fri, 7 Jun 2024 17:09:17 +0300 Subject: [PATCH 07/12] Addressed some comments --- src/filesystem/drive/directory_handle.rs | 132 ++++++++++++++++++++- src/filesystem/nodes/metadata/mime_type.rs | 118 ++++++++---------- src/filesystem/nodes/metadata/mod.rs | 28 ++++- src/filesystem/nodes/mod.rs | 6 +- 4 files changed, 206 insertions(+), 78 deletions(-) diff --git a/src/filesystem/drive/directory_handle.rs b/src/filesystem/drive/directory_handle.rs index f396b49..be4bdc6 100644 --- a/src/filesystem/drive/directory_handle.rs +++ b/src/filesystem/drive/directory_handle.rs @@ -775,7 +775,6 @@ mod test { use super::*; use crate::filesystem::drive::inner::test::build_interesting_inner; use crate::prelude::MemoryDataStore; - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn mv_dir_from_dir_to_cwd_specify_name() { @@ -967,7 +966,7 @@ mod test { } #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] - async fn write_file_with_html_tags() { + async fn sniff_html_mime_type() { let mut rng = crate::utils::crypto_rng(); let current_key = SigningKey::generate(&mut rng); let mut handle = interesting_handle(Some(current_key)).await; @@ -1012,4 +1011,133 @@ mod test { assert_eq!(mime_type, "text/html"); } } + + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] + async fn 
sniff_mp3_file_mime_type() { + let mut rng = crate::utils::crypto_rng(); + let current_key = SigningKey::generate(&mut rng); + let mut handle = interesting_handle(Some(current_key)).await; + let mut store = MemoryDataStore::default(); + let mp3_test_case: &[u8] = &[ + 0x49, 0x44, 0x33, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x54, 0x53, 0x53, 0x45, + 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x03, 0x4c, 0x61, 0x76, 0x66, 0x36, 0x30, 0x2e, + 0x33, 0x2e, 0x31, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xfb, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + let file_name = "the_audio.mp4"; + handle + .write(&mut rng, &mut store, &[file_name], mp3_test_case) + .await + .unwrap(); + + let cwd_ls = handle.ls(&[]).await.unwrap(); + assert_eq!( + cwd_ls + .iter() + .filter(|entry| entry.name() == NodeName::try_from(file_name).unwrap()) + .count(), + 1 + ); + + let file_entry = cwd_ls + .iter() + .find(|entry| entry.name() == NodeName::try_from(file_name).unwrap()) + .unwrap(); + + assert_eq!(file_entry.kind(), NodeKind::File); + + let file_data = handle.read(&mut store, &[file_name]).await.unwrap(); + assert_eq!(file_data.as_slice(), mp3_test_case); + + let mime_type = file_entry.mime_type().unwrap(); + assert_eq!(mime_type, "audio/mpeg"); + } + + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] + async fn sniff_mp4_file_mime_type() { + let mut rng = crate::utils::crypto_rng(); + let current_key = SigningKey::generate(&mut rng); + let mut handle = interesting_handle(Some(current_key)).await; + let mut store = MemoryDataStore::default(); + let mp4_test_case: &[u8] = &[ + 0x00, 0x00, 0x00, 0x1c, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6f, 0x6d, 0x00, 0x00, + 0x02, 0x00, 0x69, 0x73, 0x6f, 0x6d, 
0x69, 0x73, 0x6f, 0x32, 0x6d, 0x70, 0x34, 0x31, + 0x00, 0x00, 0x00, 0x08, + ]; + let file_name = "the_audio.mp3"; + handle + .write(&mut rng, &mut store, &[file_name], mp4_test_case) + .await + .unwrap(); + + let cwd_ls = handle.ls(&[]).await.unwrap(); + assert_eq!( + cwd_ls + .iter() + .filter(|entry| entry.name() == NodeName::try_from(file_name).unwrap()) + .count(), + 1 + ); + + let file_entry = cwd_ls + .iter() + .find(|entry| entry.name() == NodeName::try_from(file_name).unwrap()) + .unwrap(); + + assert_eq!(file_entry.kind(), NodeKind::File); + + let file_data = handle.read(&mut store, &[file_name]).await.unwrap(); + assert_eq!(file_data.as_slice(), mp4_test_case); + + let mime_type = file_entry.mime_type().unwrap(); + assert_eq!(mime_type, "video/mp4"); + } + + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] + #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] + async fn sniff_webm_file_mime_type() { + let mut rng = crate::utils::crypto_rng(); + let current_key = SigningKey::generate(&mut rng); + let mut handle = interesting_handle(Some(current_key)).await; + let mut store = MemoryDataStore::default(); + let mp4_test_case: &[u8] = &[ + 0x1a, 0x45, 0xdf, 0xa3, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x42, 0x86, + 0x81, 0x01, 0x42, 0xf7, 0x81, 0x01, 0x42, 0xf2, 0x81, 0x04, 0x42, 0xf3, 0x81, 0x08, + 0x42, 0x82, 0x84, 0x77, 0x65, 0x62, 0x6d, 0x42, 0x87, 0x81, 0x02, 0x42, 0x85, 0x81, + 0x02, 0x18, 0x53, 0x80, 0x67, 0x01, 0x00, 0x00, 0x00, 0x00, 0x0d, 0xc0, 0x0a, 0x11, + 0x4d, 0x9b, 0x74, 0x40, 0x3c, 0x4d, 0xbb, 0x8b, 0x53, 0xab, 0x84, 0x15, 0x49, 0xa9, + 0x66, 0x53, 0xac, 0x81, 0xe5, 0x4d, 0xbb, 0x8c, 0x53, 0xab, + ]; + let file_name = "the_audio.mp4"; + handle + .write(&mut rng, &mut store, &[file_name], mp4_test_case) + .await + .unwrap(); + + let cwd_ls = handle.ls(&[]).await.unwrap(); + assert_eq!( + cwd_ls + .iter() + .filter(|entry| entry.name() == NodeName::try_from(file_name).unwrap()) + .count(), + 1 + ); + + let 
file_entry = cwd_ls + .iter() + .find(|entry| entry.name() == NodeName::try_from(file_name).unwrap()) + .unwrap(); + + assert_eq!(file_entry.kind(), NodeKind::File); + + let file_data = handle.read(&mut store, &[file_name]).await.unwrap(); + assert_eq!(file_data.as_slice(), mp4_test_case); + + let mime_type = file_entry.mime_type().unwrap(); + assert_eq!(mime_type, "video/webm"); + } } diff --git a/src/filesystem/nodes/metadata/mime_type.rs b/src/filesystem/nodes/metadata/mime_type.rs index ddf5332..a1fe5f1 100644 --- a/src/filesystem/nodes/metadata/mime_type.rs +++ b/src/filesystem/nodes/metadata/mime_type.rs @@ -7,18 +7,6 @@ pub struct MimeGuesser { } impl MimeGuesser { - const MP3_RATES: [u32; 15] = [ - 0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000, - 256000, 320000, - ]; - - const MP25_RATES: [u32; 15] = [ - 0, 8000, 16000, 24000, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, - 144000, 160000, - ]; - - const SAMPLE_RATES: [u32; 3] = [44100, 48000, 32000]; - pub fn with_name(mut self, name: NodeName) -> Self { match name { NodeName::Named(name) => self.name = Some(name.clone()), @@ -161,11 +149,12 @@ impl MimeGuesser { fn algorithm_match(&self) -> Option { if self.is_mp4() { - return Some(mime::AUDIO_MP4); + return Some(mime::VIDEO_MP4); } - if self.is_mp3() { - return Some(mime::AUDIO_MPEG); + if self.is_webm() { + return Some(mime::VIDEO_WEBM); } + None } @@ -178,75 +167,68 @@ impl MimeGuesser { if data.len() < box_size as usize || box_size % 4 != 0 { return false; } - if &data[4..8] != b"ftyp" { - return false; - } - if &data[8..11] == b"mp4" { - return true; - } - data[16..] - .chunks_exact(4) - .take_while(|chunk| &chunk[..3] != b"mp4") - .last() - .map_or(false, |chunk| &chunk[..3] == b"mp4") - } - fn is_mp3(&self) -> bool { + data.get(4..8) == Some(b"ftyp") + && (data.get(8..11) == Some(b"mp4") + || data[16..] 
+ .chunks_exact(4) + .any(|chunk| chunk.starts_with(b"mp4"))) + } + fn is_webm(&self) -> bool { let data = &self.data; - let mut offset = 0; - - if !match_mp3_header(data, offset) { + if data.len() < 4 || data[..4] != [0x1A, 0x45, 0xDF, 0xA3] { return false; } - let (version, bitrate_index, sample_rate_index, pad) = parse_mp3_frame(data, offset); - let bitrate = if version & 0x01 != 0 { - Self::MP25_RATES[bitrate_index as usize] - } else { - Self::MP3_RATES[bitrate_index as usize] - }; - let sample_rate = Self::SAMPLE_RATES[sample_rate_index as usize]; - let skipped_bytes = compute_mp3_frame_size(version, bitrate, sample_rate, pad); + let skip_first_bytes = 4; + let chunk_size = 2; + let magic_bytes_delim = [0x42, 0x82]; + for (chunk_idx, chunk) in data[skip_first_bytes..].chunks(chunk_size).enumerate() { + // went over 4 + 2 * 17 = 38 bytes + if chunk_idx >= 17 { + break; + } - if skipped_bytes < 4 || skipped_bytes > data.len() - offset { - return false; + if chunk != magic_bytes_delim { + continue; + } + + let offset = skip_first_bytes + chunk_idx * chunk_size + magic_bytes_delim.len(); + if let Some((_, number_size)) = data.get(offset..).map(|d| parse_vint(d, 0)) { + let start = offset + number_size; + let end = start + 4; + if data.get(start..end) == Some(b"webm") { + return true; + } + } } - offset += skipped_bytes; - match_mp3_header(data, offset) + false } } -fn match_mp3_header(sequence: &[u8], s: usize) -> bool { - let length = sequence.len(); - if length - s < 4 { - return false; +fn parse_vint(data: &[u8], offset: usize) -> (usize, usize) { + let mut mask = 128; + let max_vint_length = 8; + let mut number_size = 1; + + while number_size < max_vint_length + && data.get(offset).is_none() + && (data.get(offset).unwrap() & mask == 0) + { + mask >>= 1; + number_size += 1; } - sequence[s] == 0xff - && sequence[s + 1] & 0xe0 == 0xe0 - && (sequence[s + 1] & 0x06 >> 1) != 0 - && (sequence[s + 2] & 0xf0 >> 4) != 15 - && (sequence[s + 2] & 0x0c >> 2) != 3 - && (4 - 
(sequence[s + 1] & 0x06 >> 1)) == 3 -} - -fn parse_mp3_frame(sequence: &[u8], s: usize) -> (u8, u8, u8, u8) { - let version = sequence[s + 1] & 0x18 >> 3; - let bitrate_index = sequence[s + 2] & 0xf0 >> 4; - let sample_rate_index = sequence[s + 2] & 0x0c >> 2; - let pad = sequence[s + 2] & 0x02 >> 1; - (version, bitrate_index, sample_rate_index, pad) -} + let mut parsed_number = data.get(offset).map_or(0, |&b| (b & !mask) as usize); -fn compute_mp3_frame_size(version: u8, bitrate: u32, sample_rate: u32, pad: u8) -> usize { - let scale = if version == 1 { 72 } else { 144 }; - let mut size = bitrate * scale / sample_rate; - if pad != 0 { - size += 1; + for &b in data.get(offset + 1..offset + number_size).unwrap_or(&[]) { + parsed_number = (parsed_number << 8) | b as usize; } - size as usize + + (parsed_number, number_size) } + fn is_whitespace_or_tag_terminator(byte: u8) -> bool { byte == b' ' || byte == b'>' } diff --git a/src/filesystem/nodes/metadata/mod.rs b/src/filesystem/nodes/metadata/mod.rs index 07ad8bb..03a3db2 100644 --- a/src/filesystem/nodes/metadata/mod.rs +++ b/src/filesystem/nodes/metadata/mod.rs @@ -6,25 +6,37 @@ pub use mime_type::MimeGuesser; #[derive(Hash, Eq, PartialEq, Debug)] pub enum MetadataKey { MimeType, + Custom(String), } impl MetadataKey { - pub fn as_str(&self) -> &'static str { + pub fn as_str(&self) -> &str { match self { MetadataKey::MimeType => "mime", + MetadataKey::Custom(s) => s.as_str(), } } - pub fn as_bytes(&self) -> &'static [u8] { + pub fn as_bytes(&self) -> Vec { match self { - MetadataKey::MimeType => b"mime", + MetadataKey::MimeType => b"mime".to_vec(), + MetadataKey::Custom(s) => s.as_bytes().to_vec(), } } pub fn from_bytes(key: &[u8]) -> Option { match key { b"mime" => Some(MetadataKey::MimeType), - _ => None, + _ => { + if key.len() > 255 { + return None; + } + + match std::str::from_utf8(key) { + Ok(s) => Some(MetadataKey::Custom(s.to_string())), + Err(_) => None, + } + } } } } @@ -35,7 +47,13 @@ impl FromStr for 
MetadataKey { fn from_str(s: &str) -> Result { match s { "mime" => Ok(MetadataKey::MimeType), - _ => Err(winnow::error::ErrorKind::Token), + _ => { + if s.len() > 255 { + return Err(winnow::error::ErrorKind::Verify); + } + + Ok(MetadataKey::Custom(s.to_string())) + } } } } diff --git a/src/filesystem/nodes/mod.rs b/src/filesystem/nodes/mod.rs index 98fd983..fa68f89 100644 --- a/src/filesystem/nodes/mod.rs +++ b/src/filesystem/nodes/mod.rs @@ -196,7 +196,7 @@ impl Node { node_data.write_all(&[entry_count]).await?; let mut sorted_metadata = self.metadata.iter().collect::>(); - sorted_metadata.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(b.as_bytes())); + sorted_metadata.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(&b.as_bytes())); for (key, val) in sorted_metadata.into_iter() { let key_bytes = key.as_bytes(); @@ -207,7 +207,7 @@ impl Node { } node_data.write_all(&[key_bytes_len as u8]).await?; - node_data.write_all(key_bytes).await?; + node_data.write_all(&key_bytes).await?; let val_bytes_len = val.len(); if val_bytes_len > u8::MAX as usize { @@ -296,7 +296,7 @@ impl Node { encoded_size += self .metadata() .iter() - .map(|(k, v)| (2 + k.as_str().len() + v.len()) as u64) + .map(|(k, v)| (2 + k.as_bytes().len() + v.len()) as u64) .sum::(); encoded_size From 8bed2a4ae7d87e030f998d2d692481dc21f37452 Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Fri, 7 Jun 2024 17:15:35 +0300 Subject: [PATCH 08/12] Adding banyan fork of mime --- Cargo.lock | 4 ++-- Cargo.toml | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 94f8f47..7ab309c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1098,7 +1098,7 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mime" version = "0.4.0-a.0" -source = "git+https://github.com/PlamenHristov/mime.git?rev=c30e3db#c30e3db1b4414036baaac50e0914355385f56ec1" +source = 
"git+https://github.com/banyancomputer/mime.git?rev=d1a1744#d1a1744cbe6b87e33a4258e6bef555efc99016dd" dependencies = [ "mime-parse", "quoted-string", @@ -1107,7 +1107,7 @@ dependencies = [ [[package]] name = "mime-parse" version = "0.0.0" -source = "git+https://github.com/PlamenHristov/mime.git?rev=c30e3db#c30e3db1b4414036baaac50e0914355385f56ec1" +source = "git+https://github.com/banyancomputer/mime.git?rev=d1a1744#d1a1744cbe6b87e33a4258e6bef555efc99016dd" [[package]] name = "mime_guess" diff --git a/Cargo.toml b/Cargo.toml index 021207d..061468e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,8 +61,7 @@ reqwest = { version = "^0.12", default-features = false, optional = true, featur serde = { version = "^1", features = ["derive"], optional = true } serde_json = { version = "^1", optional = true } url = { version = "^2", optional = true } -#mime = "0.3.17" -mime = {git = "https://github.com/PlamenHristov/mime.git", rev = "c30e3db"} +mime = {git = "https://github.com/banyancomputer/mime.git", rev = "d1a1744"} mime_guess = "2.0.4" From 7cc10dfd7ca22c9efc217759234d94b5ee6aa832 Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Fri, 7 Jun 2024 17:23:30 +0300 Subject: [PATCH 09/12] Fixed a small bug and added one expect --- src/filesystem/drive/directory_handle.rs | 1 + src/filesystem/nodes/metadata/mime_type.rs | 4 ++-- src/filesystem/nodes/mod.rs | 6 +++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/filesystem/drive/directory_handle.rs b/src/filesystem/drive/directory_handle.rs index be4bdc6..27682de 100644 --- a/src/filesystem/drive/directory_handle.rs +++ b/src/filesystem/drive/directory_handle.rs @@ -775,6 +775,7 @@ mod test { use super::*; use crate::filesystem::drive::inner::test::build_interesting_inner; use crate::prelude::MemoryDataStore; + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)] #[cfg_attr(not(target_arch = "wasm32"), tokio::test)] async fn mv_dir_from_dir_to_cwd_specify_name() { diff --git 
a/src/filesystem/nodes/metadata/mime_type.rs b/src/filesystem/nodes/metadata/mime_type.rs index a1fe5f1..eee2a41 100644 --- a/src/filesystem/nodes/metadata/mime_type.rs +++ b/src/filesystem/nodes/metadata/mime_type.rs @@ -213,8 +213,8 @@ fn parse_vint(data: &[u8], offset: usize) -> (usize, usize) { let mut number_size = 1; while number_size < max_vint_length - && data.get(offset).is_none() - && (data.get(offset).unwrap() & mask == 0) + && data.get(offset).is_some() + && (data.get(offset).expect("already checked") & mask == 0) { mask >>= 1; number_size += 1; diff --git a/src/filesystem/nodes/mod.rs b/src/filesystem/nodes/mod.rs index fa68f89..ac98cc5 100644 --- a/src/filesystem/nodes/mod.rs +++ b/src/filesystem/nodes/mod.rs @@ -24,6 +24,7 @@ pub(crate) use node_builder::{NodeBuilder, NodeBuilderError}; pub(crate) use node_data::{NodeData, NodeDataError}; pub use node_name::{NodeName, NodeNameError}; +use std::str::FromStr; use std::collections::HashMap; use std::io::{Error as StdError, ErrorKind as StdErrorKind}; @@ -446,7 +447,10 @@ impl Node { self.metadata .get(&MetadataKey::MimeType) .and_then(|mime_str| { - mime::MediaType::parse(std::str::from_utf8(mime_str).unwrap()).ok() + match std::str::from_utf8(mime_str) { + Ok(s) => Some(mime::MediaType::from_str(s).ok()?), + Err(_) => None, + } }) } From ece7a2c9498fb46f88494219bf1c1708d44efc36 Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Fri, 7 Jun 2024 17:23:58 +0300 Subject: [PATCH 10/12] Reformat --- src/filesystem/nodes/mod.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/filesystem/nodes/mod.rs b/src/filesystem/nodes/mod.rs index ac98cc5..0576212 100644 --- a/src/filesystem/nodes/mod.rs +++ b/src/filesystem/nodes/mod.rs @@ -24,9 +24,9 @@ pub(crate) use node_builder::{NodeBuilder, NodeBuilderError}; pub(crate) use node_data::{NodeData, NodeDataError}; pub use node_name::{NodeName, NodeNameError}; -use std::str::FromStr; use std::collections::HashMap; use std::io::{Error as 
StdError, ErrorKind as StdErrorKind}; +use std::str::FromStr; use futures::{AsyncWrite, AsyncWriteExt}; use mime; @@ -446,11 +446,9 @@ impl Node { pub fn mime_type(&self) -> Option { self.metadata .get(&MetadataKey::MimeType) - .and_then(|mime_str| { - match std::str::from_utf8(mime_str) { - Ok(s) => Some(mime::MediaType::from_str(s).ok()?), - Err(_) => None, - } + .and_then(|mime_str| match std::str::from_utf8(mime_str) { + Ok(s) => Some(mime::MediaType::from_str(s).ok()?), + Err(_) => None, }) } From 192072e5b696a2e0bcd41c7b75760839769a7482 Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Fri, 7 Jun 2024 17:25:51 +0300 Subject: [PATCH 11/12] Variable rename --- src/filesystem/drive/directory_handle.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/filesystem/drive/directory_handle.rs b/src/filesystem/drive/directory_handle.rs index 27682de..baffd3b 100644 --- a/src/filesystem/drive/directory_handle.rs +++ b/src/filesystem/drive/directory_handle.rs @@ -1105,7 +1105,7 @@ mod test { let current_key = SigningKey::generate(&mut rng); let mut handle = interesting_handle(Some(current_key)).await; let mut store = MemoryDataStore::default(); - let mp4_test_case: &[u8] = &[ + let webm_test_case: &[u8] = &[ 0x1a, 0x45, 0xdf, 0xa3, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x42, 0x86, 0x81, 0x01, 0x42, 0xf7, 0x81, 0x01, 0x42, 0xf2, 0x81, 0x04, 0x42, 0xf3, 0x81, 0x08, 0x42, 0x82, 0x84, 0x77, 0x65, 0x62, 0x6d, 0x42, 0x87, 0x81, 0x02, 0x42, 0x85, 0x81, @@ -1115,7 +1115,7 @@ mod test { ]; let file_name = "the_audio.mp4"; handle - .write(&mut rng, &mut store, &[file_name], mp4_test_case) + .write(&mut rng, &mut store, &[file_name], webm_test_case) .await .unwrap(); From 9a89cf3a17851a625a2bdc795c8d405c5ed4da0f Mon Sep 17 00:00:00 2001 From: Plamen Hristov Date: Fri, 7 Jun 2024 18:48:47 +0300 Subject: [PATCH 12/12] Fixed test issue. 
--- src/filesystem/drive/directory_handle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/filesystem/drive/directory_handle.rs b/src/filesystem/drive/directory_handle.rs index baffd3b..e7003c0 100644 --- a/src/filesystem/drive/directory_handle.rs +++ b/src/filesystem/drive/directory_handle.rs @@ -1136,7 +1136,7 @@ mod test { assert_eq!(file_entry.kind(), NodeKind::File); let file_data = handle.read(&mut store, &[file_name]).await.unwrap(); - assert_eq!(file_data.as_slice(), mp4_test_case); + assert_eq!(file_data.as_slice(), webm_test_case); let mime_type = file_entry.mime_type().unwrap(); assert_eq!(mime_type, "video/webm");