From 56d949905da963e2bb5ad16cbbb1e016e576d158 Mon Sep 17 00:00:00 2001
From: Sam Stelfox
Date: Sun, 11 Feb 2024 18:35:18 -0500
Subject: [PATCH] feat: deeper in the attribute encoding and decoding on a filesystem, pretty close to that being complete

---
 src/codec/actor_id.rs               |  30 +++++
 src/codec/cid.rs                    |   7 +
 src/codec/crypto/fingerprint.rs     |  31 ++++-
 src/codec/filesystem/attribute.rs   | 197 ++++++++++++++++++++++++++++
 src/codec/filesystem/mod.rs         |   2 +
 src/codec/mod.rs                    |   2 +
 src/filesystem/content_reference.rs |  43 ++++++-
 src/filesystem/mod.rs               |   2 -
 src/filesystem/nodes/file.rs        |  59 ++++++++-
 9 files changed, 364 insertions(+), 9 deletions(-)
 create mode 100644 src/codec/actor_id.rs
 create mode 100644 src/codec/filesystem/attribute.rs

diff --git a/src/codec/actor_id.rs b/src/codec/actor_id.rs
new file mode 100644
index 0000000..962ecd2
--- /dev/null
+++ b/src/codec/actor_id.rs
@@ -0,0 +1,30 @@
+use async_trait::async_trait;
+use futures::AsyncWrite;
+
+use crate::codec::crypto::Fingerprint;
+use crate::codec::AsyncEncodable;
+
+// todo(sstelfox) likely need a vector clock here...
+/// Identifier for an actor, stored as a wrapped [`Fingerprint`].
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct ActorId(Fingerprint);
+
+impl ActorId {
+    /// Parses an actor ID from the front of `input`, returning the unconsumed remainder.
+    pub fn parse(input: &[u8]) -> nom::IResult<&[u8], Self> {
+        let (remaining, fingerprint) = Fingerprint::parse(input)?;
+        Ok((remaining, ActorId(fingerprint)))
+    }
+}
+
+#[async_trait]
+impl AsyncEncodable for ActorId {
+    /// Encodes the wrapped fingerprint, passing `writer` and `pos` straight through.
+    async fn encode<W: AsyncWrite + Unpin + Send>(
+        &self,
+        writer: &mut W,
+        pos: usize,
+    ) -> std::io::Result<usize> {
+        self.0.encode(writer, pos).await
+    }
+}
diff --git a/src/codec/cid.rs b/src/codec/cid.rs
index 7d27874..a339adc 100644
--- a/src/codec/cid.rs
+++ b/src/codec/cid.rs
@@ -23,6 +23,13 @@ impl Cid {
     }
 }
 
+impl From<[u8; CID_LENGTH]> for Cid {
+    /// Wraps the raw bytes as a [`Cid`] without inspecting them.
+    fn from(bytes: [u8; CID_LENGTH]) -> Self {
+        Self(bytes)
+    }
+}
+
 #[async_trait]
 impl AsyncEncodable for Cid {
     async fn encode<W: AsyncWrite + Unpin + Send>(
diff --git a/src/codec/crypto/fingerprint.rs b/src/codec/crypto/fingerprint.rs
index 584c82f..54eb34f 100644
--- a/src/codec/crypto/fingerprint.rs
+++ b/src/codec/crypto/fingerprint.rs
@@ -1,15 +1,44 @@
+use async_trait::async_trait;
+use futures::{AsyncWrite, AsyncWriteExt};
+use nom::bytes::streaming::take;
+
 use crate::codec::crypto::{KeyId, VerifyingKey};
+use crate::codec::AsyncEncodable;
 
 const FINGERPRINT_SIZE: usize = 32;
 
+#[derive(Clone, Copy, PartialEq)]
 pub struct Fingerprint([u8; FINGERPRINT_SIZE]);
 
 impl Fingerprint {
-    pub(crate) fn key_id(&self) -> KeyId {
+    pub fn key_id(&self) -> KeyId {
         let mut key_id = [0u8; 2];
         key_id.copy_from_slice(&self.0[..2]);
         KeyId::from(u16::from_le_bytes(key_id))
     }
+
+    /// Parses a fingerprint from the first `FINGERPRINT_SIZE` bytes of `input`.
+    pub fn parse(input: &[u8]) -> nom::IResult<&[u8], Self> {
+        let (remaining, id_bytes) = take(FINGERPRINT_SIZE)(input)?;
+
+        let mut bytes = [0u8; FINGERPRINT_SIZE];
+        bytes.copy_from_slice(id_bytes);
+
+        Ok((remaining, Self(bytes)))
+    }
+}
+
+#[async_trait]
+impl AsyncEncodable for Fingerprint {
+    /// Writes the raw fingerprint bytes to `writer` and returns `pos` advanced by their length.
+    async fn encode<W: AsyncWrite + Unpin + Send>(
+        &self,
+        writer: &mut W,
+        pos: usize,
+    ) -> std::io::Result<usize> {
+        writer.write_all(&self.0).await?;
+        Ok(pos + self.0.len())
+    }
 }
 
 impl std::fmt::Debug for Fingerprint {
diff --git a/src/codec/filesystem/attribute.rs b/src/codec/filesystem/attribute.rs
new file mode 100644
index 0000000..843ac3d
--- /dev/null
+++ b/src/codec/filesystem/attribute.rs
@@ -0,0 +1,197 @@
+use async_trait::async_trait;
+use futures::{AsyncWrite, AsyncWriteExt};
+use nom::bytes::streaming::take;
+use nom::error::{Error as NomError, ErrorKind};
+use nom::number::streaming::{le_u64, le_u8};
+use nom::IResult;
+use time::OffsetDateTime;
+
+use crate::codec::filesystem::Permissions;
+use crate::codec::ActorId;
+use crate::codec::AsyncEncodable;
+
+// Every encoded attribute starts with one of these single byte type identifiers.
+const ATTRIBUTE_CUSTOM_TYPE_ID: u8 = 0x00;
+
+const ATTRIBUTE_OWNER_TYPE_ID: u8 = 0x01;
+
+const ATTRIBUTE_PERMISSIONS_TYPE_ID: u8 = 0x02;
+
+const ATTRIBUTE_CREATED_AT_TYPE_ID: u8 = 0x03;
+
+const ATTRIBUTE_MODIFIED_AT_TYPE_ID: u8 = 0x04;
+
+const ATTRIBUTE_MIME_TYPE_TYPE_ID: u8 = 0x05;
+
+/// A single filesystem attribute, encoded as a type byte followed by a type specific payload.
+pub enum Attribute {
+    // Note: key and value must each encode to at most 255 bytes (one length byte each)
+    Custom { key: String, value: String },
+
+    Owner(ActorId),
+    Permissions(Permissions),
+
+    // Note: encoded as unsigned milliseconds since the unix epoch, finer precision is dropped
+    CreatedAt(OffsetDateTime),
+    ModifiedAt(OffsetDateTime),
+
+    // Note: must encode to at most 255 bytes (one length byte)
+    MimeType(String),
+}
+
+impl Attribute {
+    /// Parses one attribute from the front of `input`, returning the unconsumed remainder.
+    ///
+    /// Unknown type bytes, invalid UTF-8 and out-of-range timestamps are reported as
+    /// `nom::Err::Failure` positioned at the start of the attribute.
+    pub fn parse(input: &[u8]) -> IResult<&[u8], Self> {
+        let (remaining, type_byte) = le_u8(input)?;
+
+        let parsed = match type_byte {
+            ATTRIBUTE_CUSTOM_TYPE_ID => {
+                let (remaining, (key_len, value_len)) =
+                    nom::sequence::pair(le_u8, le_u8)(remaining)?;
+
+                let (remaining, key_bytes) = take(key_len)(remaining)?;
+                let key = String::from_utf8(key_bytes.to_vec())
+                    .map_err(|_| nom::Err::Failure(NomError::new(input, ErrorKind::Verify)))?;
+
+                let (remaining, value_bytes) = take(value_len)(remaining)?;
+                let value = String::from_utf8(value_bytes.to_vec())
+                    .map_err(|_| nom::Err::Failure(NomError::new(input, ErrorKind::Verify)))?;
+
+                (remaining, Self::Custom { key, value })
+            }
+            ATTRIBUTE_OWNER_TYPE_ID => {
+                let (remaining, actor_id) = ActorId::parse(remaining)?;
+                (remaining, Self::Owner(actor_id))
+            }
+            ATTRIBUTE_PERMISSIONS_TYPE_ID => {
+                let (remaining, permissions) = Permissions::parse(remaining)?;
+                (remaining, Self::Permissions(permissions))
+            }
+            ATTRIBUTE_CREATED_AT_TYPE_ID => {
+                let (remaining, unix_milliseconds) = le_u64(remaining)?;
+
+                let unix_nanos = unix_milliseconds as i128 * 1_000_000;
+                let time = OffsetDateTime::from_unix_timestamp_nanos(unix_nanos)
+                    .map_err(|_| nom::Err::Failure(NomError::new(input, ErrorKind::Verify)))?;
+
+                (remaining, Self::CreatedAt(time))
+            }
+            ATTRIBUTE_MODIFIED_AT_TYPE_ID => {
+                let (remaining, unix_milliseconds) = le_u64(remaining)?;
+
+                let unix_nanos = unix_milliseconds as i128 * 1_000_000;
+                let time = OffsetDateTime::from_unix_timestamp_nanos(unix_nanos)
+                    .map_err(|_| nom::Err::Failure(NomError::new(input, ErrorKind::Verify)))?;
+
+                (remaining, Self::ModifiedAt(time))
+            }
+            ATTRIBUTE_MIME_TYPE_TYPE_ID => {
+                let (remaining, mime_len) = le_u8(remaining)?;
+
+                let (remaining, mime_bytes) = take(mime_len)(remaining)?;
+                let mime = String::from_utf8(mime_bytes.to_vec())
+                    .map_err(|_| nom::Err::Failure(NomError::new(input, ErrorKind::Verify)))?;
+
+                (remaining, Self::MimeType(mime))
+            }
+            _ => return Err(nom::Err::Failure(NomError::new(input, ErrorKind::Tag))),
+        };
+
+        Ok(parsed)
+    }
+}
+
+/// Converts `time` to whole milliseconds since the Unix epoch, the unit written by `encode`.
+///
+/// Fails with `InvalidInput` when the count does not fit in a `u64` (for example a timestamp
+/// before the epoch); a plain `as` cast would silently wrap such values instead.
+fn to_unix_milliseconds(time: OffsetDateTime) -> std::io::Result<u64> {
+    u64::try_from(time.unix_timestamp_nanos() / 1_000_000).map_err(|_| {
+        std::io::Error::new(
+            std::io::ErrorKind::InvalidInput,
+            "timestamp can not be represented as unsigned unix milliseconds",
+        )
+    })
+}
+
+#[async_trait]
+impl AsyncEncodable for Attribute {
+    /// Writes the type byte and payload of this attribute to `writer`.
+    ///
+    /// Returns `pos` advanced by the number of bytes written. Oversized strings and timestamps
+    /// that can not be represented are rejected with `InvalidInput` before anything is written.
+    async fn encode<W: AsyncWrite + Unpin + Send>(
+        &self,
+        writer: &mut W,
+        pos: usize,
+    ) -> std::io::Result<usize> {
+        match self {
+            Self::Custom { key, value } => {
+                let key_bytes = key.as_bytes();
+                let key_len = key_bytes.len();
+
+                let value_bytes = value.as_bytes();
+                let value_len = value_bytes.len();
+
+                if key_len > 255 || value_len > 255 {
+                    return Err(std::io::Error::new(
+                        std::io::ErrorKind::InvalidInput,
+                        "attribute key or value longer than 255 bytes when encoded",
+                    ));
+                }
+
+                writer.write_all(&[ATTRIBUTE_CUSTOM_TYPE_ID]).await?;
+                writer.write_all(&[key_len as u8, value_len as u8]).await?;
+                writer.write_all(key_bytes).await?;
+                writer.write_all(value_bytes).await?;
+
+                Ok(pos + 1 + 2 + key_len + value_len)
+            }
+            Self::Owner(actor_id) => {
+                writer.write_all(&[ATTRIBUTE_OWNER_TYPE_ID]).await?;
+                actor_id.encode(writer, pos + 1).await
+            }
+            Self::Permissions(permissions) => {
+                writer.write_all(&[ATTRIBUTE_PERMISSIONS_TYPE_ID]).await?;
+                permissions.encode(writer, pos + 1).await
+            }
+            Self::CreatedAt(time) => {
+                let timestamp = to_unix_milliseconds(*time)?;
+
+                writer.write_all(&[ATTRIBUTE_CREATED_AT_TYPE_ID]).await?;
+                writer.write_all(&timestamp.to_le_bytes()).await?;
+
+                Ok(pos + 1 + 8)
+            }
+            Self::ModifiedAt(time) => {
+                let timestamp = to_unix_milliseconds(*time)?;
+
+                writer.write_all(&[ATTRIBUTE_MODIFIED_AT_TYPE_ID]).await?;
+                writer.write_all(&timestamp.to_le_bytes()).await?;
+
+                Ok(pos + 1 + 8)
+            }
+            Self::MimeType(mime) => {
+                let mime_bytes = mime.as_bytes();
+                let mime_len = mime_bytes.len();
+
+                if mime_len > 255 {
+                    return Err(std::io::Error::new(
+                        std::io::ErrorKind::InvalidInput,
+                        "mime type longer than 255 bytes when encoded",
+                    ));
+                }
+
+                writer.write_all(&[ATTRIBUTE_MIME_TYPE_TYPE_ID]).await?;
+                // `parse` reads a one byte length ahead of the mime bytes
+                writer.write_all(&[mime_len as u8]).await?;
+                writer.write_all(mime_bytes).await?;
+
+                Ok(pos + 1 + 1 + mime_len)
+            }
+        }
+    }
+}
diff --git a/src/codec/filesystem/mod.rs b/src/codec/filesystem/mod.rs
index f5ae64a..5763079 100644
--- a/src/codec/filesystem/mod.rs
+++ b/src/codec/filesystem/mod.rs
@@ -1,5 +1,7 @@
+mod attribute;
 mod node_type;
 mod permissions;
 
+pub use attribute::Attribute;
 pub use node_type::NodeType;
 pub use permissions::Permissions;
diff --git a/src/codec/mod.rs b/src/codec/mod.rs
index 8af1eeb..06d8305 100644
--- a/src/codec/mod.rs
+++ b/src/codec/mod.rs
@@ -1,3 +1,4 @@
+mod actor_id;
 mod cid;
 pub mod content_payload;
 pub mod crypto;
@@ -8,6 +9,7 @@ pub mod header;
 use async_trait::async_trait;
 use futures::AsyncWrite;
 
+pub use actor_id::ActorId;
 pub use cid::Cid;
 pub use filesystem_id::FilesystemId;
 
diff --git a/src/filesystem/content_reference.rs b/src/filesystem/content_reference.rs
index 81529c3..be47c6c 100644
--- a/src/filesystem/content_reference.rs
+++ b/src/filesystem/content_reference.rs
@@ -1,5 +1,46 @@
-use crate::codec::Cid;
+use async_trait::async_trait;
+use futures::{AsyncWrite, AsyncWriteExt};
+use nom::number::streaming::le_u32;
+
+use crate::codec::{AsyncEncodable, Cid};
 
 pub struct ContentReference {
     data_block_cid: Cid,
+    offset: u32,
+    length: u32,
+}
+
+impl ContentReference {
+    /// Parses a CID followed by the little-endian `u32` offset and length from `input`.
+    pub fn parse(input: &[u8]) -> nom::IResult<&[u8], Self> {
+        let (remaining, data_block_cid) = Cid::parse(input)?;
+
+        let (remaining, offset) = le_u32(remaining)?;
+        let (remaining, length) = le_u32(remaining)?;
+
+        let content_reference = Self {
+            data_block_cid,
+            offset,
+            length,
+        };
+
+        Ok((remaining, content_reference))
+    }
+}
+
+#[async_trait]
+impl AsyncEncodable for ContentReference {
+    /// Writes the CID, then the offset and length as little-endian `u32` values.
+    async fn encode<W: AsyncWrite + Unpin + Send>(
+        &self,
+        writer: &mut W,
+        pos: usize,
+    ) -> std::io::Result<usize> {
+        let pos = self.data_block_cid.encode(writer, pos).await?;
+
+        writer.write_all(&self.offset.to_le_bytes()).await?;
+        writer.write_all(&self.length.to_le_bytes()).await?;
+
+        Ok(pos + 8)
+    }
 }
diff --git a/src/filesystem/mod.rs b/src/filesystem/mod.rs
index 27d7838..d1cd6e3 100644
--- a/src/filesystem/mod.rs
+++ b/src/filesystem/mod.rs
@@ -7,8 +7,6 @@ pub use nodes::*;
 use crate::codec::crypto::SigningKey;
 use crate::codec::FilesystemId;
 
-pub type ActorId = u16;
-
 pub struct Drive {
     _filesystem_id: FilesystemId,
     _root: DriveDirectory,
diff --git a/src/filesystem/nodes/file.rs b/src/filesystem/nodes/file.rs
index 8091ff5..3aeece6 100644
--- a/src/filesystem/nodes/file.rs
+++ b/src/filesystem/nodes/file.rs
@@ -1,10 +1,11 @@
 use std::collections::HashMap;
 
+use nom::AsBytes;
 use time::OffsetDateTime;
 
-use crate::codec::filesystem::Permissions;
-use crate::codec::Cid;
-use crate::filesystem::{ActorId, ContentReference};
+use crate::codec::filesystem::{Attribute, Permissions};
+use crate::codec::{ActorId, AsyncEncodable, Cid};
+use crate::filesystem::ContentReference;
 
 pub struct File {
     owner: ActorId,
@@ -19,7 +20,55 @@ pub struct File {
 }
 
 impl File {
-    pub fn calculate_cid(&self) -> Cid {
-        todo!()
+    /// Computes this file's CID: the BLAKE3 hash of every encoded `content` entry followed by
+    /// the encoded attributes in sorted byte order.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`FileError::CidEncodingError`] if a content entry or attribute fails to encode.
+    pub async fn calculate_cid(&self) -> Result<Cid, FileError> {
+        let mut cid_content = Vec::new();
+
+        for content in self.content.iter() {
+            content.encode(&mut cid_content, 0).await?;
+        }
+
+        let mut attributes = vec![
+            Attribute::Owner(self.owner),
+            Attribute::Permissions(self.permissions),
+            Attribute::CreatedAt(self.created_at),
+            Attribute::ModifiedAt(self.modified_at),
+        ];
+
+        for (key, value) in self.custom_metadata.iter() {
+            attributes.push(Attribute::Custom {
+                key: key.clone(),
+                value: value.clone(),
+            });
+        }
+
+        // Sort lexicographically by the encoded byte strings as the RFC specifies
+        let mut attribute_bytes = Vec::new();
+        for attribute in attributes.into_iter() {
+            let mut encoded_attributes = Vec::new();
+            attribute.encode(&mut encoded_attributes, 0).await?;
+            attribute_bytes.push(encoded_attributes);
+        }
+
+        attribute_bytes.sort_unstable();
+        for attribute in attribute_bytes.into_iter() {
+            cid_content.extend(attribute);
+        }
+
+        let hash: [u8; 32] = blake3::hash(cid_content.as_bytes()).into();
+
+        Ok(Cid::from(hash))
     }
 }
+
+/// Errors produced while working with a [`File`].
+#[derive(Debug, thiserror::Error)]
+pub enum FileError {
+    #[error("failed to generate cid content: {0}")]
+    CidEncodingError(#[from] std::io::Error),
+}