From 7438d4af9f012119f5827d6fc718fdf4f8de08a1 Mon Sep 17 00:00:00 2001 From: Leonard Lesinski <84378319+Le0X8@users.noreply.github.com> Date: Fri, 12 Jul 2024 18:37:50 +0200 Subject: [PATCH] Added (very) basic ZIP parser --- Cargo.lock | 283 ++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/file.rs | 6 + src/formats/zip/parser.rs | 101 ++++++++++-- src/helpers.rs | 1 + src/helpers/datetime.rs | 1 + src/helpers/datetime/msdos.rs | 23 +++ src/lib.rs | 1 + src/types.rs | 31 +++- tests/samples/zip/001.zip | Bin 0 -> 238 bytes tests/zip.rs | 41 ++++- 11 files changed, 473 insertions(+), 16 deletions(-) create mode 100644 src/helpers.rs create mode 100644 src/helpers/datetime.rs create mode 100644 src/helpers/datetime/msdos.rs create mode 100644 tests/samples/zip/001.zip diff --git a/Cargo.lock b/Cargo.lock index 74bc631..b8e2648 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,3 +5,286 @@ version = 3 [[package]] name = "acridotheres_core" version = "0.1.0" +dependencies = [ + "chrono", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cc" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "907d8581360765417f8f2e0e7d602733bbed60156b4465b7617243689ef9b83d" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml index 13a633a..07c707e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,4 @@ path = 'src/lib.rs' # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +chrono = "0.4.38" diff --git a/src/file.rs b/src/file.rs index 1b52668..347f15f 100644 --- a/src/file.rs +++ b/src/file.rs @@ -54,6 +54,12 @@ impl<'a> File<'a> { String::from_utf8(buf).unwrap() } + pub fn read_u8array(&mut self, len: u64) -> Vec { + let mut buf = vec![0; len as usize]; + self.read(&mut buf); + buf + } + pub fn read_u8(&mut self) -> u8 { let mut buf = [0; 1]; self.read(&mut buf); diff --git a/src/formats/zip/parser.rs b/src/formats/zip/parser.rs index 00b1303..0e4edb0 100644 --- a/src/formats/zip/parser.rs +++ b/src/formats/zip/parser.rs @@ -1,11 +1,92 @@ -use crate::{types::ArchiveMetadata, File}; - -pub fn metadata(path: &str) -> ArchiveMetadata { - let mut file = File::new(path); - let lfh_signature = file.read_u32le(); - let filecount = file.read_u128le(); - ArchiveMetadata { - lfh_matches: lfh_signature == 0x04034b50, // local file header 1 signature matches - file_count: filecount as u128, - } // NO! THIS IS NOT A FILE COUNT, THIS IS JUST A VALUE READING TEST +use crate::{ + helpers::datetime::msdos, + types::{ArchiveMetadata, FileEntry, ZipArchiveMetadata, ZipFileEntry}, + File, +}; + +pub fn metadata(file: &mut File) -> ZipArchiveMetadata { + let local_files = read_local_files(file); + + let signature = local_files.1; + + if signature == 0x02014b50 {} + + println!("0x{:x}", signature); + ZipArchiveMetadata { + archive: ArchiveMetadata { format: "zip" }, + files: local_files.0, + } +} + +pub fn get_file(file: &mut File, entry: &ZipFileEntry) -> Vec { + file.seek(entry.file.offset); + file.read_u8array(entry.uncompressed_size as u64) +} + +fn read_local_files(file: &mut File) -> (Vec, u32) { + let mut files: Vec = Vec::new(); + + let mut signature: u32 = file.read_u32le(); + while signature == 0x04034b50 { + let version = file.read_u16le(); + let bit_flag = file.read_u16le(); + let compression_method = match file.read_u16le() { + 0 => "stored", // The file is stored (no compression) + 1 => "shrunk", // The file is Shrunk + 2 => "reduced1", // The file is Reduced with compression factor 1 + 3 => "reduced2", // The file is Reduced with compression factor 2 + 4 => "reduced3", // The file is Reduced with compression factor 3 + 5 => "reduced4", // The file is Reduced with compression factor 4 + 6 => "imploded", // The file is Imploded + 7 => "tokenizing", // Reserved for Tokenizing compression algorithm + 8 => "deflated", // The file is Deflated + 9 => "deflated64", // Enhanced Deflating using Deflate64(tm) + 10 => "dcli", // PKWARE Data Compression Library Imploding (old IBM TERSE) + 11 => "reserved", // Reserved by PKWARE + 12 => "bzip2", // File is compressed using BZIP2 algorithm + 13 => "reserved2", // Reserved by PKWARE + 14 => "lzma", // LZMA + 15 => "reserved3", // Reserved by PKWARE + 16 => "cmpsc", // IBM z/OS CMPSC Compression + 17 => "reserved4", // Reserved by PKWARE + 18 => "terse", // IBM TERSE (new) + 19 => "lz77", // IBM LZ77 z Architecture (PFS) + 20 => "deprecated", // deprecated (use method 93 for zstd) + 93 => "zstd", // Zstandard + 94 => "mp3", // MP3 Compression + 95 => "xz", // XZ Compression + 96 => "jpeg", // JPEG variant + 97 => "wavpack", // WavPack compressed data + 98 => "ppmd", // PPMd version I, Rev 1 + 99 => "aes", // AE-x encryption (see APPENDIX E) + _ => "unknown", + }; + let lastmod_time = file.read_u16le(); + let lastmod_date = file.read_u16le(); + let crc32 = file.read_u32le(); + let size_compressed = file.read_u32le(); + let size_uncompressed = file.read_u32le(); + let name_length = file.read_u16le(); + let extra_length = file.read_u16le(); + let name = file.read_utf8(name_length as u64); + let extra = file.read_u8array(extra_length as u64); + files.push(ZipFileEntry { + file: FileEntry { + path: name, + offset: file.get_position(), + size: size_compressed as u64, + modified: msdos::parse(lastmod_date, lastmod_time), + }, + version, + bit_flag, + compression: compression_method, + uncompressed_size: size_uncompressed, + checksum: crc32, + extra_field: extra, + }); + file.jump(size_compressed as i128); + signature = file.read_u32le(); + }; + + (files, signature) } diff --git a/src/helpers.rs b/src/helpers.rs new file mode 100644 index 0000000..3ea3113 --- /dev/null +++ b/src/helpers.rs @@ -0,0 +1 @@ +pub mod datetime; \ No newline at end of file diff --git a/src/helpers/datetime.rs b/src/helpers/datetime.rs new file mode 100644 index 0000000..6a6fb3e --- /dev/null +++ b/src/helpers/datetime.rs @@ -0,0 +1 @@ +pub mod msdos; \ No newline at end of file diff --git a/src/helpers/datetime/msdos.rs b/src/helpers/datetime/msdos.rs new file mode 100644 index 0000000..e83a0f5 --- /dev/null +++ b/src/helpers/datetime/msdos.rs @@ -0,0 +1,23 @@ +use chrono::{DateTime, Utc}; + +// https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-dosdatetimetofiletime + +pub fn parse(date: u16, time: u16) -> DateTime { + let y = (date >> 9) + 1980; + let m = (date >> 5) & 0x0F; + let d = date & 0x1F; + + let h = time >> 11; + let min = (time >> 5) & 0x3F; + let s = (time & 0x1F) * 2; + + DateTime::parse_from_rfc3339( + format!( + "{:0>4}-{:0>2}-{:0>2}T{:0>2}:{:0>2}:{:0>2}Z", + y, m, d, h, min, s + ) + .as_str(), + ) + .unwrap_or_else(|_| DateTime::from_timestamp(0, 0).unwrap().into()) + .into() +} diff --git a/src/lib.rs b/src/lib.rs index b1ffbe8..4f06753 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ pub mod file; pub mod formats; +pub mod helpers; pub mod types; pub use file::File; diff --git a/src/types.rs b/src/types.rs index 7f17e6b..a8cc044 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,4 +1,31 @@ +use chrono::{DateTime, Utc}; + +#[derive(Debug)] pub struct ArchiveMetadata { - pub lfh_matches: bool, - pub file_count: u128, + pub format: &'static str, +} + +#[derive(Debug)] +pub struct ZipArchiveMetadata { + pub archive: ArchiveMetadata, + pub files: Vec, +} + +#[derive(Debug)] +pub struct FileEntry { + pub path: String, + pub offset: u64, + pub size: u64, + pub modified: DateTime, +} + +#[derive(Debug)] +pub struct ZipFileEntry { + pub file: FileEntry, + pub uncompressed_size: u32, + pub checksum: u32, + pub extra_field: Vec, + pub version: u16, + pub bit_flag: u16, + pub compression: &'static str, } diff --git a/tests/samples/zip/001.zip b/tests/samples/zip/001.zip new file mode 100644 index 0000000000000000000000000000000000000000..a2eca652258eb8e27bd39e5efbe69905e9348c98 GIT binary patch literal 238 zcmWIWW@Zs#0D+W=Zz3d?hgS0e*&xgT#3iZ4C3+H=FH T;LXYgl41hFTp*nd;xGUJs1q=S literal 0 HcmV?d00001 diff --git a/tests/zip.rs b/tests/zip.rs index 1c71176..f909bc3 100644 --- a/tests/zip.rs +++ b/tests/zip.rs @@ -1,8 +1,41 @@ -use corelib; +use corelib::{self, File}; #[test] fn metadata_000() { - let metadata = corelib::formats::zip::parser::metadata("tests/samples/zip/000.zip"); - assert_eq!(metadata.lfh_matches, true); - //assert_eq!(metadata.file_count, 1); + let mut file = File::new("tests/samples/zip/000.zip"); + + let metadata = corelib::formats::zip::parser::metadata(&mut file); + assert_eq!(metadata.files.len(), 1); + assert_eq!(metadata.files[0].file.path, "test.txt"); + assert_eq!(metadata.files[0].file.size, 14); + assert_eq!(metadata.files[0].compression, "stored"); + assert_eq!(metadata.files[0].uncompressed_size, 14); + assert_eq!(metadata.files[0].file.modified.to_rfc3339(), "2024-07-11T18:14:42+00:00"); + //println!("{:#?}", metadata); + + let test_txt = corelib::formats::zip::parser::get_file(&mut file, &metadata.files[0]); + assert_eq!(String::from_utf8(test_txt).unwrap(), "Hello, world!\n"); } + +#[test] +fn metadata_001() { + let mut file = File::new("tests/samples/zip/001.zip"); + + let metadata = corelib::formats::zip::parser::metadata(&mut file); + assert_eq!(metadata.files.len(), 2); + assert_eq!(metadata.files[0].file.path, "test.txt"); + assert_eq!(metadata.files[0].file.size, 14); + assert_eq!(metadata.files[0].compression, "stored"); + assert_eq!(metadata.files[0].uncompressed_size, 14); + assert_eq!(metadata.files[0].file.modified.to_rfc3339(), "2024-07-12T18:11:08+00:00"); + assert_eq!(metadata.files[1].file.path, "test2.txt"); + assert_eq!(metadata.files[1].file.size, 16); + assert_eq!(metadata.files[1].compression, "stored"); + assert_eq!(metadata.files[1].uncompressed_size, 16); + assert_eq!(metadata.files[1].file.modified.to_rfc3339(), "2024-07-12T18:11:26+00:00"); + + let test_txt = corelib::formats::zip::parser::get_file(&mut file, &metadata.files[0]); + assert_eq!(String::from_utf8(test_txt).unwrap(), "Hello, world!\n"); + let test2_txt = corelib::formats::zip::parser::get_file(&mut file, &metadata.files[1]); + assert_eq!(String::from_utf8(test2_txt).unwrap(), "Hello, world! 2\n"); +} \ No newline at end of file