diff --git a/Cargo.toml b/Cargo.toml index e669dc3..46707d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ tokio = { version = "1.9", features = [ "rt-multi-thread", "io-util", ] } +tokio-test = "0.4.4" [features] default = ["object_store", "reqwest"] diff --git a/python/Cargo.lock b/python/Cargo.lock index f8b4f0d..24e2562 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -50,9 +50,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.87" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d556ec1359574147ec0c4fc5eb525f3f23263a592b1a9c07e0a75b427de55c97" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", @@ -127,9 +127,9 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "cc" -version = "1.2.16" +version = "1.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" +checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a" dependencies = [ "shlex", ] @@ -387,7 +387,21 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", "wasm-bindgen", ] @@ -533,14 +547,15 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b2fd658b06e56721792c5df4475705b6cda790e9298d19d2f8af083457bcd127" dependencies = [ 
"android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", "windows-core", ] @@ -595,9 +610,9 @@ dependencies = [ [[package]] name = "icu_locid_transform_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" +checksum = "7515e6d781098bf9f7205ab3fc7e9709d34554ae0b21ddbcb5febfa4bc7df11d" [[package]] name = "icu_normalizer" @@ -619,9 +634,9 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "c5e8338228bdc8ab83303f16b797e177953730f601a96c25d10cb3ab0daa0cb7" [[package]] name = "icu_properties" @@ -640,9 +655,9 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "85fb8799753b75aee8d2a21d7c14d9f38921b54b3dbda10f5a3c7a7b82dba5e2" [[package]] name = "icu_provider" @@ -770,9 +785,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.26" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "md-5" @@ -821,7 +836,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.52.0", ] @@ -886,7 +901,7 @@ dependencies = [ "parking_lot", "percent-encoding", "quick-xml", - "rand", + "rand 0.8.5", "reqwest", "ring", 
"rustls-pemfile", @@ -1116,9 +1131,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.37.2" +version = "0.37.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003" +checksum = "bf763ab1c7a3aa408be466efc86efe35ed1bd3dd74173ed39d6b0d0a6f0ba148" dependencies = [ "memchr", "serde", @@ -1126,11 +1141,12 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" +checksum = "c3bd15a6f2967aef83887dcb9fec0014580467e33720d073560cf015a5683012" dependencies = [ "bytes", + "cfg_aliases", "pin-project-lite", "quinn-proto", "quinn-udp", @@ -1140,17 +1156,18 @@ dependencies = [ "thiserror 2.0.12", "tokio", "tracing", + "web-time", ] [[package]] name = "quinn-proto" -version = "0.11.9" +version = "0.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" +checksum = "b820744eb4dc9b57a3398183639c511b5a26d2ed702cedd3febaa1393caa22cc" dependencies = [ "bytes", - "getrandom", - "rand", + "getrandom 0.3.2", + "rand 0.9.0", "ring", "rustc-hash", "rustls", @@ -1164,9 +1181,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.10" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944" +checksum = "541d0f57c6ec747a90738a52741d3221f7960e8ac2f0ff4b1a63680e033b4ab5" dependencies = [ "cfg_aliases", "libc", @@ -1185,6 +1202,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.8.5" @@ 
-1192,8 +1215,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.3", + "zerocopy", ] [[package]] @@ -1203,7 +1237,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", ] [[package]] @@ -1212,7 +1256,16 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.15", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.2", ] [[package]] @@ -1246,9 +1299,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.14" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "989e327e510263980e231de548a33e63d34962d29ae61b467389a1a09627a254" +checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" dependencies = [ "base64", "bytes", @@ -1298,7 +1351,7 @@ checksum = 
"a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom", + "getrandom 0.2.15", "libc", "untrusted", "windows-sys 0.52.0", @@ -1318,9 +1371,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustls" -version = "0.23.23" +version = "0.23.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" +checksum = "822ee9188ac4ec04a2f0531e55d035fb2de73f18b41a63c70c2712503b6fb13c" dependencies = [ "once_cell", "ring", @@ -1362,9 +1415,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.102.8" +version = "0.103.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +checksum = "fef8b8769aaccf73098557a87cd1816b4f9c7c16811c9c77142aa695c16f2c03" dependencies = [ "ring", "rustls-pki-types", @@ -1838,6 +1891,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.100" @@ -1968,9 +2030,9 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" [[package]] name = "windows-registry" @@ -1985,9 +2047,9 @@ dependencies = [ [[package]] name = "windows-result" -version = "0.3.1" +version = "0.3.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "06374efe858fab7e4f881500e6e86ec8bc28f9462c47e5a9941a0142ad86b189" +checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" dependencies = [ "windows-link", ] @@ -2156,6 +2218,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + [[package]] name = "write16" version = "1.0.0" @@ -2194,18 +2265,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.23" +version = "0.8.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd97444d05a4328b90e75e503a34bad781f14e28a823ad3557f0750df1ebcbc6" +checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.23" +version = "0.8.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6352c01d0edd5db859a63e2605f4ea3183ddbd15e2c4a9e7d32184df75e4f154" +checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be" dependencies = [ "proc-macro2", "quote", diff --git a/python/python/async_tiff/_tiff.pyi b/python/python/async_tiff/_tiff.pyi index 3e32f97..dcc0f67 100644 --- a/python/python/async_tiff/_tiff.pyi +++ b/python/python/async_tiff/_tiff.pyi @@ -16,14 +16,14 @@ class TIFF: path: str, *, store: ObjectStore | ObspecInput, - prefetch: int | None = 16384, + prefetch: int = 32768, ) -> TIFF: """Open a new TIFF. Args: path: The path within the store to read from. store: The backend to use for data fetching. - prefetch: The number of initial bytes to read up front. Defaults to 16384. + prefetch: The number of initial bytes to read up front. Returns: A TIFF instance. 
diff --git a/python/src/reader.rs b/python/src/reader.rs index 31c49b4..73580d4 100644 --- a/python/src/reader.rs +++ b/python/src/reader.rs @@ -115,15 +115,11 @@ struct ObspecReader { } impl AsyncFileReader for ObspecReader { - fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { self.backend.get_range_wrapper(&self.path, range).boxed() } - fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - self.backend.get_range_wrapper(&self.path, range).boxed() - } - - fn get_image_byte_ranges( + fn get_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { diff --git a/python/src/tiff.rs b/python/src/tiff.rs index 0462a92..82f1351 100644 --- a/python/src/tiff.rs +++ b/python/src/tiff.rs @@ -1,6 +1,7 @@ use std::sync::Arc; -use async_tiff::reader::{AsyncFileReader, PrefetchReader}; +use async_tiff::metadata::{PrefetchBuffer, TiffMetadataReader}; +use async_tiff::reader::AsyncFileReader; use async_tiff::TIFF; use pyo3::exceptions::PyIndexError; use pyo3::prelude::*; @@ -20,26 +21,25 @@ pub(crate) struct PyTIFF { #[pymethods] impl PyTIFF { #[classmethod] - #[pyo3(signature = (path, *, store, prefetch=16384))] + #[pyo3(signature = (path, *, store, prefetch=32768))] fn open<'py>( _cls: &'py Bound, py: Python<'py>, path: String, store: StoreInput, - prefetch: Option, + prefetch: u64, ) -> PyResult> { let reader = store.into_async_file_reader(path); let cog_reader = future_into_py(py, async move { - let reader: Arc = if let Some(prefetch) = prefetch { - Arc::new(PrefetchReader::new(reader, prefetch).await.unwrap()) - } else { - reader - }; - Ok(PyTIFF { - tiff: TIFF::try_open(reader.clone()).await.unwrap(), - reader, - }) + let metadata_fetch = PrefetchBuffer::new(reader.clone(), prefetch).await.unwrap(); + let mut metadata_reader = TiffMetadataReader::try_open(&metadata_fetch).await.unwrap(); + let ifds = metadata_reader + 
.read_all_ifds(&metadata_fetch) + .await + .unwrap(); + let tiff = TIFF::new(ifds); + Ok(PyTIFF { tiff, reader }) })?; Ok(cog_reader) } diff --git a/src/cog.rs b/src/cog.rs index e9e3c5b..aee53a0 100644 --- a/src/cog.rs +++ b/src/cog.rs @@ -1,57 +1,19 @@ -use std::sync::Arc; - -use crate::error::AsyncTiffResult; -use crate::ifd::ImageFileDirectories; -use crate::reader::{AsyncCursor, AsyncFileReader}; -use crate::tiff::{TiffError, TiffFormatError}; +use crate::ifd::ImageFileDirectory; /// A TIFF file. #[derive(Debug, Clone)] pub struct TIFF { - ifds: ImageFileDirectories, + ifds: Vec, } impl TIFF { - /// Open a new TIFF file. - /// - /// This will read all the Image File Directories (IFDs) in the file. - pub async fn try_open(reader: Arc) -> AsyncTiffResult { - let mut cursor = AsyncCursor::try_open_tiff(reader).await?; - let version = cursor.read_u16().await?; - - let bigtiff = match version { - 42 => false, - 43 => { - // Read bytesize of offsets (in bigtiff it's alway 8 but provide a way to move to 16 some day) - if cursor.read_u16().await? != 8 { - return Err( - TiffError::FormatError(TiffFormatError::TiffSignatureNotFound).into(), - ); - } - // This constant should always be 0 - if cursor.read_u16().await? != 0 { - return Err( - TiffError::FormatError(TiffFormatError::TiffSignatureNotFound).into(), - ); - } - true - } - _ => return Err(TiffError::FormatError(TiffFormatError::TiffSignatureInvalid).into()), - }; - - let first_ifd_location = if bigtiff { - cursor.read_u64().await? - } else { - cursor.read_u32().await?.into() - }; - - let ifds = ImageFileDirectories::open(&mut cursor, first_ifd_location, bigtiff).await?; - - Ok(Self { ifds }) + /// Create a new TIFF from existing IFDs. + pub fn new(ifds: Vec) -> Self { + Self { ifds } } /// Access the underlying Image File Directories. 
- pub fn ifds(&self) -> &ImageFileDirectories { + pub fn ifds(&self) -> &[ImageFileDirectory] { &self.ifds } } @@ -62,7 +24,8 @@ mod test { use std::sync::Arc; use crate::decoder::DecoderRegistry; - use crate::reader::ObjectReader; + use crate::metadata::{PrefetchBuffer, TiffMetadataReader}; + use crate::reader::{AsyncFileReader, ObjectReader}; use super::*; use object_store::local::LocalFileSystem; @@ -74,11 +37,20 @@ mod test { let folder = "/Users/kyle/github/developmentseed/async-tiff/"; let path = object_store::path::Path::parse("m_4007307_sw_18_060_20220803.tif").unwrap(); let store = Arc::new(LocalFileSystem::new_with_prefix(folder).unwrap()); - let reader = Arc::new(ObjectReader::new(store, path)); - - let cog_reader = TIFF::try_open(reader.clone()).await.unwrap(); + let reader = Arc::new(ObjectReader::new(store, path)) as Arc; + let prefetch_reader = PrefetchBuffer::new(reader.clone(), 32 * 1024) + .await + .unwrap(); + let mut metadata_reader = TiffMetadataReader::try_open(&prefetch_reader) + .await + .unwrap(); + let ifds = metadata_reader + .read_all_ifds(&prefetch_reader) + .await + .unwrap(); + let tiff = TIFF::new(ifds); - let ifd = &cog_reader.ifds.as_ref()[1]; + let ifd = &tiff.ifds[1]; let decoder_registry = DecoderRegistry::default(); let tile = ifd.fetch_tile(0, 0, reader.as_ref()).await.unwrap(); let tile = tile.decode(&decoder_registry).unwrap(); diff --git a/src/ifd.rs b/src/ifd.rs index 90f399f..ef91ed5 100644 --- a/src/ifd.rs +++ b/src/ifd.rs @@ -1,5 +1,4 @@ use std::collections::HashMap; -use std::io::Read; use std::ops::Range; use bytes::Bytes; @@ -7,53 +6,16 @@ use num_enum::TryFromPrimitive; use crate::error::{AsyncTiffError, AsyncTiffResult}; use crate::geo::{GeoKeyDirectory, GeoKeyTag}; -use crate::reader::{AsyncCursor, AsyncFileReader}; +use crate::reader::AsyncFileReader; use crate::tiff::tags::{ CompressionMethod, PhotometricInterpretation, PlanarConfiguration, Predictor, ResolutionUnit, - SampleFormat, Tag, Type, + SampleFormat, 
Tag, }; use crate::tiff::{TiffError, Value}; use crate::tile::Tile; const DOCUMENT_NAME: u16 = 269; -/// A collection of all the IFD -// TODO: maybe separate out the primary/first image IFD out of the vec, as that one should have -// geospatial metadata? -#[derive(Debug, Clone)] -pub struct ImageFileDirectories { - /// There's always at least one IFD in a TIFF. We store this separately - ifds: Vec, - // Is it guaranteed that if masks exist that there will be one per image IFD? Or could there be - // different numbers of image ifds and mask ifds? - // mask_ifds: Option>, -} - -impl AsRef<[ImageFileDirectory]> for ImageFileDirectories { - fn as_ref(&self) -> &[ImageFileDirectory] { - &self.ifds - } -} - -impl ImageFileDirectories { - pub(crate) async fn open( - cursor: &mut AsyncCursor, - ifd_offset: u64, - bigtiff: bool, - ) -> AsyncTiffResult { - let mut next_ifd_offset = Some(ifd_offset); - - let mut ifds = vec![]; - while let Some(offset) = next_ifd_offset { - let ifd = ImageFileDirectory::read(cursor, offset, bigtiff).await?; - next_ifd_offset = ifd.next_ifd_offset(); - ifds.push(ifd); - } - - Ok(Self { ifds }) - } -} - /// An ImageFileDirectory representing Image content // The ordering of these tags matches the sorted order in TIFF spec Appendix A #[allow(dead_code)] @@ -177,69 +139,11 @@ pub struct ImageFileDirectory { // no_data // gdal_metadata pub(crate) other_tags: HashMap, - - pub(crate) next_ifd_offset: Option, } impl ImageFileDirectory { - /// Read and parse the IFD starting at the given file offset - async fn read( - cursor: &mut AsyncCursor, - ifd_start: u64, - bigtiff: bool, - ) -> AsyncTiffResult { - cursor.seek(ifd_start); - - let tag_count = if bigtiff { - cursor.read_u64().await? 
- } else { - cursor.read_u16().await?.into() - }; - let mut tags = HashMap::with_capacity(tag_count as usize); - for _ in 0..tag_count { - let (tag_name, tag_value) = read_tag(cursor, bigtiff).await?; - tags.insert(tag_name, tag_value); - } - - // Tag 2 bytes - // Type 2 bytes - // Count: - // - bigtiff: 8 bytes - // - else: 4 bytes - // Value: - // - bigtiff: 8 bytes either a pointer the value itself - // - else: 4 bytes either a pointer the value itself - let ifd_entry_byte_size = if bigtiff { 20 } else { 12 }; - // The size of `tag_count` that we read above - let tag_count_byte_size = if bigtiff { 8 } else { 2 }; - - // Reset the cursor position before reading the next ifd offset - cursor.seek(ifd_start + (ifd_entry_byte_size * tag_count) + tag_count_byte_size); - - let next_ifd_offset = if bigtiff { - cursor.read_u64().await? - } else { - cursor.read_u32().await?.into() - }; - - // If the ifd_offset is 0, stop - let next_ifd_offset = if next_ifd_offset == 0 { - None - } else { - Some(next_ifd_offset) - }; - - Self::from_tags(tags, next_ifd_offset) - } - - fn next_ifd_offset(&self) -> Option { - self.next_ifd_offset - } - - fn from_tags( - mut tag_data: HashMap, - next_ifd_offset: Option, - ) -> AsyncTiffResult { + /// Create a new ImageFileDirectory from tag data + pub fn from_tags(tag_data: HashMap) -> AsyncTiffResult { let mut new_subfile_type = None; let mut image_width = None; let mut image_height = None; @@ -281,7 +185,10 @@ impl ImageFileDirectory { let mut other_tags = HashMap::new(); - tag_data.drain().try_for_each(|(tag, value)| { + // for x in tag_data.into_iter() { + + // } + tag_data.into_iter().try_for_each(|(tag, value)| { match tag { Tag::NewSubfileType => new_subfile_type = Some(value.into_u32()?), Tag::ImageWidth => image_width = Some(value.into_u32()?), @@ -485,7 +392,6 @@ impl ImageFileDirectory { model_pixel_scale, model_tiepoint, other_tags, - next_ifd_offset, }) } @@ -779,7 +685,7 @@ impl ImageFileDirectory { let range = self 
.get_tile_byte_range(x, y) .ok_or(AsyncTiffError::General("Not a tiled TIFF".to_string()))?; - let compressed_bytes = reader.get_image_bytes(range).await?; + let compressed_bytes = reader.get_bytes(range).await?; Ok(Tile { x, y, @@ -809,8 +715,8 @@ impl ImageFileDirectory { }) .collect::>>()?; - // 2: Fetch using `get_image_byte_ranges` - let buffers = reader.get_image_byte_ranges(byte_ranges).await?; + // 2: Fetch using `get_byte_ranges` + let buffers = reader.get_byte_ranges(byte_ranges).await?; // 3: Create tile objects let mut tiles = vec![]; @@ -836,370 +742,3 @@ impl ImageFileDirectory { Some((x_count as usize, y_count as usize)) } } - -/// Read a single tag from the cursor -async fn read_tag(cursor: &mut AsyncCursor, bigtiff: bool) -> AsyncTiffResult<(Tag, Value)> { - let start_cursor_position = cursor.position(); - - let tag_name = Tag::from_u16_exhaustive(cursor.read_u16().await?); - - let tag_type_code = cursor.read_u16().await?; - let tag_type = Type::from_u16(tag_type_code).expect( - "Unknown tag type {tag_type_code}. TODO: we should skip entries with unknown tag types.", - ); - let count = if bigtiff { - cursor.read_u64().await? - } else { - cursor.read_u32().await?.into() - }; - - let tag_value = read_tag_value(cursor, tag_type, count, bigtiff).await?; - - // TODO: better handle management of cursor state - let ifd_entry_size = if bigtiff { 20 } else { 12 }; - cursor.seek(start_cursor_position + ifd_entry_size); - - Ok((tag_name, tag_value)) -} - -/// Read a tag's value from the cursor -/// -/// NOTE: this does not maintain cursor state -// This is derived from the upstream tiff crate: -// https://github.com/image-rs/image-tiff/blob/6dc7a266d30291db1e706c8133357931f9e2a053/src/decoder/ifd.rs#L369-L639 -async fn read_tag_value( - cursor: &mut AsyncCursor, - tag_type: Type, - count: u64, - bigtiff: bool, -) -> AsyncTiffResult { - // Case 1: there are no values so we can return immediately. 
- if count == 0 { - return Ok(Value::List(vec![])); - } - - let tag_size = match tag_type { - Type::BYTE | Type::SBYTE | Type::ASCII | Type::UNDEFINED => 1, - Type::SHORT | Type::SSHORT => 2, - Type::LONG | Type::SLONG | Type::FLOAT | Type::IFD => 4, - Type::LONG8 - | Type::SLONG8 - | Type::DOUBLE - | Type::RATIONAL - | Type::SRATIONAL - | Type::IFD8 => 8, - }; - - let value_byte_length = count.checked_mul(tag_size).unwrap(); - - // Case 2: there is one value. - if count == 1 { - // 2a: the value is 5-8 bytes and we're in BigTiff mode. - if bigtiff && value_byte_length > 4 && value_byte_length <= 8 { - let mut data = cursor.read(value_byte_length).await?; - - return Ok(match tag_type { - Type::LONG8 => Value::UnsignedBig(data.read_u64()?), - Type::SLONG8 => Value::SignedBig(data.read_i64()?), - Type::DOUBLE => Value::Double(data.read_f64()?), - Type::RATIONAL => Value::Rational(data.read_u32()?, data.read_u32()?), - Type::SRATIONAL => Value::SRational(data.read_i32()?, data.read_i32()?), - Type::IFD8 => Value::IfdBig(data.read_u64()?), - Type::BYTE - | Type::SBYTE - | Type::ASCII - | Type::UNDEFINED - | Type::SHORT - | Type::SSHORT - | Type::LONG - | Type::SLONG - | Type::FLOAT - | Type::IFD => unreachable!(), - }); - } - - // NOTE: we should only be reading value_byte_length when it's 4 bytes or fewer. Right now - // we're reading even if it's 8 bytes, but then only using the first 4 bytes of this - // buffer. - let mut data = cursor.read(value_byte_length).await?; - - // 2b: the value is at most 4 bytes or doesn't fit in the offset field. - return Ok(match tag_type { - Type::BYTE | Type::UNDEFINED => Value::Byte(data.read_u8()?), - Type::SBYTE => Value::Signed(data.read_i8()? as i32), - Type::SHORT => Value::Short(data.read_u16()?), - Type::SSHORT => Value::Signed(data.read_i16()? 
as i32), - Type::LONG => Value::Unsigned(data.read_u32()?), - Type::SLONG => Value::Signed(data.read_i32()?), - Type::FLOAT => Value::Float(data.read_f32()?), - Type::ASCII => { - if data.as_ref()[0] == 0 { - Value::Ascii("".to_string()) - } else { - panic!("Invalid tag"); - // return Err(TiffError::FormatError(TiffFormatError::InvalidTag)); - } - } - Type::LONG8 => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - Value::UnsignedBig(cursor.read_u64().await?) - } - Type::SLONG8 => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - Value::SignedBig(cursor.read_i64().await?) - } - Type::DOUBLE => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - Value::Double(cursor.read_f64().await?) - } - Type::RATIONAL => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - let numerator = cursor.read_u32().await?; - let denominator = cursor.read_u32().await?; - Value::Rational(numerator, denominator) - } - Type::SRATIONAL => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - let numerator = cursor.read_i32().await?; - let denominator = cursor.read_i32().await?; - Value::SRational(numerator, denominator) - } - Type::IFD => Value::Ifd(data.read_u32()?), - Type::IFD8 => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - Value::IfdBig(cursor.read_u64().await?) - } - }); - } - - // Case 3: There is more than one value, but it fits in the offset field. 
- if value_byte_length <= 4 || bigtiff && value_byte_length <= 8 { - let mut data = cursor.read(value_byte_length).await?; - if bigtiff { - cursor.advance(8 - value_byte_length); - } else { - cursor.advance(4 - value_byte_length); - } - - match tag_type { - Type::BYTE | Type::UNDEFINED => { - return { - Ok(Value::List( - (0..count) - .map(|_| Value::Byte(data.read_u8().unwrap())) - .collect(), - )) - }; - } - Type::SBYTE => { - return { - Ok(Value::List( - (0..count) - .map(|_| Value::Signed(data.read_i8().unwrap() as i32)) - .collect(), - )) - } - } - Type::ASCII => { - let mut buf = vec![0; count as usize]; - data.read_exact(&mut buf)?; - if buf.is_ascii() && buf.ends_with(&[0]) { - let v = std::str::from_utf8(&buf) - .map_err(|err| AsyncTiffError::General(err.to_string()))?; - let v = v.trim_matches(char::from(0)); - return Ok(Value::Ascii(v.into())); - } else { - panic!("Invalid tag"); - // return Err(TiffError::FormatError(TiffFormatError::InvalidTag)); - } - } - Type::SHORT => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Short(data.read_u16()?)); - } - return Ok(Value::List(v)); - } - Type::SSHORT => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Signed(i32::from(data.read_i16()?))); - } - return Ok(Value::List(v)); - } - Type::LONG => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Unsigned(data.read_u32()?)); - } - return Ok(Value::List(v)); - } - Type::SLONG => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Signed(data.read_i32()?)); - } - return Ok(Value::List(v)); - } - Type::FLOAT => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Float(data.read_f32()?)); - } - return Ok(Value::List(v)); - } - Type::IFD => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Ifd(data.read_u32()?)); - } - return Ok(Value::List(v)); - } - Type::LONG8 - | Type::SLONG8 - | Type::RATIONAL - | Type::SRATIONAL - | Type::DOUBLE - | Type::IFD8 => { - unreachable!() - } - 
} - } - - // Seek cursor - let offset = if bigtiff { - cursor.read_u64().await? - } else { - cursor.read_u32().await?.into() - }; - cursor.seek(offset); - - // Case 4: there is more than one value, and it doesn't fit in the offset field. - match tag_type { - // TODO check if this could give wrong results - // at a different endianess of file/computer. - Type::BYTE | Type::UNDEFINED => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Byte(cursor.read_u8().await?)) - } - Ok(Value::List(v)) - } - Type::SBYTE => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Signed(cursor.read_i8().await? as i32)) - } - Ok(Value::List(v)) - } - Type::SHORT => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Short(cursor.read_u16().await?)) - } - Ok(Value::List(v)) - } - Type::SSHORT => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Signed(cursor.read_i16().await? 
as i32)) - } - Ok(Value::List(v)) - } - Type::LONG => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Unsigned(cursor.read_u32().await?)) - } - Ok(Value::List(v)) - } - Type::SLONG => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Signed(cursor.read_i32().await?)) - } - Ok(Value::List(v)) - } - Type::FLOAT => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Float(cursor.read_f32().await?)) - } - Ok(Value::List(v)) - } - Type::DOUBLE => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Double(cursor.read_f64().await?)) - } - Ok(Value::List(v)) - } - Type::RATIONAL => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Rational( - cursor.read_u32().await?, - cursor.read_u32().await?, - )) - } - Ok(Value::List(v)) - } - Type::SRATIONAL => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::SRational( - cursor.read_i32().await?, - cursor.read_i32().await?, - )) - } - Ok(Value::List(v)) - } - Type::LONG8 => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::UnsignedBig(cursor.read_u64().await?)) - } - Ok(Value::List(v)) - } - Type::SLONG8 => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::SignedBig(cursor.read_i64().await?)) - } - Ok(Value::List(v)) - } - Type::IFD => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Ifd(cursor.read_u32().await?)) - } - Ok(Value::List(v)) - } - Type::IFD8 => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::IfdBig(cursor.read_u64().await?)) - } - Ok(Value::List(v)) - } - Type::ASCII => { - let mut out = vec![0; count as _]; - let mut buf = cursor.read(count).await?; - buf.read_exact(&mut out)?; - - // Strings may be null-terminated, so we trim anything downstream of the null byte 
- if let Some(first) = out.iter().position(|&b| b == 0) { - out.truncate(first); - } - Ok(Value::Ascii( - String::from_utf8(out).map_err(|err| AsyncTiffError::General(err.to_string()))?, - )) - } - } -} diff --git a/src/lib.rs b/src/lib.rs index 6c3445b..76c94bc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,9 +8,10 @@ pub mod decoder; pub mod error; pub mod geo; mod ifd; +pub mod metadata; pub mod tiff; mod tile; pub use cog::TIFF; -pub use ifd::{ImageFileDirectories, ImageFileDirectory}; +pub use ifd::ImageFileDirectory; pub use tile::Tile; diff --git a/src/metadata/fetch.rs b/src/metadata/fetch.rs new file mode 100644 index 0000000..126b3ae --- /dev/null +++ b/src/metadata/fetch.rs @@ -0,0 +1,155 @@ +use std::ops::Range; + +use bytes::Bytes; +use futures::future::BoxFuture; +use futures::FutureExt; + +use crate::error::AsyncTiffResult; +use crate::reader::{AsyncFileReader, EndianAwareReader, Endianness}; + +/// A data source that can be used with [`TiffMetadataReader`] and [`ImageFileDirectoryReader`] to +/// load [`ImageFileDirectory`]s. +/// +/// Note that implementation is provided for [`AsyncFileReader`]. +pub trait MetadataFetch { + /// Return a future that fetches the specified range of bytes asynchronously + /// + /// Note the returned type is a boxed future, often created by + /// [futures::FutureExt::boxed]. See the trait documentation for an example. + fn fetch(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; +} + +impl MetadataFetch for T { + fn fetch(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.get_bytes(range) + } +} + +/// Buffering for the first `N` bytes of a file. +/// +/// This is designed so that the async requests made by the underlying tag reader get intercepted +/// here and served from the existing buffer when possible. +#[derive(Debug)] +pub struct PrefetchBuffer { + fetch: F, + buffer: Bytes, +} + +impl PrefetchBuffer { + /// Construct a new PrefetchBuffer, catching the first `prefetch` bytes of the file. 
+ pub async fn new(fetch: F, prefetch: u64) -> AsyncTiffResult { + let buffer = fetch.fetch(0..prefetch).await?; + Ok(Self { fetch, buffer }) + } +} + +impl MetadataFetch for PrefetchBuffer { + fn fetch(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + if range.start < self.buffer.len() as _ { + if range.end < self.buffer.len() as _ { + let usize_range = range.start as usize..range.end as usize; + let result = self.buffer.slice(usize_range); + async { Ok(result) }.boxed() + } else { + // TODO: reuse partial internal buffer + self.fetch.fetch(range) + } + } else { + self.fetch.fetch(range) + } + } +} + +pub(crate) struct MetadataCursor<'a, F: MetadataFetch> { + fetch: &'a F, + offset: u64, + endianness: Endianness, +} + +impl<'a, F: MetadataFetch> MetadataCursor<'a, F> { + pub fn new(fetch: &'a F, endianness: Endianness) -> Self { + Self { + fetch, + offset: 0, + endianness, + } + } + + pub fn new_with_offset(fetch: &'a F, endianness: Endianness, offset: u64) -> Self { + Self { + fetch, + offset, + endianness, + } + } + + pub fn with_offset(mut self, offset: u64) -> Self { + self.offset = offset; + self + } + + pub fn seek(&mut self, offset: u64) { + self.offset = offset; + } + + /// Advance cursor position by a set amount + pub(crate) fn advance(&mut self, amount: u64) { + self.offset += amount; + } + + /// Read the given number of bytes, advancing the internal cursor state by the same amount. + pub(crate) async fn read(&mut self, length: u64) -> AsyncTiffResult { + let range = self.offset as _..(self.offset + length) as _; + self.offset += length; + let bytes = self.fetch.fetch(range).await?; + Ok(EndianAwareReader::new(bytes, self.endianness)) + } + + /// Read a u8 from the cursor, advancing the internal state by 1 byte. + pub(crate) async fn read_u8(&mut self) -> AsyncTiffResult { + self.read(1).await?.read_u8() + } + + /// Read a i8 from the cursor, advancing the internal state by 1 byte. 
+ pub(crate) async fn read_i8(&mut self) -> AsyncTiffResult { + self.read(1).await?.read_i8() + } + + /// Read a u16 from the cursor, advancing the internal state by 2 bytes. + pub(crate) async fn read_u16(&mut self) -> AsyncTiffResult { + self.read(2).await?.read_u16() + } + + /// Read a i16 from the cursor, advancing the internal state by 2 bytes. + pub(crate) async fn read_i16(&mut self) -> AsyncTiffResult { + self.read(2).await?.read_i16() + } + + /// Read a u32 from the cursor, advancing the internal state by 4 bytes. + pub(crate) async fn read_u32(&mut self) -> AsyncTiffResult { + self.read(4).await?.read_u32() + } + + /// Read a i32 from the cursor, advancing the internal state by 4 bytes. + pub(crate) async fn read_i32(&mut self) -> AsyncTiffResult { + self.read(4).await?.read_i32() + } + + /// Read a u64 from the cursor, advancing the internal state by 8 bytes. + pub(crate) async fn read_u64(&mut self) -> AsyncTiffResult { + self.read(8).await?.read_u64() + } + + /// Read a i64 from the cursor, advancing the internal state by 8 bytes. + pub(crate) async fn read_i64(&mut self) -> AsyncTiffResult { + self.read(8).await?.read_i64() + } + + pub(crate) async fn read_f32(&mut self) -> AsyncTiffResult { + self.read(4).await?.read_f32() + } + + pub(crate) async fn read_f64(&mut self) -> AsyncTiffResult { + self.read(8).await?.read_f64() + } +} diff --git a/src/metadata/mod.rs b/src/metadata/mod.rs new file mode 100644 index 0000000..3592014 --- /dev/null +++ b/src/metadata/mod.rs @@ -0,0 +1,65 @@ +//! API for reading metadata out of a TIFF file. +//! +//! ### Reading all TIFF metadata +//! +//! We can use [`TiffMetadataReader::read_all_ifds`] to read all IFDs up front: +//! +//! ``` +//! # tokio_test::block_on(async { +//! use std::env::current_dir; +//! use std::sync::Arc; +//! +//! use object_store::local::LocalFileSystem; +//! +//! use async_tiff::metadata::{PrefetchBuffer, TiffMetadataReader}; +//! use async_tiff::reader::ObjectReader; +//! +//! 
// Create new Arc +//! let store = Arc::new(LocalFileSystem::new_with_prefix(current_dir().unwrap()).unwrap()); +//! +//! // Create new ObjectReader to map the ObjectStore to the AsyncFileReader trait +//! let reader = ObjectReader::new( +//! store, +//! "tests/image_tiff/images/tiled-jpeg-rgb-u8.tif".into(), +//! ); +//! +//! // Use PrefetchBuffer to ensure that a given number of bytes at the start of the +//! // file are prefetched. +//! // +//! // This or a similar caching layer should **always** be used and ensures that the +//! // underlying read calls that the TiffMetadataReader makes don't translate to actual +//! // network fetches. +//! let prefetch_reader = PrefetchBuffer::new(reader.clone(), 32 * 1024) +//! .await +//! .unwrap(); +//! +//! // Create a TiffMetadataReader wrapping some MetadataFetch +//! let mut metadata_reader = TiffMetadataReader::try_open(&prefetch_reader) +//! .await +//! .unwrap(); +//! +//! // Read all IFDs out of the source. +//! let ifds = metadata_reader +//! .read_all_ifds(&prefetch_reader) +//! .await +//! .unwrap(); +//! # }) +//! ``` +//! +//! +//! ### Caching/prefetching/buffering +//! +//! The underlying [`ImageFileDirectoryReader`] used to read tags out of the TIFF file reads each +//! tag individually. This means that it will make many small byte range requests to the +//! [`MetadataFetch`] implementation. +//! +//! Thus, it is **imperative to always supply some sort of caching, prefetching, or buffering** +//! middleware when reading metadata. [`PrefetchBuffer`] is an example of this, which +//! fetches the first `N` bytes out of a file. +//! 
+ +mod fetch; +mod reader; + +pub use fetch::{MetadataFetch, PrefetchBuffer}; +pub use reader::{ImageFileDirectoryReader, TiffMetadataReader}; diff --git a/src/metadata/reader.rs b/src/metadata/reader.rs new file mode 100644 index 0000000..cc02322 --- /dev/null +++ b/src/metadata/reader.rs @@ -0,0 +1,622 @@ +use std::collections::HashMap; +use std::io::Read; + +use bytes::Bytes; + +use crate::error::{AsyncTiffError, AsyncTiffResult}; +use crate::metadata::fetch::MetadataCursor; +use crate::metadata::MetadataFetch; +use crate::reader::Endianness; +use crate::tiff::tags::{Tag, Type}; +use crate::tiff::{TiffError, TiffFormatError, Value}; +use crate::ImageFileDirectory; + +/// Entry point to reading TIFF metadata. +/// +/// This is a stateful reader because we don't know how many IFDs will be encountered. +/// +/// ```notest +/// // fetch implements MetadataFetch +/// let mut metadata_reader = TiffMetadataReader::try_open(&fetch).await?; +/// let ifds = metadata_reader.read_all_ifds(&fetch).await?; +/// ``` +pub struct TiffMetadataReader { + endianness: Endianness, + bigtiff: bool, + next_ifd_offset: Option, +} + +impl TiffMetadataReader { + /// Open a new TIFF file, validating the magic bytes, reading the endianness, and checking for + /// the bigtiff flag. + /// + /// This does not read any IFD metadata. + pub async fn try_open(fetch: &F) -> AsyncTiffResult { + let magic_bytes = fetch.fetch(0..2).await?; + + // Should be b"II" for little endian or b"MM" for big endian + let endianness = if magic_bytes == Bytes::from_static(b"II") { + Endianness::LittleEndian + } else if magic_bytes == Bytes::from_static(b"MM") { + Endianness::BigEndian + } else { + return Err(AsyncTiffError::General(format!( + "unexpected magic bytes {magic_bytes:?}" + ))); + }; + + // Set offset to 2 since we've already read magic bytes. 
+ let mut cursor = MetadataCursor::new(fetch, endianness).with_offset(2); + + let version = cursor.read_u16().await?; + let bigtiff = match version { + 42 => false, + 43 => { + // Read bytesize of offsets (in bigtiff it's always 8 but provide a way to move to 16 some day) + if cursor.read_u16().await? != 8 { + return Err( + TiffError::FormatError(TiffFormatError::TiffSignatureNotFound).into(), + ); + } + // This constant should always be 0 + if cursor.read_u16().await? != 0 { + return Err( + TiffError::FormatError(TiffFormatError::TiffSignatureNotFound).into(), + ); + } + true + } + _ => return Err(TiffError::FormatError(TiffFormatError::TiffSignatureInvalid).into()), + }; + + let first_ifd_location = if bigtiff { + cursor.read_u64().await? + } else { + cursor.read_u32().await?.into() + }; + + Ok(Self { + endianness, + bigtiff, + next_ifd_offset: Some(first_ifd_location), + }) + } + + /// Returns the endianness of the file. + pub fn endianness(&self) -> Endianness { + self.endianness + } + + /// Returns `true` if this is a bigtiff file. + pub fn bigtiff(&self) -> bool { + self.bigtiff + } + + /// Returns `true` if there are more IFDs to read. + pub fn has_next_ifd(&self) -> bool { + self.next_ifd_offset.is_some() + } + + /// The byte offset of the start of the next IFD. + /// + /// This will be `None` if all IFDs have already been read. + pub fn next_ifd_offset(&self) -> Option { + self.next_ifd_offset + } + + /// Read the next IFD from the file. + /// + /// If there are no more IFDs, returns `None`. + pub async fn read_next_ifd( + &mut self, + fetch: &F, + ) -> AsyncTiffResult> { + if let Some(ifd_start) = self.next_ifd_offset { + let ifd_reader = + ImageFileDirectoryReader::open(fetch, ifd_start, self.bigtiff, self.endianness) + .await?; + let ifd = ifd_reader.read(fetch).await?; + let next_ifd_offset = ifd_reader.finish(fetch).await?; + self.next_ifd_offset = next_ifd_offset; + Ok(Some(ifd)) + } else { + Ok(None) + } + } + + /// Read all IFDs from the file. 
+ pub async fn read_all_ifds( + &mut self, + fetch: &F, + ) -> AsyncTiffResult> { + let mut ifds = vec![]; + while let Some(ifd) = self.read_next_ifd(fetch).await? { + ifds.push(ifd); + } + Ok(ifds) + } +} + +/// Reads the [`ImageFileDirectory`] metadata. +/// +/// TIFF metadata is not necessarily contiguous in the files: IFDs are normally all stored +/// contiguously in the header, but the spec allows them to be non-contiguous or spread out through +/// the file. +/// +/// Note that you must call [`finish`][ImageFileDirectoryReader::finish] to read the offset of the +/// following IFD. +pub struct ImageFileDirectoryReader { + endianness: Endianness, + bigtiff: bool, + /// The byte offset of the beginning of this IFD + ifd_start_offset: u64, + /// The number of tags in this IFD + tag_count: u64, + /// The number of bytes that each IFD entry takes up. + /// This is 12 bytes for normal TIFF and 20 bytes for BigTIFF. + ifd_entry_byte_size: u64, + /// The number of bytes that the value for the number of tags takes up. + tag_count_byte_size: u64, +} + +impl ImageFileDirectoryReader { + /// Read and parse the IFD starting at the given file offset + pub async fn open( + fetch: &F, + ifd_start_offset: u64, + bigtiff: bool, + endianness: Endianness, + ) -> AsyncTiffResult { + let mut cursor = MetadataCursor::new_with_offset(fetch, endianness, ifd_start_offset); + + // Tag 2 bytes + // Type 2 bytes + // Count: + // - bigtiff: 8 bytes + // - else: 4 bytes + // Value: + // - bigtiff: 8 bytes either a pointer or the value itself + // - else: 4 bytes either a pointer or the value itself + let ifd_entry_byte_size = if bigtiff { 20 } else { 12 }; + // The size of `tag_count` that we read below + let tag_count_byte_size = if bigtiff { 8 } else { 2 }; + + let tag_count = if bigtiff { + cursor.read_u64().await? 
+ } else { + cursor.read_u16().await?.into() + }; + + Ok(Self { + endianness, + bigtiff, + ifd_entry_byte_size, + tag_count, + tag_count_byte_size, + ifd_start_offset, + }) + } + + /// Manually read the tag with the specified index. + /// + /// Panics if the tag index is out of range of the tag count. + /// + /// This can be useful if you need to access tags at a low level. You'll need to call + /// [`ImageFileDirectory::from_tags`] on the resulting collection of tags. + pub async fn read_tag( + &self, + fetch: &F, + tag_idx: u64, + ) -> AsyncTiffResult<(Tag, Value)> { + assert!(tag_idx < self.tag_count); + let tag_offset = + self.ifd_start_offset + self.tag_count_byte_size + (self.ifd_entry_byte_size * tag_idx); + let (tag_name, tag_value) = + read_tag(fetch, tag_offset, self.endianness, self.bigtiff).await?; + Ok((tag_name, tag_value)) + } + + /// Read all tags out of this IFD. + /// + /// Keep in mind that you'll still need to call [`finish`][Self::finish] to get the byte offset + /// of the next IFD. + pub async fn read(&self, fetch: &F) -> AsyncTiffResult { + let mut tags = HashMap::with_capacity(self.tag_count as usize); + for tag_idx in 0..self.tag_count { + let (tag, value) = self.read_tag(fetch, tag_idx).await?; + tags.insert(tag, value); + } + ImageFileDirectory::from_tags(tags) + } + + /// Finish this reader, reading the byte offset of the next IFD + pub async fn finish(self, fetch: &F) -> AsyncTiffResult> { + // The byte offset for reading the next ifd + let next_ifd_byte_offset = self.ifd_start_offset + + self.tag_count_byte_size + + (self.ifd_entry_byte_size * self.tag_count); + let mut cursor = + MetadataCursor::new_with_offset(fetch, self.endianness, next_ifd_byte_offset); + + let next_ifd_offset = if self.bigtiff { + cursor.read_u64().await? 
+ } else { + cursor.read_u32().await?.into() + }; + + // If the ifd_offset is 0, no more IFDs + if next_ifd_offset == 0 { + Ok(None) + } else { + Ok(Some(next_ifd_offset)) + } + } +} + +/// Read a single tag from the cursor +async fn read_tag( + fetch: &F, + tag_offset: u64, + endianness: Endianness, + bigtiff: bool, +) -> AsyncTiffResult<(Tag, Value)> { + let mut cursor = MetadataCursor::new_with_offset(fetch, endianness, tag_offset); + + let tag_name = Tag::from_u16_exhaustive(cursor.read_u16().await?); + + let tag_type_code = cursor.read_u16().await?; + let tag_type = Type::from_u16(tag_type_code).expect( + "Unknown tag type {tag_type_code}. TODO: we should skip entries with unknown tag types.", + ); + let count = if bigtiff { + cursor.read_u64().await? + } else { + cursor.read_u32().await?.into() + }; + + let tag_value = read_tag_value(&mut cursor, tag_type, count, bigtiff).await?; + + Ok((tag_name, tag_value)) +} + +/// Read a tag's value from the cursor +/// +/// NOTE: this does not maintain cursor state +// This is derived from the upstream tiff crate: +// https://github.com/image-rs/image-tiff/blob/6dc7a266d30291db1e706c8133357931f9e2a053/src/decoder/ifd.rs#L369-L639 +async fn read_tag_value( + cursor: &mut MetadataCursor<'_, F>, + tag_type: Type, + count: u64, + bigtiff: bool, +) -> AsyncTiffResult { + // Case 1: there are no values so we can return immediately. + if count == 0 { + return Ok(Value::List(vec![])); + } + + let tag_size = match tag_type { + Type::BYTE | Type::SBYTE | Type::ASCII | Type::UNDEFINED => 1, + Type::SHORT | Type::SSHORT => 2, + Type::LONG | Type::SLONG | Type::FLOAT | Type::IFD => 4, + Type::LONG8 + | Type::SLONG8 + | Type::DOUBLE + | Type::RATIONAL + | Type::SRATIONAL + | Type::IFD8 => 8, + }; + + let value_byte_length = count.checked_mul(tag_size).unwrap(); + + // Case 2: there is one value. + if count == 1 { + // 2a: the value is 5-8 bytes and we're in BigTiff mode. 
+ if bigtiff && value_byte_length > 4 && value_byte_length <= 8 { + let mut data = cursor.read(value_byte_length).await?; + + return Ok(match tag_type { + Type::LONG8 => Value::UnsignedBig(data.read_u64()?), + Type::SLONG8 => Value::SignedBig(data.read_i64()?), + Type::DOUBLE => Value::Double(data.read_f64()?), + Type::RATIONAL => Value::Rational(data.read_u32()?, data.read_u32()?), + Type::SRATIONAL => Value::SRational(data.read_i32()?, data.read_i32()?), + Type::IFD8 => Value::IfdBig(data.read_u64()?), + Type::BYTE + | Type::SBYTE + | Type::ASCII + | Type::UNDEFINED + | Type::SHORT + | Type::SSHORT + | Type::LONG + | Type::SLONG + | Type::FLOAT + | Type::IFD => unreachable!(), + }); + } + + // NOTE: we should only be reading value_byte_length when it's 4 bytes or fewer. Right now + // we're reading even if it's 8 bytes, but then only using the first 4 bytes of this + // buffer. + let mut data = cursor.read(value_byte_length).await?; + + // 2b: the value is at most 4 bytes or doesn't fit in the offset field. + return Ok(match tag_type { + Type::BYTE | Type::UNDEFINED => Value::Byte(data.read_u8()?), + Type::SBYTE => Value::Signed(data.read_i8()? as i32), + Type::SHORT => Value::Short(data.read_u16()?), + Type::SSHORT => Value::Signed(data.read_i16()? as i32), + Type::LONG => Value::Unsigned(data.read_u32()?), + Type::SLONG => Value::Signed(data.read_i32()?), + Type::FLOAT => Value::Float(data.read_f32()?), + Type::ASCII => { + if data.as_ref()[0] == 0 { + Value::Ascii("".to_string()) + } else { + panic!("Invalid tag"); + // return Err(TiffError::FormatError(TiffFormatError::InvalidTag)); + } + } + Type::LONG8 => { + let offset = data.read_u32()?; + cursor.seek(offset as _); + Value::UnsignedBig(cursor.read_u64().await?) + } + Type::SLONG8 => { + let offset = data.read_u32()?; + cursor.seek(offset as _); + Value::SignedBig(cursor.read_i64().await?) 
+ } + Type::DOUBLE => { + let offset = data.read_u32()?; + cursor.seek(offset as _); + Value::Double(cursor.read_f64().await?) + } + Type::RATIONAL => { + let offset = data.read_u32()?; + cursor.seek(offset as _); + let numerator = cursor.read_u32().await?; + let denominator = cursor.read_u32().await?; + Value::Rational(numerator, denominator) + } + Type::SRATIONAL => { + let offset = data.read_u32()?; + cursor.seek(offset as _); + let numerator = cursor.read_i32().await?; + let denominator = cursor.read_i32().await?; + Value::SRational(numerator, denominator) + } + Type::IFD => Value::Ifd(data.read_u32()?), + Type::IFD8 => { + let offset = data.read_u32()?; + cursor.seek(offset as _); + Value::IfdBig(cursor.read_u64().await?) + } + }); + } + + // Case 3: There is more than one value, but it fits in the offset field. + if value_byte_length <= 4 || bigtiff && value_byte_length <= 8 { + let mut data = cursor.read(value_byte_length).await?; + if bigtiff { + cursor.advance(8 - value_byte_length); + } else { + cursor.advance(4 - value_byte_length); + } + + match tag_type { + Type::BYTE | Type::UNDEFINED => { + return { + Ok(Value::List( + (0..count) + .map(|_| Value::Byte(data.read_u8().unwrap())) + .collect(), + )) + }; + } + Type::SBYTE => { + return { + Ok(Value::List( + (0..count) + .map(|_| Value::Signed(data.read_i8().unwrap() as i32)) + .collect(), + )) + } + } + Type::ASCII => { + let mut buf = vec![0; count as usize]; + data.read_exact(&mut buf)?; + if buf.is_ascii() && buf.ends_with(&[0]) { + let v = std::str::from_utf8(&buf) + .map_err(|err| AsyncTiffError::General(err.to_string()))?; + let v = v.trim_matches(char::from(0)); + return Ok(Value::Ascii(v.into())); + } else { + panic!("Invalid tag"); + // return Err(TiffError::FormatError(TiffFormatError::InvalidTag)); + } + } + Type::SHORT => { + let mut v = Vec::new(); + for _ in 0..count { + v.push(Value::Short(data.read_u16()?)); + } + return Ok(Value::List(v)); + } + Type::SSHORT => { + let mut v = 
Vec::new(); + for _ in 0..count { + v.push(Value::Signed(i32::from(data.read_i16()?))); + } + return Ok(Value::List(v)); + } + Type::LONG => { + let mut v = Vec::new(); + for _ in 0..count { + v.push(Value::Unsigned(data.read_u32()?)); + } + return Ok(Value::List(v)); + } + Type::SLONG => { + let mut v = Vec::new(); + for _ in 0..count { + v.push(Value::Signed(data.read_i32()?)); + } + return Ok(Value::List(v)); + } + Type::FLOAT => { + let mut v = Vec::new(); + for _ in 0..count { + v.push(Value::Float(data.read_f32()?)); + } + return Ok(Value::List(v)); + } + Type::IFD => { + let mut v = Vec::new(); + for _ in 0..count { + v.push(Value::Ifd(data.read_u32()?)); + } + return Ok(Value::List(v)); + } + Type::LONG8 + | Type::SLONG8 + | Type::RATIONAL + | Type::SRATIONAL + | Type::DOUBLE + | Type::IFD8 => { + unreachable!() + } + } + } + + // Seek cursor + let offset = if bigtiff { + cursor.read_u64().await? + } else { + cursor.read_u32().await?.into() + }; + cursor.seek(offset); + + // Case 4: there is more than one value, and it doesn't fit in the offset field. + match tag_type { + // TODO check if this could give wrong results + // at a different endianness of file/computer. + Type::BYTE | Type::UNDEFINED => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::Byte(cursor.read_u8().await?)) + } + Ok(Value::List(v)) + } + Type::SBYTE => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::Signed(cursor.read_i8().await? as i32)) + } + Ok(Value::List(v)) + } + Type::SHORT => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::Short(cursor.read_u16().await?)) + } + Ok(Value::List(v)) + } + Type::SSHORT => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::Signed(cursor.read_i16().await? 
as i32)) + } + Ok(Value::List(v)) + } + Type::LONG => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::Unsigned(cursor.read_u32().await?)) + } + Ok(Value::List(v)) + } + Type::SLONG => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::Signed(cursor.read_i32().await?)) + } + Ok(Value::List(v)) + } + Type::FLOAT => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::Float(cursor.read_f32().await?)) + } + Ok(Value::List(v)) + } + Type::DOUBLE => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::Double(cursor.read_f64().await?)) + } + Ok(Value::List(v)) + } + Type::RATIONAL => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::Rational( + cursor.read_u32().await?, + cursor.read_u32().await?, + )) + } + Ok(Value::List(v)) + } + Type::SRATIONAL => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::SRational( + cursor.read_i32().await?, + cursor.read_i32().await?, + )) + } + Ok(Value::List(v)) + } + Type::LONG8 => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::UnsignedBig(cursor.read_u64().await?)) + } + Ok(Value::List(v)) + } + Type::SLONG8 => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::SignedBig(cursor.read_i64().await?)) + } + Ok(Value::List(v)) + } + Type::IFD => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::Ifd(cursor.read_u32().await?)) + } + Ok(Value::List(v)) + } + Type::IFD8 => { + let mut v = Vec::with_capacity(count as _); + for _ in 0..count { + v.push(Value::IfdBig(cursor.read_u64().await?)) + } + Ok(Value::List(v)) + } + Type::ASCII => { + let mut out = vec![0; count as _]; + let mut buf = cursor.read(count).await?; + buf.read_exact(&mut out)?; + + // Strings may be null-terminated, so we trim anything downstream of the null byte 
+ if let Some(first) = out.iter().position(|&b| b == 0) { + out.truncate(first); + } + Ok(Value::Ascii( + String::from_utf8(out).map_err(|err| AsyncTiffError::General(err.to_string()))?, + )) + } + } +} diff --git a/src/reader.rs b/src/reader.rs index 6c3dcb5..06520ec 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -11,7 +11,7 @@ use bytes::{Buf, Bytes}; use futures::future::{BoxFuture, FutureExt}; use futures::TryFutureExt; -use crate::error::{AsyncTiffError, AsyncTiffResult}; +use crate::error::AsyncTiffResult; /// The asynchronous interface used to read COG files /// @@ -20,30 +20,24 @@ use crate::error::{AsyncTiffError, AsyncTiffResult}; /// /// Notes: /// -/// 1. There are distinct traits for accessing "metadata bytes" and "image bytes". The requests for -/// "metadata bytes" from `get_metadata_bytes` will be called from `TIFF.open`, while parsing -/// IFDs. Requests for "image bytes" from `get_image_bytes` and `get_image_byte_ranges` will be -/// called while fetching data from TIFF tiles or strips. -/// -/// 2. [`ObjectReader`], available when the `object_store` crate feature +/// 1. [`ObjectReader`], available when the `object_store` crate feature /// is enabled, implements this interface for [`ObjectStore`]. /// -/// 3. You can use [`TokioReader`] to implement [`AsyncFileReader`] for types that implement +/// 2. You can use [`TokioReader`] to implement [`AsyncFileReader`] for types that implement /// [`tokio::io::AsyncRead`] and [`tokio::io::AsyncSeek`], for example [`tokio::fs::File`]. /// /// [`ObjectStore`]: object_store::ObjectStore /// /// [`tokio::fs::File`]: https://docs.rs/tokio/latest/tokio/fs/struct.File.html pub trait AsyncFileReader: Debug + Send + Sync { - /// Retrieve the bytes in `range` as part of a request for header metadata. - fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; - /// Retrieve the bytes in `range` as part of a request for image data, not header metadata. 
- fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; + /// + /// This is also used as the default implementation of [`MetadataFetch`] if not overridden. + fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; /// Retrieve multiple byte ranges as part of a request for image data, not header metadata. The - /// default implementation will call `get_image_bytes` sequentially - fn get_image_byte_ranges( + /// default implementation will call `get_bytes` sequentially + fn get_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { @@ -51,7 +45,7 @@ pub trait AsyncFileReader: Debug + Send + Sync { let mut result = Vec::with_capacity(ranges.len()); for range in ranges.into_iter() { - let data = self.get_image_bytes(range).await?; + let data = self.get_bytes(range).await?; result.push(data); } @@ -63,19 +57,29 @@ pub trait AsyncFileReader: Debug + Send + Sync { /// This allows Box to be used as an AsyncFileReader, impl AsyncFileReader for Box { - fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - self.as_ref().get_metadata_bytes(range) + fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.as_ref().get_bytes(range) + } + + fn get_byte_ranges( + &self, + ranges: Vec>, + ) -> BoxFuture<'_, AsyncTiffResult>> { + self.as_ref().get_byte_ranges(ranges) } +} - fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - self.as_ref().get_image_bytes(range) +/// This allows Arc to be used as an AsyncFileReader, +impl AsyncFileReader for Arc { + fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.as_ref().get_bytes(range) } - fn get_image_byte_ranges( + fn get_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { - self.as_ref().get_image_byte_ranges(ranges) + self.as_ref().get_byte_ranges(ranges) } } @@ -106,6 +110,8 @@ impl Toki use std::io::SeekFrom; use tokio::io::{AsyncReadExt, AsyncSeekExt}; + use 
crate::error::AsyncTiffError; + let mut file = self.0.lock().await; file.seek(SeekFrom::Start(range.start)).await?; @@ -125,11 +131,7 @@ impl Toki impl AsyncFileReader for TokioReader { - fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - self.make_range_request(range).boxed() - } - - fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { self.make_range_request(range).boxed() } } @@ -162,18 +164,11 @@ impl ObjectReader { #[cfg(feature = "object_store")] impl AsyncFileReader for ObjectReader { - fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - self.make_range_request(range).boxed() - } - - fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { self.make_range_request(range).boxed() } - fn get_image_byte_ranges( - &self, - ranges: Vec>, - ) -> BoxFuture<'_, AsyncTiffResult>> + fn get_byte_ranges(&self, ranges: Vec>) -> BoxFuture<'_, AsyncTiffResult>> where Self: Send, { @@ -227,207 +222,33 @@ impl ReqwestReader { #[cfg(feature = "reqwest")] impl AsyncFileReader for ReqwestReader { - fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { self.make_range_request(range) } - - fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - self.make_range_request(range) - } -} - -/// An AsyncFileReader that caches the first `prefetch` bytes of a file. -#[derive(Debug)] -pub struct PrefetchReader { - reader: Arc, - buffer: Bytes, -} - -impl PrefetchReader { - /// Construct a new PrefetchReader, catching the first `prefetch` bytes of the file. 
- pub async fn new(reader: Arc, prefetch: u64) -> AsyncTiffResult { - let buffer = reader.get_metadata_bytes(0..prefetch).await?; - Ok(Self { reader, buffer }) - } -} - -impl AsyncFileReader for PrefetchReader { - fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - if range.start < self.buffer.len() as _ { - if range.end < self.buffer.len() as _ { - let usize_range = range.start as usize..range.end as usize; - let result = self.buffer.slice(usize_range); - async { Ok(result) }.boxed() - } else { - // TODO: reuse partial internal buffer - self.reader.get_metadata_bytes(range) - } - } else { - self.reader.get_metadata_bytes(range) - } - } - - fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - // In practice, get_image_bytes is only used for fetching tiles, which are unlikely - // to overlap a metadata prefetch. - self.reader.get_image_bytes(range) - } - - fn get_image_byte_ranges( - &self, - ranges: Vec>, - ) -> BoxFuture<'_, AsyncTiffResult>> - where - Self: Send, - { - // In practice, get_image_byte_ranges is only used for fetching tiles, which are unlikely - // to overlap a metadata prefetch. - self.reader.get_image_byte_ranges(ranges) - } } +/// Endianness #[derive(Debug, Clone, Copy)] -pub(crate) enum Endianness { +pub enum Endianness { + /// Little Endian LittleEndian, + /// Big Endian BigEndian, } -/// A wrapper around an [ObjectStore] that provides a seek-oriented interface -// TODO: in the future add buffering to this -#[derive(Debug)] -pub(crate) struct AsyncCursor { - reader: Arc, - offset: u64, +pub(crate) struct EndianAwareReader { + reader: Reader, endianness: Endianness, } -impl AsyncCursor { - /// Create a new AsyncCursor from a reader and endianness. 
- pub(crate) fn new(reader: Arc, endianness: Endianness) -> Self { +impl EndianAwareReader { + pub(crate) fn new(bytes: Bytes, endianness: Endianness) -> Self { Self { - reader, - offset: 0, + reader: bytes.reader(), endianness, } } - /// Create a new AsyncCursor for a TIFF file, automatically inferring endianness from the first - /// two bytes. - pub(crate) async fn try_open_tiff(reader: Arc) -> AsyncTiffResult { - // Initialize with little endianness and then set later - let mut cursor = Self::new(reader, Endianness::LittleEndian); - let magic_bytes = cursor.read(2).await?; - let magic_bytes = magic_bytes.as_ref(); - - // Should be b"II" for little endian or b"MM" for big endian - if magic_bytes == Bytes::from_static(b"II") { - cursor.endianness = Endianness::LittleEndian; - } else if magic_bytes == Bytes::from_static(b"MM") { - cursor.endianness = Endianness::BigEndian; - } else { - return Err(AsyncTiffError::General(format!( - "unexpected magic bytes {magic_bytes:?}" - ))); - }; - - Ok(cursor) - } - - /// Consume self and return the underlying [`AsyncFileReader`]. - #[allow(dead_code)] - pub(crate) fn into_inner(self) -> Arc { - self.reader - } - - /// Read the given number of bytes, advancing the internal cursor state by the same amount. - pub(crate) async fn read(&mut self, length: u64) -> AsyncTiffResult { - let range = self.offset as _..(self.offset + length) as _; - self.offset += length; - let bytes = self.reader.get_metadata_bytes(range).await?; - Ok(EndianAwareReader { - reader: bytes.reader(), - endianness: self.endianness, - }) - } - - /// Read a u8 from the cursor, advancing the internal state by 1 byte. - pub(crate) async fn read_u8(&mut self) -> AsyncTiffResult { - self.read(1).await?.read_u8() - } - - /// Read a i8 from the cursor, advancing the internal state by 1 byte. - pub(crate) async fn read_i8(&mut self) -> AsyncTiffResult { - self.read(1).await?.read_i8() - } - - /// Read a u16 from the cursor, advancing the internal state by 2 bytes. 
- pub(crate) async fn read_u16(&mut self) -> AsyncTiffResult { - self.read(2).await?.read_u16() - } - - /// Read a i16 from the cursor, advancing the internal state by 2 bytes. - pub(crate) async fn read_i16(&mut self) -> AsyncTiffResult { - self.read(2).await?.read_i16() - } - - /// Read a u32 from the cursor, advancing the internal state by 4 bytes. - pub(crate) async fn read_u32(&mut self) -> AsyncTiffResult { - self.read(4).await?.read_u32() - } - - /// Read a i32 from the cursor, advancing the internal state by 4 bytes. - pub(crate) async fn read_i32(&mut self) -> AsyncTiffResult { - self.read(4).await?.read_i32() - } - - /// Read a u64 from the cursor, advancing the internal state by 8 bytes. - pub(crate) async fn read_u64(&mut self) -> AsyncTiffResult { - self.read(8).await?.read_u64() - } - - /// Read a i64 from the cursor, advancing the internal state by 8 bytes. - pub(crate) async fn read_i64(&mut self) -> AsyncTiffResult { - self.read(8).await?.read_i64() - } - - pub(crate) async fn read_f32(&mut self) -> AsyncTiffResult { - self.read(4).await?.read_f32() - } - - pub(crate) async fn read_f64(&mut self) -> AsyncTiffResult { - self.read(8).await?.read_f64() - } - - #[allow(dead_code)] - pub(crate) fn reader(&self) -> &Arc { - &self.reader - } - - #[allow(dead_code)] - pub(crate) fn endianness(&self) -> Endianness { - self.endianness - } - - /// Advance cursor position by a set amount - pub(crate) fn advance(&mut self, amount: u64) { - self.offset += amount; - } - - pub(crate) fn seek(&mut self, offset: u64) { - self.offset = offset; - } - - pub(crate) fn position(&self) -> u64 { - self.offset - } -} - -pub(crate) struct EndianAwareReader { - reader: Reader, - endianness: Endianness, -} - -impl EndianAwareReader { /// Read a u8 from the cursor, advancing the internal state by 1 byte. pub(crate) fn read_u8(&mut self) -> AsyncTiffResult { Ok(self.reader.read_u8()?) 
diff --git a/tests/image_tiff/decode_bigtiff_images.rs b/tests/image_tiff/decode_bigtiff_images.rs index a90ee19..eb23fa8 100644 --- a/tests/image_tiff/decode_bigtiff_images.rs +++ b/tests/image_tiff/decode_bigtiff_images.rs @@ -13,7 +13,7 @@ async fn test_big_tiff() { ]; for filename in filenames.iter() { let tiff = open_tiff(filename).await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert_eq!(ifd.image_height(), 64); assert_eq!(ifd.image_width(), 64); assert_eq!( diff --git a/tests/image_tiff/decode_geotiff_images.rs b/tests/image_tiff/decode_geotiff_images.rs index da2f058..79f1205 100644 --- a/tests/image_tiff/decode_geotiff_images.rs +++ b/tests/image_tiff/decode_geotiff_images.rs @@ -7,7 +7,7 @@ async fn test_geo_tiff() { let filenames = ["geo-5b.tif"]; for filename in filenames.iter() { let tiff = open_tiff(filename).await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; dbg!(&ifd); assert_eq!(ifd.image_height(), 10); assert_eq!(ifd.image_width(), 10); diff --git a/tests/image_tiff/decode_images.rs b/tests/image_tiff/decode_images.rs index 811b54e..dd9a7b5 100644 --- a/tests/image_tiff/decode_images.rs +++ b/tests/image_tiff/decode_images.rs @@ -7,7 +7,7 @@ use crate::image_tiff::util::open_tiff; #[tokio::test] async fn cmyk_u8() { let tiff = open_tiff("cmyk-3c-8b.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::CMYK @@ -18,7 +18,7 @@ async fn cmyk_u8() { #[tokio::test] async fn test_cmyk_u16() { let tiff = open_tiff("cmyk-3c-16b.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::CMYK @@ -29,7 +29,7 @@ async fn test_cmyk_u16() { #[tokio::test] async fn test_cmyk_f32() { let tiff = open_tiff("cmyk-3c-32b-float.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; 
assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::CMYK @@ -40,7 +40,7 @@ async fn test_cmyk_f32() { #[tokio::test] async fn test_gray_u8() { let tiff = open_tiff("minisblack-1c-8b.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::BlackIsZero @@ -51,7 +51,7 @@ async fn test_gray_u8() { #[tokio::test] async fn test_gray_u12() { let tiff = open_tiff("12bit.cropped.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::BlackIsZero @@ -62,7 +62,7 @@ async fn test_gray_u12() { #[tokio::test] async fn test_gray_u16() { let tiff = open_tiff("minisblack-1c-16b.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::BlackIsZero @@ -73,7 +73,7 @@ async fn test_gray_u16() { #[tokio::test] async fn test_gray_u32() { let tiff = open_tiff("gradient-1c-32b.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::BlackIsZero @@ -84,7 +84,7 @@ async fn test_gray_u32() { #[tokio::test] async fn test_gray_u64() { let tiff = open_tiff("gradient-1c-64b.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::BlackIsZero @@ -95,7 +95,7 @@ async fn test_gray_u64() { #[tokio::test] async fn test_gray_f32() { let tiff = open_tiff("gradient-1c-32b-float.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::BlackIsZero @@ -106,7 +106,7 @@ async fn test_gray_f32() { #[tokio::test] async fn test_gray_f64() { let tiff = 
open_tiff("gradient-1c-64b-float.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::BlackIsZero @@ -117,7 +117,7 @@ async fn test_gray_f64() { #[tokio::test] async fn test_rgb_u8() { let tiff = open_tiff("rgb-3c-8b.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::RGB @@ -128,7 +128,7 @@ async fn test_rgb_u8() { #[tokio::test] async fn test_rgb_u12() { let tiff = open_tiff("12bit.cropped.rgb.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::RGB @@ -139,7 +139,7 @@ async fn test_rgb_u12() { #[tokio::test] async fn test_rgb_u16() { let tiff = open_tiff("rgb-3c-16b.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::RGB @@ -150,7 +150,7 @@ async fn test_rgb_u16() { #[tokio::test] async fn test_rgb_u32() { let tiff = open_tiff("gradient-3c-32b.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::RGB @@ -161,7 +161,7 @@ async fn test_rgb_u32() { #[tokio::test] async fn test_rgb_u64() { let tiff = open_tiff("gradient-3c-64b.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::RGB @@ -172,7 +172,7 @@ async fn test_rgb_u64() { #[tokio::test] async fn test_rgb_f32() { let tiff = open_tiff("gradient-3c-32b-float.tiff").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::RGB @@ -183,7 +183,7 @@ async fn test_rgb_f32() { #[tokio::test] 
async fn test_int8() { let tiff = open_tiff("int8.tif").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::BlackIsZero @@ -194,7 +194,7 @@ async fn test_int8() { #[tokio::test] async fn test_int8_rgb() { let tiff = open_tiff("int8_rgb.tif").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::RGB @@ -205,7 +205,7 @@ async fn test_int8_rgb() { #[tokio::test] async fn test_int16() { let tiff = open_tiff("int16.tif").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::BlackIsZero @@ -216,7 +216,7 @@ async fn test_int16() { #[tokio::test] async fn test_int16_rgb() { let tiff = open_tiff("int16_rgb.tif").await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; assert!(matches!( ifd.photometric_interpretation(), PhotometricInterpretation::RGB @@ -231,7 +231,7 @@ async fn test_string_tags() { let filenames = ["minisblack-1c-16b.tiff", "rgb-3c-16b.tiff"]; for filename in filenames.iter() { let tiff = open_tiff(filename).await; - let ifd = &tiff.ifds().as_ref()[0]; + let ifd = &tiff.ifds()[0]; let software = ifd.software().unwrap(); assert_eq!( software, diff --git a/tests/image_tiff/util.rs b/tests/image_tiff/util.rs index 834ccc7..0b7e0c3 100644 --- a/tests/image_tiff/util.rs +++ b/tests/image_tiff/util.rs @@ -1,7 +1,8 @@ use std::env::current_dir; use std::sync::Arc; -use async_tiff::reader::ObjectReader; +use async_tiff::metadata::TiffMetadataReader; +use async_tiff::reader::{AsyncFileReader, ObjectReader}; use async_tiff::TIFF; use object_store::local::LocalFileSystem; @@ -10,6 +11,9 @@ const TEST_IMAGE_DIR: &str = "tests/image_tiff/images/"; pub(crate) async fn open_tiff(filename: &str) -> TIFF { let store = 
Arc::new(LocalFileSystem::new_with_prefix(current_dir().unwrap()).unwrap()); let path = format!("{TEST_IMAGE_DIR}/{filename}"); - let reader = Arc::new(ObjectReader::new(store.clone(), path.as_str().into())); - TIFF::try_open(reader).await.unwrap() + let reader = Arc::new(ObjectReader::new(store.clone(), path.as_str().into())) + as Arc; + let mut metadata_reader = TiffMetadataReader::try_open(&reader).await.unwrap(); + let ifds = metadata_reader.read_all_ifds(&reader).await.unwrap(); + TIFF::new(ifds) }