Skip to content

Commit

Permalink
remote_storage: add DownloadOpts::byte_(start|end) (#9293)
Browse files Browse the repository at this point in the history
`download_byte_range()` is basically a copy of `download()` with an
additional option passed to the backend SDKs. This can cause these code
paths to diverge, and prevents combining various options.

This patch adds `DownloadOpts::byte_(start|end)` and move byte range
handling into `download()`.
  • Loading branch information
erikgrinaker authored Oct 9, 2024
1 parent f87f5a3 commit 211970f
Show file tree
Hide file tree
Showing 8 changed files with 240 additions and 241 deletions.
26 changes: 6 additions & 20 deletions libs/remote_storage/src/azure_blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,26 +496,12 @@ impl RemoteStorage for AzureBlobStorage {
builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()))
}

self.download_for_builder(builder, cancel).await
}

async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
let blob_client = self.client.blob_client(self.relative_path_to_name(from));

let mut builder = blob_client.get();

let range: Range = if let Some(end_exclusive) = end_exclusive {
(start_inclusive..end_exclusive).into()
} else {
(start_inclusive..).into()
};
builder = builder.range(range);
if let Some((start, end)) = opts.byte_range() {
builder = builder.range(match end {
Some(end) => Range::Range(start..end),
None => Range::RangeFrom(start..),
});
}

self.download_for_builder(builder, cancel).await
}
Expand Down
180 changes: 122 additions & 58 deletions libs/remote_storage/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ mod simulate_failures;
mod support;

use std::{
collections::HashMap, fmt::Debug, num::NonZeroU32, pin::Pin, sync::Arc, time::SystemTime,
collections::HashMap, fmt::Debug, num::NonZeroU32, ops::Bound, pin::Pin, sync::Arc,
time::SystemTime,
};

use anyhow::Context;
Expand Down Expand Up @@ -162,11 +163,60 @@ pub struct Listing {
}

/// Options for downloads. The default value is a plain GET.
#[derive(Default)]
pub struct DownloadOpts {
/// If given, returns [`DownloadError::Unmodified`] if the object still has
/// the same ETag (using If-None-Match).
pub etag: Option<Etag>,
/// The start of the byte range to download, or unbounded.
pub byte_start: Bound<u64>,
/// The end of the byte range to download, or unbounded. Must be after the
/// start bound.
pub byte_end: Bound<u64>,
}

impl Default for DownloadOpts {
fn default() -> Self {
Self {
etag: Default::default(),
byte_start: Bound::Unbounded,
byte_end: Bound::Unbounded,
}
}
}

impl DownloadOpts {
/// Returns the byte range with inclusive start and exclusive end, or None
/// if unbounded.
pub fn byte_range(&self) -> Option<(u64, Option<u64>)> {
if self.byte_start == Bound::Unbounded && self.byte_end == Bound::Unbounded {
return None;
}
let start = match self.byte_start {
Bound::Excluded(i) => i + 1,
Bound::Included(i) => i,
Bound::Unbounded => 0,
};
let end = match self.byte_end {
Bound::Excluded(i) => Some(i),
Bound::Included(i) => Some(i + 1),
Bound::Unbounded => None,
};
if let Some(end) = end {
assert!(start < end, "range end {end} at or before start {start}");
}
Some((start, end))
}

/// Returns the byte range as an RFC 2616 Range header value with inclusive
/// bounds, or None if unbounded.
pub fn byte_range_header(&self) -> Option<String> {
self.byte_range()
.map(|(start, end)| (start, end.map(|end| end - 1))) // make end inclusive
.map(|(start, end)| match end {
Some(end) => format!("bytes={start}-{end}"),
None => format!("bytes={start}-"),
})
}
}

/// Storage (potentially remote) API to manage its state.
Expand Down Expand Up @@ -257,21 +307,6 @@ pub trait RemoteStorage: Send + Sync + 'static {
cancel: &CancellationToken,
) -> Result<Download, DownloadError>;

/// Streams a given byte range of the remote storage entry contents.
///
/// The returned download stream will obey initial timeout and cancellation signal by erroring
/// on whichever happens first. Only one of the reasons will fail the stream, which is usually
/// enough for `tokio::io::copy_buf` usage. If needed the error can be filtered out.
///
/// Returns the metadata, if any was stored with the file previously.
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError>;

/// Delete a single path from remote storage.
///
/// If the operation fails because of timeout or cancellation, the root cause of the error will be
Expand Down Expand Up @@ -425,33 +460,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
}
}

pub async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match self {
Self::LocalFs(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::AwsS3(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::AzureBlob(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::Unreliable(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
}
}

/// See [`RemoteStorage::delete`]
pub async fn delete(
&self,
Expand Down Expand Up @@ -573,20 +581,6 @@ impl GenericRemoteStorage {
})
}

/// Downloads the storage object into the `to_path` provided.
/// `byte_range` could be specified to dowload only a part of the file, if needed.
pub async fn download_storage_object(
&self,
byte_range: Option<(u64, Option<u64>)>,
from: &RemotePath,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match byte_range {
Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
None => self.download(from, &DownloadOpts::default(), cancel).await,
}
}

/// The name of the bucket/container/etc.
pub fn bucket_name(&self) -> Option<&str> {
match self {
Expand Down Expand Up @@ -660,6 +654,76 @@ impl ConcurrencyLimiter {
mod tests {
use super::*;

/// DownloadOpts::byte_range() should generate (inclusive, exclusive) ranges
/// with optional end bound, or None when unbounded.
#[test]
fn download_opts_byte_range() {
// Consider using test_case or a similar table-driven test framework.
let cases = [
// (byte_start, byte_end, expected)
(Bound::Unbounded, Bound::Unbounded, None),
(Bound::Unbounded, Bound::Included(7), Some((0, Some(8)))),
(Bound::Unbounded, Bound::Excluded(7), Some((0, Some(7)))),
(Bound::Included(3), Bound::Unbounded, Some((3, None))),
(Bound::Included(3), Bound::Included(7), Some((3, Some(8)))),
(Bound::Included(3), Bound::Excluded(7), Some((3, Some(7)))),
(Bound::Excluded(3), Bound::Unbounded, Some((4, None))),
(Bound::Excluded(3), Bound::Included(7), Some((4, Some(8)))),
(Bound::Excluded(3), Bound::Excluded(7), Some((4, Some(7)))),
// 1-sized ranges are fine, 0 aren't and will panic (separate test).
(Bound::Included(3), Bound::Included(3), Some((3, Some(4)))),
(Bound::Included(3), Bound::Excluded(4), Some((3, Some(4)))),
];

for (byte_start, byte_end, expect) in cases {
let opts = DownloadOpts {
byte_start,
byte_end,
..Default::default()
};
let result = opts.byte_range();
assert_eq!(
result, expect,
"byte_start={byte_start:?} byte_end={byte_end:?}"
);

// Check generated HTTP header, which uses an inclusive range.
let expect_header = expect.map(|(start, end)| match end {
Some(end) => format!("bytes={start}-{}", end - 1), // inclusive end
None => format!("bytes={start}-"),
});
assert_eq!(
opts.byte_range_header(),
expect_header,
"byte_start={byte_start:?} byte_end={byte_end:?}"
);
}
}

/// DownloadOpts::byte_range() zero-sized byte range should panic.
#[test]
#[should_panic]
fn download_opts_byte_range_zero() {
DownloadOpts {
byte_start: Bound::Included(3),
byte_end: Bound::Excluded(3),
..Default::default()
}
.byte_range();
}

/// DownloadOpts::byte_range() negative byte range should panic.
#[test]
#[should_panic]
fn download_opts_byte_range_negative() {
DownloadOpts {
byte_start: Bound::Included(3),
byte_end: Bound::Included(2),
..Default::default()
}
.byte_range();
}

#[test]
fn test_object_name() {
let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap();
Expand Down
Loading

1 comment on commit 211970f

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

5085 tests run: 4878 passed, 0 failed, 207 skipped (full report)


Flaky tests (2)

Postgres 15

Postgres 14

Code coverage* (full report)

  • functions: 31.3% (7503 of 23946 functions)
  • lines: 49.5% (60257 of 121839 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
211970f at 2024-10-09T10:31:11.078Z :recycle:

Please sign in to comment.