Merge pull request #377 from str4d/368-buffered-header-reading
Enable header parsing to use `R: std::io::BufRead`
str4d authored Mar 25, 2023
2 parents 3260c7d + f337b55 commit f84af2c
Showing 12 changed files with 329 additions and 94 deletions.
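
To make the change concrete, here is a rough sketch of how a caller could pick up the new constructor for an in-memory ciphertext. The `describe` helper is illustrative and not part of this PR; the variant match mirrors the benchmark code further down and assumes the current two-variant `Decryptor` enum.

```rust
use age::Decryptor;

fn describe(ciphertext: &[u8]) -> Result<(), age::DecryptError> {
    // `&[u8]` implements `std::io::BufRead`, so in-memory ciphertexts can take the
    // new buffered path, which the changelog notes is more efficient than
    // `Decryptor::new` for `BufRead` sources.
    match Decryptor::new_buffered(ciphertext)? {
        Decryptor::Recipients(_) => println!("encrypted to one or more recipients"),
        // Wildcard arm as in the benchmarks below; the only other variant today
        // is `Passphrase`.
        _ => println!("encrypted with a passphrase"),
    }
    Ok(())
}
```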
4 changes: 4 additions & 0 deletions age/CHANGELOG.md
@@ -9,6 +9,10 @@ and this project adheres to Rust's notion of
to 1.0.0 are beta releases.

## [Unreleased]
### Added
- `age::Decryptor::new_buffered`, which is more efficient for types implementing
`std::io::BufRead` (which includes `&[u8]` slices).
- `impl std::io::BufRead for age::armor::ArmoredReader`

## [0.9.1] - 2022-03-24
### Added
2 changes: 1 addition & 1 deletion age/benches/parser.rs
@@ -30,7 +30,7 @@ fn bench(c: &mut Criterion) {
output.write_all(&[]).unwrap();
output.finish().unwrap();

b.iter(|| Decryptor::new(&encrypted[..]))
b.iter(|| Decryptor::new_buffered(&encrypted[..]))
});
}

2 changes: 1 addition & 1 deletion age/benches/throughput.rs
@@ -70,7 +70,7 @@ fn bench(c: &mut Criterion_) {
output.finish().unwrap();

b.iter(|| {
let decryptor = match Decryptor::new(&ct_buf[..]).unwrap() {
let decryptor = match Decryptor::new_buffered(&ct_buf[..]).unwrap() {
Decryptor::Recipients(decryptor) => decryptor,
_ => panic!(),
};
30 changes: 29 additions & 1 deletion age/src/format.rs
@@ -1,7 +1,7 @@
//! The age file format.

use age_core::format::Stanza;
use std::io::{self, Read, Write};
use std::io::{self, BufRead, Read, Write};

use crate::{
error::DecryptError,
@@ -88,6 +88,34 @@ impl Header {
}
}

pub(crate) fn read_buffered<R: BufRead>(mut input: R) -> Result<Self, DecryptError> {
let mut data = vec![];
loop {
match read::header(&data) {
Ok((_, mut header)) => {
if let Header::V1(h) = &mut header {
h.encoded_bytes = Some(data);
}
break Ok(header);
}
Err(nom::Err::Incomplete(nom::Needed::Size(_))) => {
// As we have a buffered reader, we can leverage the fact that the
// currently-defined header formats are newline-separated, to more
// efficiently read data for the parser to consume.
if input.read_until(b'\n', &mut data)? == 0 {
break Err(DecryptError::Io(io::Error::new(
io::ErrorKind::UnexpectedEof,
"Incomplete header",
)));
}
}
Err(_) => {
break Err(DecryptError::InvalidHeader);
}
}
}
}

#[cfg(feature = "async")]
#[cfg_attr(docsrs, doc(cfg(feature = "async")))]
pub(crate) async fn read_async<R: AsyncRead + Unpin>(
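
The `read_buffered` loop above drives the parser incrementally: it appends one newline-delimited chunk at a time and re-runs the parser over everything read so far, so the reader never consumes bytes past the end of the header. A self-contained sketch of that pattern, with a toy `---`-terminated header standing in for the real nom-based `read::header` parser (`parse`, `read_toy_header`, and the sample input are illustrative, not part of this change):

```rust
use std::io::{self, BufRead};

/// Toy stand-in for the real nom parser: reports success once a full `---` line
/// has been read, returning how many bytes the "header" spans, or `None` if more
/// input is needed.
fn parse(data: &[u8]) -> Option<usize> {
    let mut consumed = 0;
    for line in data.split_inclusive(|&b| b == b'\n') {
        consumed += line.len();
        if line == &b"---\n"[..] {
            return Some(consumed);
        }
    }
    None
}

/// Same shape as `Header::read_buffered`: grow `data` one line at a time via
/// `read_until`, re-running the parser after each line.
fn read_toy_header<R: BufRead>(mut input: R) -> io::Result<Vec<u8>> {
    let mut data = vec![];
    loop {
        if let Some(consumed) = parse(&data) {
            break Ok(data[..consumed].to_vec());
        }
        // Parser needs more input: read exactly one more newline-terminated chunk,
        // so we never pull in bytes that belong to the payload after the header.
        if input.read_until(b'\n', &mut data)? == 0 {
            break Err(io::Error::new(io::ErrorKind::UnexpectedEof, "incomplete header"));
        }
    }
}

fn main() -> io::Result<()> {
    let file = &b"line one\nline two\n---\npayload bytes"[..];
    let header = read_toy_header(file)?;
    assert!(header.ends_with(&b"---\n"[..]));
    Ok(())
}
```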
183 changes: 95 additions & 88 deletions age/src/primitives/armor.rs
@@ -726,6 +726,7 @@ impl<R> ArmoredReader<R> {
///
/// Returns the number of bytes read into the buffer, or None if there was no cached
/// data.
#[cfg(feature = "async")]
fn read_cached_data(&mut self, buf: &mut [u8]) -> Option<usize> {
if self.byte_start >= self.byte_end {
None
@@ -820,69 +821,106 @@ impl<R> ArmoredReader<R> {
}
}

impl<R: BufRead> Read for ArmoredReader<R> {
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
impl<R: BufRead> BufRead for ArmoredReader<R> {
fn fill_buf(&mut self) -> io::Result<&[u8]> {
loop {
match self.is_armored {
None => {
self.inner.read_exact(&mut self.byte_buf[..MIN_ARMOR_LEN])?;
self.detect_armor()?
}
Some(false) => {
// Return any leftover data from armor detection
return if let Some(read) = self.read_cached_data(buf) {
Ok(read)
} else {
self.inner.read(buf).map(|read| {
self.data_read += read;
self.count_reader_bytes(read)
break if self.byte_start >= self.byte_end {
self.inner.read(&mut self.byte_buf[..]).map(|read| {
self.byte_start = 0;
self.byte_end = read;
self.count_reader_bytes(read);
&self.byte_buf[..read]
})
};
} else {
Ok(&self.byte_buf[self.byte_start..self.byte_end])
}
}
Some(true) => {
break if self.found_end {
Ok(&[])
} else if self.byte_start >= self.byte_end {
if self.read_next_armor_line()? {
Ok(&[])
} else {
Ok(&self.byte_buf[self.byte_start..self.byte_end])
}
} else {
Ok(&self.byte_buf[self.byte_start..self.byte_end])
}
}
Some(true) => break,
}
}
if self.found_end {
return Ok(0);
}
}

let buf_len = buf.len();
fn consume(&mut self, amt: usize) {
self.byte_start += amt;
self.data_read += amt;
assert!(self.byte_start <= self.byte_end);
}
}

// Output any remaining bytes from the previous line
if let Some(read) = self.read_cached_data(buf) {
buf = &mut buf[read..];
}
impl<R: BufRead> ArmoredReader<R> {
/// Fills `self.byte_buf` with the next line of armored data.
///
/// Returns `true` if this was the last line.
fn read_next_armor_line(&mut self) -> io::Result<bool> {
assert_eq!(self.is_armored, Some(true));

while !buf.is_empty() {
// Read the next line
self.inner
.read_line(&mut self.line_buf)
.map(|read| self.count_reader_bytes(read))?;

// Parse the line into bytes
if self.parse_armor_line()? {
// This was the last line! Check for trailing garbage.
loop {
let amt = match self.inner.fill_buf()? {
&[] => break,
buf => {
if buf.iter().any(|b| !b.is_ascii_whitespace()) {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
ArmoredReadError::TrailingGarbage,
));
}
buf.len()
// Read the next line
self.inner
.read_line(&mut self.line_buf)
.map(|read| self.count_reader_bytes(read))?;

// Parse the line into bytes
if self.parse_armor_line()? {
// This was the last line! Check for trailing garbage.
loop {
let amt = match self.inner.fill_buf()? {
&[] => break,
buf => {
if buf.iter().any(|b| !b.is_ascii_whitespace()) {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
ArmoredReadError::TrailingGarbage,
));
}
};
self.inner.consume(amt);
}
break;
buf.len()
}
};
self.inner.consume(amt);
}
Ok(true)
} else {
Ok(false)
}
}
}

impl<R: BufRead> Read for ArmoredReader<R> {
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
let buf_len = buf.len();

while !buf.is_empty() {
match self.fill_buf()? {
[] => break,
next => {
let read = cmp::min(next.len(), buf.len());

if next.len() < buf.len() {
buf[..read].copy_from_slice(next);
} else {
buf.copy_from_slice(&next[..read]);
}

// Output as much as we can of this line
if let Some(read) = self.read_cached_data(buf) {
buf = &mut buf[read..];
self.consume(read);
buf = &mut buf[read..];
}
}
}

@@ -1061,49 +1099,9 @@ impl<R: BufRead + Seek> Seek for ArmoredReader<R> {
self.inner.read_exact(&mut self.byte_buf[..MIN_ARMOR_LEN])?;
self.detect_armor()?
}
Some(false) => {
break if self.byte_start >= self.byte_end {
// Map the data read onto the underlying stream.
let start = self.start()?;
let pos = match pos {
SeekFrom::Start(offset) => SeekFrom::Start(start + offset),
// Current and End positions don't need to be shifted.
x => x,
};
self.inner.seek(pos)
} else {
// We are still inside the first line.
match pos {
SeekFrom::Start(offset) => self.byte_start = offset as usize,
SeekFrom::Current(offset) => {
let res = (self.byte_start as i64) + offset;
if res >= 0 {
self.byte_start = res as usize;
} else {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"cannot seek before the start",
));
}
}
SeekFrom::End(offset) => {
let res = (self.line_buf.len() as i64) + offset;
if res >= 0 {
self.byte_start = res as usize;
} else {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"cannot seek before the start",
));
}
}
}
Ok(self.byte_start as u64)
};
}
Some(true) => {
Some(armored) => {
// Convert the offset into the target position within the data inside
// the armor.
// the (maybe) armor.
let start = self.start()?;
let target_pos = match pos {
SeekFrom::Start(offset) => offset,
@@ -1146,6 +1144,15 @@ impl<R: BufRead + Seek> Seek for ArmoredReader<R> {
}
};

if !armored {
// We can seek directly on the inner reader.
self.inner.seek(SeekFrom::Start(start + target_pos))?;
self.byte_start = 0;
self.byte_end = 0;
self.data_read = target_pos as usize;
break Ok(self.data_read as u64);
}

// Jump back to the start of the armor data, and then read and drop
// until we reach the target position. This is very inefficient, but
// as armored files can have arbitrary line endings within the file,
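
For reference, the new `Read` impl above is the standard layering of `read` on top of `fill_buf`/`consume`. A generic, self-contained version of that layering with the armor-specific state stripped out (the helper name and the `main` demo are illustrative, not part of the change):

```rust
use std::cmp;
use std::io::{self, BufRead};

/// Copy as much buffered data as fits into `buf`, marking it consumed, until
/// `buf` is full or the reader reports EOF. Same shape as the new `Read::read`
/// for `ArmoredReader`, minus the armor handling.
fn read_via_bufread<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;
    while filled < buf.len() {
        let available = reader.fill_buf()?;
        if available.is_empty() {
            break; // EOF (or, for `ArmoredReader`, the end of the armored payload)
        }
        let n = cmp::min(available.len(), buf.len() - filled);
        buf[filled..filled + n].copy_from_slice(&available[..n]);
        reader.consume(n);
        filled += n;
    }
    Ok(filled)
}

fn main() -> io::Result<()> {
    let mut src = &b"hello world"[..];
    let mut out = [0u8; 5];
    let n = read_via_bufread(&mut src, &mut out)?;
    assert_eq!(&out[..n], &b"hello"[..]);
    Ok(())
}
```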
31 changes: 30 additions & 1 deletion age/src/protocol.rs
@@ -2,7 +2,7 @@

use age_core::{format::grease_the_joint, secrecy::SecretString};
use rand::{rngs::OsRng, RngCore};
use std::io::{self, Read, Write};
use std::io::{self, BufRead, Read, Write};

use crate::{
error::{DecryptError, EncryptError},
@@ -181,6 +181,13 @@ impl<R: Read> Decryptor<R> {
/// Attempts to create a decryptor for an age file.
///
/// Returns an error if the input does not contain a valid age file.
///
/// # Performance
///
/// This constructor will work with any type implementing [`io::Read`], and uses a
/// slower parser and internal buffering to ensure no overreading occurs. Consider
/// using [`Decryptor::new_buffered`] for types implementing `std::io::BufRead`, which
/// includes `&[u8]` slices.
pub fn new(mut input: R) -> Result<Self, DecryptError> {
let header = Header::read(&mut input)?;

@@ -194,6 +201,28 @@
}
}

impl<R: BufRead> Decryptor<R> {
/// Attempts to create a decryptor for an age file.
///
/// Returns an error if the input does not contain a valid age file.
///
/// # Performance
///
/// This constructor is more performant than [`Decryptor::new`] for types implementing
/// [`io::BufRead`], which includes `&[u8]` slices.
pub fn new_buffered(mut input: R) -> Result<Self, DecryptError> {
let header = Header::read_buffered(&mut input)?;

match header {
Header::V1(v1_header) => {
let nonce = Nonce::read(&mut input)?;
Decryptor::from_v1_header(input, v1_header, nonce)
}
Header::Unknown(_) => Err(DecryptError::UnknownFormat),
}
}
}

#[cfg(feature = "async")]
#[cfg_attr(docsrs, doc(cfg(feature = "async")))]
impl<R: AsyncRead + Unpin> Decryptor<R> {
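
On the consuming side, a hedged sketch of how a caller might reach the buffered path when decrypting from a file: wrapping the `File` in a `std::io::BufReader` satisfies the new `BufRead` bound. The helper name, path handling, and error type are illustrative; only `Decryptor::new_buffered` comes from this change.

```rust
use std::error::Error;
use std::fs::File;
use std::io::BufReader;

use age::Decryptor;

fn open_decryptor(path: &str) -> Result<Decryptor<BufReader<File>>, Box<dyn Error>> {
    let file = File::open(path)?;
    // `BufReader<File>` implements `std::io::BufRead`, so the header is parsed via
    // `read_buffered`; plain `Decryptor::new(file)` would also work, but uses the
    // slower non-buffered header parser.
    Ok(Decryptor::new_buffered(BufReader::new(file))?)
}
```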