diff --git a/src/uu/sort/src/chunks.rs b/src/uu/sort/src/chunks.rs index 934a20a665e..af10a008844 100644 --- a/src/uu/sort/src/chunks.rs +++ b/src/uu/sort/src/chunks.rs @@ -259,7 +259,9 @@ fn read_to_buffer( separator: u8, ) -> UResult<(usize, bool)> { let mut read_target = &mut buffer[start_offset..]; - let mut last_file_target_size = read_target.len(); + let mut last_file_empty = true; + let mut newline_search_offset = 0; + let mut found_newline = false; loop { match file.read(read_target) { Ok(0) => { @@ -278,13 +280,17 @@ fn read_to_buffer( continue; } } - let mut sep_iter = memchr_iter(separator, buffer).rev(); - let last_line_end = sep_iter.next(); - if sep_iter.next().is_some() { - // We read enough lines. - let end = last_line_end.unwrap(); - // We want to include the separator here, because it shouldn't be carried over. - return Ok((end + 1, true)); + + let mut sep_iter = + memchr_iter(separator, &buffer[newline_search_offset..buffer.len()]).rev(); + newline_search_offset = buffer.len(); + if let Some(last_line_end) = sep_iter.next() { + if found_newline || sep_iter.next().is_some() { + // We read enough lines. + // We want to include the separator here, because it shouldn't be carried over. + return Ok((last_line_end + 1, true)); + } + found_newline = true; } // We need to read more lines @@ -295,7 +301,7 @@ fn read_to_buffer( } else { // This file has been fully read. let mut leftover_len = read_target.len(); - if last_file_target_size != leftover_len { + if !last_file_empty { // The file was not empty. let read_len = buffer.len() - leftover_len; if buffer[read_len - 1] != separator { @@ -308,7 +314,7 @@ fn read_to_buffer( } if let Some(next_file) = next_files.next() { // There is another file. - last_file_target_size = leftover_len; + last_file_empty = true; *file = next_file?; } else { // This was the last file. @@ -319,6 +325,7 @@ fn read_to_buffer( } Ok(n) => { read_target = &mut read_target[n..]; + last_file_empty = false; } Err(e) if e.kind() == ErrorKind::Interrupted => { // retry diff --git a/src/uu/sort/src/ext_sort.rs b/src/uu/sort/src/ext_sort.rs index 4c003a3f25d..e43ad4b3a38 100644 --- a/src/uu/sort/src/ext_sort.rs +++ b/src/uu/sort/src/ext_sort.rs @@ -36,6 +36,7 @@ use crate::{ }; use crate::{Line, print_sorted}; +// Note: update `test_sort::test_start_buffer` if this size is changed const START_BUFFER_SIZE: usize = 8_000; /// Sort files by using auxiliary files for storing intermediate chunks (if needed), and output the result. diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index e92ddb832c7..3a4cc1a86c5 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -1906,4 +1906,25 @@ fn test_color_environment_variables() { } } +#[test] +fn test_start_buffer() { + // Test that a file with the exact same size as the start buffer is handled correctly + const FILE_B: &[u8] = &[b'b'; 8_000]; + const FILE_A: &[u8] = b"aaa"; + + let mut expected = FILE_A.to_vec(); + expected.push(b'\n'); + expected.extend_from_slice(FILE_B); + expected.push(b'\n'); + + let (at, mut ucmd) = at_and_ucmd!(); + + at.write_bytes("b", FILE_B); + at.write_bytes("a", FILE_A); + + ucmd.args(&["b", "a"]) + .succeeds() + .stdout_only_bytes(&expected); +} + /* spell-checker: enable */