From aa2382e410f4e54c38d5505228510f64edb5d4f8 Mon Sep 17 00:00:00 2001 From: ritchie Date: Sat, 5 Oct 2024 16:56:04 +0200 Subject: [PATCH] first branch cache check --- crates/polars-io/src/csv/read/parser.rs | 13 +++++++------ crates/polars-io/src/csv/read/splitfields.rs | 13 +++++++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/crates/polars-io/src/csv/read/parser.rs b/crates/polars-io/src/csv/read/parser.rs index 789f8bfd254f..2b11013be951 100644 --- a/crates/polars-io/src/csv/read/parser.rs +++ b/crates/polars-io/src/csv/read/parser.rs @@ -438,12 +438,7 @@ impl<'a> Iterator for SplitLines<'a> { #[inline] #[cfg(feature = "simd")] fn next(&mut self) -> Option<&'a [u8]> { - if self.v.is_empty() { - return None; - } - self.total_index = 0; - let mut not_in_field_previous_iter = true; - + // First check cached value if self.previous_valid_eols != 0 { let pos = self.previous_valid_eols.trailing_zeros() as usize; self.previous_valid_eols >>= (pos + 1) as u64; @@ -458,6 +453,12 @@ impl<'a> Iterator for SplitLines<'a> { return ret; } } + if self.v.is_empty() { + return None; + } + + self.total_index = 0; + let mut not_in_field_previous_iter = true; loop { let bytes = unsafe { self.v.get_unchecked_release(self.total_index..) }; diff --git a/crates/polars-io/src/csv/read/splitfields.rs b/crates/polars-io/src/csv/read/splitfields.rs index 490a0a27ebf0..68714c4fddd7 100644 --- a/crates/polars-io/src/csv/read/splitfields.rs +++ b/crates/polars-io/src/csv/read/splitfields.rs @@ -214,12 +214,7 @@ mod inner { #[inline] fn next(&mut self) -> Option<(&'a [u8], bool)> { - if self.finished { - return None; - } - if self.v.is_empty() { - return self.finish(false); - } + // First check cached value as this is hot. if self.previous_valid_ends != 0 { let pos = self.previous_valid_ends.trailing_zeros() as usize; self.previous_valid_ends >>= (pos + 1) as u64; @@ -241,6 +236,12 @@ mod inner { return ret; } } + if self.finished { + return None; + } + if self.v.is_empty() { + return self.finish(false); + } let mut needs_escaping = false; // There can be strings with separators: