From e65ed1f628f139ce41421390d335fbcebe4d6fdb Mon Sep 17 00:00:00 2001 From: bendn Date: Sat, 31 May 2025 16:47:03 +0700 Subject: [PATCH 1/4] faster charsearcher --- library/core/src/str/pattern.rs | 34 ++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index bcbbb11c83b2f..a9e5f55e90ebc 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -429,8 +429,24 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { SearchStep::Done } } - #[inline] + #[inline(always)] fn next_match(&mut self) -> Option<(usize, usize)> { + if self.utf8_size == 1 { + let find = |haystack: &[u8]| { + if haystack.len() < 32 { + haystack.iter().position(|&x| x == self.utf8_encoded[0]) + } else { + memchr::memchr(self.utf8_encoded[0], haystack) + } + }; + return match find(self.haystack.as_bytes().get(self.finger..self.finger_back)?) { + Some(x) => { + self.finger += x + 1; + Some((self.finger - 1, self.finger)) + } + None => None, + }; + } loop { // get the haystack after the last character found let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?; @@ -498,6 +514,22 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { } #[inline] fn next_match_back(&mut self) -> Option<(usize, usize)> { + if self.utf8_size == 1 { + let find = |haystack: &[u8]| { + if haystack.len() < 32 { + haystack.iter().rposition(|&x| x == self.utf8_encoded[0]) + } else { + memchr::memrchr(self.utf8_encoded[0], haystack) + } + }; + return match find(self.haystack.as_bytes().get(self.finger..self.finger_back)?) 
{ + Some(x) => { + self.finger_back = self.finger + x; + Some((self.finger_back, self.finger_back + 1)) + } + None => None, + }; + } let haystack = self.haystack.as_bytes(); loop { // get the haystack up to but not including the last character searched From 3d3387709e62314fda091bf3b86838703b114765 Mon Sep 17 00:00:00 2001 From: bendn Date: Wed, 4 Jun 2025 00:29:56 +0700 Subject: [PATCH 2/4] dont ever position() --- library/core/src/str/pattern.rs | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index a9e5f55e90ebc..9bea4ca523b2a 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -432,13 +432,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { #[inline(always)] fn next_match(&mut self) -> Option<(usize, usize)> { if self.utf8_size == 1 { - let find = |haystack: &[u8]| { - if haystack.len() < 32 { - haystack.iter().position(|&x| x == self.utf8_encoded[0]) - } else { - memchr::memchr(self.utf8_encoded[0], haystack) - } - }; + let find = |haystack: &[u8]| memchr::memchr(self.utf8_encoded[0], haystack); return match find(self.haystack.as_bytes().get(self.finger..self.finger_back)?) { Some(x) => { self.finger += x + 1; @@ -515,13 +509,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { #[inline] fn next_match_back(&mut self) -> Option<(usize, usize)> { if self.utf8_size == 1 { - let find = |haystack: &[u8]| { - if haystack.len() < 32 { - haystack.iter().rposition(|&x| x == self.utf8_encoded[0]) - } else { - memchr::memrchr(self.utf8_encoded[0], haystack) - } - }; + let find = |haystack: &[u8]| memchr::memrchr(self.utf8_encoded[0], haystack); return match find(self.haystack.as_bytes().get(self.finger..self.finger_back)?) 
{ Some(x) => { self.finger_back = self.finger + x; From 99e141c79b0defd9cae8e8fc44a3650609a28083 Mon Sep 17 00:00:00 2001 From: bendn Date: Wed, 4 Jun 2025 13:37:28 +0700 Subject: [PATCH 3/4] add more inline --- library/core/src/slice/memchr.rs | 1 + library/core/src/str/iter.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/library/core/src/slice/memchr.rs b/library/core/src/slice/memchr.rs index 1e1053583a617..edd67f58b7004 100644 --- a/library/core/src/slice/memchr.rs +++ b/library/core/src/slice/memchr.rs @@ -48,6 +48,7 @@ const fn memchr_naive(x: u8, text: &[u8]) -> Option<usize> { } #[rustc_allow_const_fn_unstable(const_eval_select)] // fallback impl has same behavior +#[inline] const fn memchr_aligned(x: u8, text: &[u8]) -> Option<usize> { // The runtime version behaves the same as the compiletime version, it's // just more optimized. diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 425c4eaee28ee..49c581f352eb3 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -656,7 +656,7 @@ impl<'a, P: Pattern> SplitInternal<'a, P> { None } - #[inline] + #[inline(always)] fn next(&mut self) -> Option<&'a str> { if self.finished { return None; From 5d60566b23c9ee3dd1496a0136b033f5c45ed415 Mon Sep 17 00:00:00 2001 From: bendn Date: Thu, 5 Jun 2025 02:48:24 +0700 Subject: [PATCH 4/4] Revert "add more inline" This reverts commit 99e141c79b0defd9cae8e8fc44a3650609a28083. 
--- library/core/src/slice/memchr.rs | 1 - library/core/src/str/iter.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/library/core/src/slice/memchr.rs b/library/core/src/slice/memchr.rs index edd67f58b7004..1e1053583a617 100644 --- a/library/core/src/slice/memchr.rs +++ b/library/core/src/slice/memchr.rs @@ -48,7 +48,6 @@ const fn memchr_naive(x: u8, text: &[u8]) -> Option<usize> { } #[rustc_allow_const_fn_unstable(const_eval_select)] // fallback impl has same behavior -#[inline] const fn memchr_aligned(x: u8, text: &[u8]) -> Option<usize> { // The runtime version behaves the same as the compiletime version, it's // just more optimized. diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 49c581f352eb3..425c4eaee28ee 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -656,7 +656,7 @@ impl<'a, P: Pattern> SplitInternal<'a, P> { None } - #[inline(always)] + #[inline] fn next(&mut self) -> Option<&'a str> { if self.finished { return None;