Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Merge #592
Browse files Browse the repository at this point in the history
592: Fix(smart-crop): Ensure that matches are all highlighted r=loiclec a=ManyTheFish

When only one query word matched in an attribute,
and that same word matched several times in the crop window of the same attribute,
only the first match was highlighted.
Now we ensure that the computed "best matches interval" contains all the words that match in the same crop window.

related to meilisearch/meilisearch#2627



Co-authored-by: ManyTheFish <[email protected]>
  • Loading branch information
bors[bot] and ManyTheFish authored Jul 21, 2022
2 parents 83ad1aa + eeae3f5 commit 132558b
Show file tree
Hide file tree
Showing 10 changed files with 36 additions and 16 deletions.
2 changes: 1 addition & 1 deletion benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "benchmarks"
version = "0.31.1"
version = "0.31.2"
edition = "2018"
publish = false

Expand Down
2 changes: 1 addition & 1 deletion cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "cli"
version = "0.31.1"
version = "0.31.2"
edition = "2018"
description = "A CLI to interact with a milli index"
publish = false
Expand Down
2 changes: 1 addition & 1 deletion filter-parser/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "filter-parser"
version = "0.31.1"
version = "0.31.2"
edition = "2021"
description = "The parser for the Meilisearch filter syntax"
publish = false
Expand Down
2 changes: 1 addition & 1 deletion flatten-serde-json/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "flatten-serde-json"
version = "0.31.1"
version = "0.31.2"
edition = "2021"
description = "Flatten serde-json objects like elastic search"
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion helpers/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "helpers"
version = "0.31.1"
version = "0.31.2"
authors = ["Clément Renault <[email protected]>"]
edition = "2018"
description = "A small tool to do operations on the database"
Expand Down
2 changes: 1 addition & 1 deletion http-ui/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "http-ui"
description = "The HTTP user interface of the milli search engine"
version = "0.31.1"
version = "0.31.2"
authors = ["Clément Renault <[email protected]>"]
edition = "2018"
publish = false
Expand Down
2 changes: 1 addition & 1 deletion infos/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "infos"
version = "0.31.1"
version = "0.31.2"
authors = ["Clément Renault <[email protected]>"]
edition = "2018"
publish = false
Expand Down
2 changes: 1 addition & 1 deletion json-depth-checker/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "json-depth-checker"
version = "0.31.1"
version = "0.31.2"
edition = "2021"
description = "A library that indicates if a JSON must be flattened"
publish = false
Expand Down
2 changes: 1 addition & 1 deletion milli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "milli"
version = "0.31.1"
version = "0.31.2"
authors = ["Kerollmops <[email protected]>"]
edition = "2018"

Expand Down
34 changes: 27 additions & 7 deletions milli/src/search/matches/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,11 +359,11 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
if matches.len() > 1 {
// positions of the first and the last match of the best matches interval in `matches`.
let mut best_interval = (0, 0);
let mut best_interval_score = self.match_interval_score(&matches[0..=0]);
let mut best_interval_score = None;
// current interval positions.
let mut interval_first = 0;
let mut interval_last = 0;
for (index, next_match) in matches.iter().enumerate().skip(1) {
for (index, next_match) in matches.iter().enumerate() {
// if next match would make interval grow more than crop_size,
// we compare the current interval with the best one,
// then we increase `interval_first` until next match can be added.
Expand All @@ -372,10 +372,15 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
self.match_interval_score(&matches[interval_first..=interval_last]);

// keep interval if it's the best
if interval_score > best_interval_score {
best_interval = (interval_first, interval_last);
best_interval_score = interval_score;
}
best_interval_score = match best_interval_score.take() {
Some(best_interval_score) if interval_score <= best_interval_score => {
Some(best_interval_score)
}
_ => {
best_interval = (interval_first, interval_last);
Some(interval_score)
}
};

// advance start of the interval while interval is longer than crop_size.
while next_match.word_position - matches[interval_first].word_position
Expand All @@ -390,7 +395,9 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
// compute the last interval score and compare it to the best one.
let interval_score =
self.match_interval_score(&matches[interval_first..=interval_last]);
if interval_score > best_interval_score {
if best_interval_score
.map_or(true, |best_interval_score| interval_score > best_interval_score)
{
best_interval = (interval_first, interval_last);
}

Expand Down Expand Up @@ -743,6 +750,19 @@ mod tests {
&matcher.format(format_options),
"…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"
);

// testing https://github.com/meilisearch/meilisearch/issues/2627
let matching_words = vec![(vec![MatchingWord::new("test".to_string(), 0, true)], vec![0])];
let matching_words = MatchingWords::new(matching_words);
let builder = MatcherBuilder::from_matching_words(matching_words);

let text = "Lorem ipsum dolor test sit amet, consetetur sadipscing elitr, sed test diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat.";
let mut matcher = builder.build(text);
let format_options = FormatOptions { highlight: true, crop: Some(20) };
assert_eq!(
&matcher.format(format_options),
"Lorem ipsum dolor <em>test</em> sit amet, consetetur sadipscing elitr, sed <em>test</em> diam nonumy eirmod tempor invidunt ut labore et dolore…"
);
}

#[test]
Expand Down

0 comments on commit 132558b

Please sign in to comment.