diff --git a/src/S2/Rose/Entity/Metadata/SentenceCollection.php b/src/S2/Rose/Entity/Metadata/SentenceCollection.php
index 8554275..40d8c19 100644
--- a/src/S2/Rose/Entity/Metadata/SentenceCollection.php
+++ b/src/S2/Rose/Entity/Metadata/SentenceCollection.php
@@ -1,6 +1,6 @@
foundWords);
+ $wordPattern = implode('|', array_map(static fn(string $word) => preg_quote($word, '#'), $this->foundWords));
$wordPatternWithFormatting = '(?:\\\\[' . StringHelper::FORMATTING_SYMBOLS . '])*(?:' . $wordPattern . ')(?:\\\\[' . strtoupper(StringHelper::FORMATTING_SYMBOLS) . '])*';
$replacedLine = preg_replace_callback(
'#(?:\\s|^|\p{P})\\K' . $wordPatternWithFormatting . '(?:\\s+(?:' . $wordPatternWithFormatting . '))*\\b#su',
diff --git a/src/S2/Rose/Indexer.php b/src/S2/Rose/Indexer.php
index 9758b34..374ae64 100644
--- a/src/S2/Rose/Indexer.php
+++ b/src/S2/Rose/Indexer.php
@@ -2,7 +2,7 @@
/**
* Creates search index
*
- * @copyright 2010-2023 Roman Parpalak
+ * @copyright 2010-2024 Roman Parpalak
* @license MIT
*/
@@ -207,8 +207,8 @@ private function getStemsWithComponents(array $words): array
// If the word contains punctuation marks like hyphen, add a variant without it
if (false !== strpbrk($stemmedWord, '-.,')) {
- foreach (preg_split('#[\-.,]#', $word) as $k => $subWord) {
- if ($subWord) {
+ foreach (preg_split('#[\p{L}\d]\K[\-.,]+|[\-.,]+(?=[\p{L}\d])#u', $word) as $k => $subWord) {
+ if ($subWord !== '' && $subWord !== $word) {
$componentsOfCompoundWords[(string)($i + 0.001 * ($k + 1))] = $this->stemmer->stemWord($subWord, false);
}
}
diff --git a/src/S2/Rose/Snippet/SnippetBuilder.php b/src/S2/Rose/Snippet/SnippetBuilder.php
index c6b5501..5a7fbd7 100644
--- a/src/S2/Rose/Snippet/SnippetBuilder.php
+++ b/src/S2/Rose/Snippet/SnippetBuilder.php
@@ -1,6 +1,6 @@
stemmer->irregularWordsFromStems($stems));
$regexRules = $this->stemmer->getRegexTransformationRules();
+ $regexRules['#\\.#'] = '\\.'; // escape literal dots in stems so the following preg_match_all() call does not treat them as regex wildcards
$stemsForRegex = array_map(static fn(string $stem): string => preg_replace(
array_keys($regexRules),
array_values($regexRules),
diff --git a/tests/unit/Rose/Entity/QueryTest.php b/tests/unit/Rose/Entity/QueryTest.php
index 831a17a..4b7fc8c 100644
--- a/tests/unit/Rose/Entity/QueryTest.php
+++ b/tests/unit/Rose/Entity/QueryTest.php
@@ -1,6 +1,6 @@
assertEquals([1, 2], (new Query('1|||2'))->valueToArray());
$this->assertEquals([1, 2], (new Query('1\\\\\\2'))->valueToArray());
$this->assertEquals(['a', 'b'], (new Query('a/b'))->valueToArray());
$this->assertEquals(['a', 'b'], (new Query(' a b '))->valueToArray());
+ $this->assertEquals(['..'], (new Query('..'))->valueToArray());
+ $this->assertEquals(['...'], (new Query('...'))->valueToArray());
+ $this->assertEquals(['a..b'], (new Query('a..b'))->valueToArray());
}
}
diff --git a/tests/unit/Rose/IntegrationTest.php b/tests/unit/Rose/IntegrationTest.php
index 16fc955..8f97d70 100644
--- a/tests/unit/Rose/IntegrationTest.php
+++ b/tests/unit/Rose/IntegrationTest.php
@@ -2,7 +2,7 @@
/** @noinspection PhpComposerExtensionStubsInspection */
/**
- * @copyright 2016-2023 Roman Parpalak
+ * @copyright 2016-2024 Roman Parpalak
* @license MIT
*/
@@ -90,7 +90,7 @@ public function testFeatures(
$this->assertEquals([
'20:id_2' => 2.5953804134970615,
- '20:id_1' => 0.12828323517212156,
+ '20:id_1' => 0.12932092968696407,
'10:id_1' => 0.08569157515491249,
], $resultSet2->getSortedRelevanceByExternalId());
@@ -112,7 +112,7 @@ public function testFeatures(
$this->assertEquals([
'20:id_2' => 2.5953804134970615,
- '20:id_1' => 0.12828323517212156
+ '20:id_1' => 0.12932092968696407
], $resultSet2->getSortedRelevanceByExternalId());
$this->assertEquals(3, $resultSet2->getTotalCount());
@@ -265,6 +265,10 @@ public function testFeatures(
$this->assertEquals('25', $img1->getHeight());
$this->assertEquals('Alternative text', $img1->getAlt());
+ // Empty result
+ $this->assertCount(0, $finder->find(new Query('..'))->getItems());
+ $this->assertCount(0, $finder->find(new Query('...'))->getItems());
+
if ($readStorage instanceof PdoStorage && strpos($GLOBALS['s2_rose_test_db']['dsn'], 'sqlite') !== 0) {
$indexer->index(new Indexable('dummy', 'Dummy new', ''));
$similarItems = $readStorage->getSimilar(new ExternalId('id_2', 20), false);
@@ -418,7 +422,7 @@ public function indexableProvider()
->setDate(new \DateTime('2016-08-24 00:00:00'))
->setUrl('url1')
,
- (new Indexable('id_1', 'Another instance', 'The same id but another instance. Word "content" is present here. Twice: content.', 20))
+ (new Indexable('id_1', 'Another instance', 'The same id but another instance. Word "content" is present here. Twice: content. Delimiters must be $...$ or \[...\]', 20))
,
(new Indexable('id_4', 'Another instance', 'Nothing is here but images: ', 20))
,