From 7834c47d3b27c123af1435244fcd8a1a22ad79d6 Mon Sep 17 00:00:00 2001 From: thessakockelkorn <70509512+thessakockelkorn@users.noreply.github.com> Date: Tue, 4 Jul 2023 12:08:03 +0200 Subject: [PATCH] feat: multilingual search (#42) * fix!: field component (#30) * Fix: combination render (#31) * fix: render form fields with uppercase in type * fix: use field->GetCombinationChildren to (default) render combination fields * fix: lint * fix: fixed created_at and updated_at columns not setting values (#32) * feat!: load siteboss routes from normal route file * feat: language as string, language-specific fields * fix: right query for spellcheck * fix: variable names * fix: passing of language url * fix: pass page id for custom search values * feat: optional sorting of search results * fix: remove debug * fix: change language column into string * feat: use config for sitemap * style: formatting * fix: writing sitemap * fix: replace html tags by space * style: formatting * feat: error when solr hostname cannot be resolved * fix: remove unnecessary code * fix: pass localized url --------- Co-authored-by: Xander Schuurman <44030544+keeama13@users.noreply.github.com> Co-authored-by: M.A. Peene Co-authored-by: Merijn van Ginkel <107470233+nfmerijn@users.noreply.github.com> Co-authored-by: Rene Co-authored-by: Thessa Kockelkorn --- ...49_change_search_table_language_column.php | 27 ++++ .../mail/indexer/file-index-error.blade.php | 2 +- routes/api.php | 2 +- src/Models/Indexes/SolrIndex.php | 71 ++++++++--- src/Models/Indexes/SolrItem.php | 11 +- src/Services/Indexer/AbstractIndexService.php | 4 +- src/Services/Indexer/IndexBuilderService.php | 118 +++++++++--------- src/Services/Indexer/SolrIndexService.php | 5 +- 8 files changed, 156 insertions(+), 84 deletions(-) create mode 100644 database/migrations/2023_06_30_134549_change_search_table_language_column.php diff --git a/database/migrations/2023_06_30_134549_change_search_table_language_column.php b/database/migrations/2023_06_30_134549_change_search_table_language_column.php new file mode 100644 index 00000000..dee3acb2 --- /dev/null +++ b/database/migrations/2023_06_30_134549_change_search_table_language_column.php @@ -0,0 +1,27 @@ +string('language', 64)->change(); + }); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + // + } +}; diff --git a/resources/views/mail/indexer/file-index-error.blade.php b/resources/views/mail/indexer/file-index-error.blade.php index 57beb5ca..f2996563 100644 --- a/resources/views/mail/indexer/file-index-error.blade.php +++ b/resources/views/mail/indexer/file-index-error.blade.php @@ -1,5 +1,5 @@ @component('mail::message') -

Document {{ document }} op server {{ server }} geeft de volgende fout: {{ error }}

+

Document {{ $document }} op server {{ $server }} geeft de volgende fout: {{ $error }}

@endcomponent \ No newline at end of file diff --git a/routes/api.php b/routes/api.php index 0f6e4d93..6c380f83 100644 --- a/routes/api.php +++ b/routes/api.php @@ -10,7 +10,6 @@ use NotFound\Framework\Http\Controllers\SettingsController; use NotFound\Framework\Http\Controllers\Support\SupportController; use NotFound\Framework\Http\Controllers\UserPreferencesController; -use Siteboss\Routes\SiteRoutes; use Spatie\Honeypot\ProtectAgainstSpam; // ContentBlock @@ -24,6 +23,7 @@ | is assigned the "api" middleware group. Enjoy building your API! | */ + Route::prefix(config('siteboss.api_prefix'))->group(function () { // Unauthenticated routes Route::prefix('api')->group(function () { diff --git a/src/Models/Indexes/SolrIndex.php b/src/Models/Indexes/SolrIndex.php index a795d8d4..facd3733 100644 --- a/src/Models/Indexes/SolrIndex.php +++ b/src/Models/Indexes/SolrIndex.php @@ -130,13 +130,12 @@ public function testSolrConnection() return false; } - public function addOrUpdateItem(string $url, string $title, string $contents, string $type, int $lang, int $siteId, array $customValues, int $priority): bool + public function addOrUpdateItem(string $url, string $title, string $contents, string $type, string $lang, int $siteId, array $customValues, int $priority): bool { $curl = $this->solrHandler(); - $doc = [ - 'title' => $title, - 'content' => html_entity_decode(trim(preg_replace('/\s+/', ' ', strip_tags($contents)))), + sprintf('title_%s', $lang) => $title, + sprintf('content_%s', $lang) => html_entity_decode(trim(preg_replace('/\s+/', ' ', preg_replace('#<[^>]+>#', ' ', $contents)))), 'type' => $type, 'url' => $url, 'priority' => $priority, @@ -157,6 +156,11 @@ public function addOrUpdateItem(string $url, string $title, string $contents, st curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode($payload)); $result = curl_exec($curl); + + if (curl_errno($curl) === 6) { + exit('[ERROR] Could not resolve solr host: '.$this->getSolrBaseUrl()); + } + $json = json_decode($result); if ($json && isset($json->responseHeader) && $json->responseHeader->status == 0) { return true; @@ -192,7 +196,7 @@ public function removeItem($url) return false; } - public function addOrUpdateFile(string $url, string $title, string $file, string $type, int $lang, int $siteId, array $customValues, int $priority): string + public function addOrUpdateFile(string $url, string $title, string $file, string $type, string $lang, int $siteId, array $customValues, int $priority): string { // find out of document exists $result = 0; @@ -202,9 +206,10 @@ public function addOrUpdateFile(string $url, string $title, string $file, string $curl = $this->solrHandler(); $endpoint = sprintf( - '%s/update/extract?literal.url=%s&literal.title=%s&literal.type=%s&literal.site=%s&literal.language=%d&commit=true', + '%s/update/extract?literal.url=%s&literal.title_%s=%s&literal.type=%s&literal.site=%s&literal.language=%d&commit=true', $this->getSolrBaseUrl(), urlencode($url), + $lang, urlencode($title), $type, $siteId, @@ -268,22 +273,25 @@ private function mailQueryError($query, $result) } } - public function selectItems($query, $filter = null, $start = null, $rows = null, $extraColumns = [], $highlightLength = 50) + public function selectItems($query, $lang = 'nl', $filter = null, $start = null, $rows = null, $extraColumns = [], $highlightLength = 50, $sortField = null, $sortDirection = 'desc') { $curl = $this->solrHandler(); $url = sprintf( - '%s/select?q=title:%s%%20content:%s&spellcheck.q=%s&wt=%s&hl=%s&q.op=%s&hl.fl=%s&fl=%s&spellcheck=true&hl.fragsize=%d&hl.maxAnalyzedChars=%d', + '%s/select?q=title_%s:%s%%20content_%s:%s&spellcheck.q=%s&wt=%s&hl=%s&q.op=%s&hl.fl=%s&fl=%s&spellcheck=true&hl.fragsize=%d&hl.maxAnalyzedChars=%d&spellcheck.dictionary=spellcheck_%s', $this->getSolrBaseUrl(), + $lang, + rawurlencode($query), // make sure + between search terms is preserved + $lang, rawurlencode($query), // make sure + between search terms is preserved rawurlencode($query), // make sure + between search terms is preserved - rawurlencode($query), // make sure + between search terms is preserved rawurlencode($query), // make sure + between search terms is preserved $this->wt, $this->hl, $this->selectOperator, - $this->hlfl, + sprintf('%s_%s', $this->hlfl, $lang), urlencode($this->fl), $this->hlfragsize, $this->hlmaxAnalyzedChars, + $lang ); if ($filter) { $url .= '&fq='.$filter; @@ -295,12 +303,11 @@ public function selectItems($query, $filter = null, $start = null, $rows = null, if ($rows && is_int($rows)) { $url .= '&rows='.$rows; } - if (count($extraColumns) > 0) { } - if ($this->sort) { - $url .= '&sort='.urlencode($this->sort); + if ($sortField) { + $url .= '&sort='.urlencode($sortField.' '.$sortDirection); } curl_setopt($curl, CURLOPT_URL, $url); @@ -359,7 +366,6 @@ public function buildSuggester() $url = sprintf('%s&suggest.build=true', $this->suggestUrl()); curl_setopt($curl, CURLOPT_URL, $url); - $result = curl_exec($curl); $json = json_decode($result); $searchResults = new SolrItem($json, null); @@ -367,10 +373,45 @@ public function buildSuggester() return $searchResults; } + private function getConfig() + { + $curl = $this->solrHandler(); + $url = sprintf('%s/config/searchComponent?componentName=suggest', $this->getSolrBaseUrl()); + curl_setopt($curl, CURLOPT_URL, $url); + curl_setopt($curl, CURLOPT_POST, false); + + $result = curl_exec($curl); + $json = json_decode($result); + + return $json; + } + + private function allSuggesters() + { + $json = $this->getConfig(); + $suggesters = []; + if ( + $json && isset($json->responseHeader) + && $json->responseHeader->status == 0 + && isset($json->config->searchComponent->suggest->suggester) + ) { + $list = $json->config->searchComponent->suggest->suggester; + foreach ($list as $s) { + $suggesters[] = $s->name; + } + } + + return $suggesters; + } + private function explodeSuggesters(): string { $suggesterString = ''; - foreach ($this->suggester as $s) { + $suggesters = $this->suggester; + if (count($suggesters) == 0) { + $suggesters = $this->allSuggesters(); + } + foreach ($suggesters as $s) { $suggesterString .= sprintf('&suggest.dictionary=%s', $s); } diff --git a/src/Models/Indexes/SolrItem.php b/src/Models/Indexes/SolrItem.php index ad627228..7f14e768 100644 --- a/src/Models/Indexes/SolrItem.php +++ b/src/Models/Indexes/SolrItem.php @@ -41,7 +41,7 @@ public function __construct($solr, $q, $collate = false, private int $highlightL $this->fl = explode(' ', $fl); $this->results = isset($solr->response->docs) ? $solr->response->docs : null; $this->highlights = isset($solr->highlighting) ? $solr->highlighting : null; - $this->spellcheck = $solr->spellcheck ?? null; + $this->spellcheck = isset($solr->spellcheck) ? $solr->spellcheck : null; $this->suggest = isset($solr->suggest) ? $solr->suggest : null; $this->number = isset($solr->response->numFound) ? $solr->response->numFound : 0; @@ -84,7 +84,8 @@ public function resultList() if ($column == 'url') { $resultArray[$column] = $this->parseUrl($result->{$column}); } else { - $resultArray[$column] = isset($result->{$column}[0]) ? $result->{$column}[0] : ''; + $columnName = preg_replace('/(_[a-zA-Z]{2}$)/', '', $column); + $resultArray[$columnName] = isset($result->{$column}) ? $result->{$column} : ''; } } @@ -186,12 +187,12 @@ public function spellcheckList() foreach ($this->spellcheck->suggestions as $suggestion) { if (isset($suggestion->startOffset)) { $suggest = substr($query, 0, $suggestion->startOffset).''.$suggestion->suggestion[0].''.substr($query, $suggestion->endOffset); - $suggest = preg_replace('/^([a-zA-Z])+:/', '', $suggest); // remove search field if necessary + $suggestTerm = preg_replace('/^([a-zA-Z])+(_[a-zA-Z]{2})?:/', '', $suggest); // remove search field if necessary $suggest_url = substr($query, 0, $suggestion->startOffset).$suggestion->suggestion[0].substr($query, $suggestion->endOffset); - $suggest_url = preg_replace('/^([a-zA-Z])+:/', '', $suggest_url); // remove search field if necessary + $suggest_url = preg_replace('/^([a-zA-Z])+(_[a-zA-Z]{2})?:/', '', $suggest_url); // remove search field if necessary - $items[] = (object) ['link' => '?q='.rawurlencode(urldecode($suggest_url)), 'text' => urldecode($suggest)]; + $items[] = (object) ['link' => '?q='.rawurlencode(urldecode($suggest_url)), 'text' => urldecode($suggestTerm)]; } } } diff --git a/src/Services/Indexer/AbstractIndexService.php b/src/Services/Indexer/AbstractIndexService.php index c142af30..0add32a8 100644 --- a/src/Services/Indexer/AbstractIndexService.php +++ b/src/Services/Indexer/AbstractIndexService.php @@ -18,7 +18,7 @@ abstract public function finishUpdate(): object; abstract public function urlNeedsUpdate(string $url, $updated): bool; - abstract public function upsertUrl(string $url, string $title, string $contents, string $type, int $lang, array $customValues = []): object; + abstract public function upsertUrl(string $url, string $title, string $contents, string $type, string $lang, array $customValues = []): object; - abstract public function upsertFile(string $url, string $title, string $file, string $type, int $lang, array $customValues): object; + abstract public function upsertFile(string $url, string $title, string $file, string $type, string $lang, array $customValues): object; } diff --git a/src/Services/Indexer/IndexBuilderService.php b/src/Services/Indexer/IndexBuilderService.php index c77fa385..5e70796f 100644 --- a/src/Services/Indexer/IndexBuilderService.php +++ b/src/Services/Indexer/IndexBuilderService.php @@ -13,9 +13,7 @@ class IndexBuilderService private $locales; - private $defaultLocale; - - private $domainName; + private $domain; private $sitemapFile; @@ -26,9 +24,7 @@ public function __construct(string $serverType, $debug = false) $this->debug = $debug; $this->locales = Lang::all(); - $locale = env('SB_LOCALES_DEFAULT', 'nl'); - $this->defaultLocale = Lang::where('url', $locale)->get(); - $this->domainName = env('APP_NAME'); + $this->domain = rtrim(env('APP_URL', ''), '/'); switch ($serverType) { case 'solr': $this->searchServer = new SolrIndexService($this->debug); @@ -50,7 +46,7 @@ public function run() foreach ($sites as $site) { $siteName = $site->name; - $sitemapFileName = env('APP_SITEMAP'); + $sitemapFileName = config('solr.sitemap'); if ($sitemapFileName) { $this->createFolderIfNotExists($sitemapFileName); $this->sitemapFile = fopen($sitemapFileName, 'w') or exit('Could not open sitemap file for writing'); @@ -104,15 +100,10 @@ private function indexChildPages($parentId) $menu = Menu::whereId($page->id)->firstOrFail(); - if ($this->searchServer->urlNeedsUpdate($menu->getPath(), strtotime($menu->updated_at))) { - $this->writeDebug(': update needed: '); - - foreach ($this->locales as $lang) { - $this->updatePage($menu, $lang); - } - } else { - $this->writeDebug(": Does not need updating\n"); + foreach ($this->locales as $lang) { + $this->updatePage($menu, $lang); } + // index subitems for page foreach ($this->locales as $lang) { $this->updateSubPages($menu, $lang); @@ -124,59 +115,63 @@ private function indexChildPages($parentId) private function updatePage($menu, $lang) { - $success = true; app()->setLocale($lang->url); - - if ($this->sitemapFile) { - $sitemap = ''; - } - $searchText = ''; - $pageService = new PageService($menu, $lang); - $title = $menu->getTitle($lang); if (count($this->locales) == 1) { $url = $menu->getPath(); } else { $url = $menu->getLocalizedPath(); } - $searchText = $pageService->getContentForIndexer(); - // continue with customValues - $customValues = []; + if ($this->searchServer->urlNeedsUpdate($url, strtotime($menu->updated_at))) { + $this->writeDebug(': update needed: '); - $class = $menu->template->filename ?? ''; - $className = 'App\Http\Controllers\Page\\'.$class.'Controller'; - $c = null; - $priority = 1; - if (class_exists($className)) { - $c = new $className(); - if (method_exists($className, 'customSearchValues')) { - $customValues = $c->customSearchValues(); - } - if (method_exists($className, 'searchPriority')) { - $priority = $c->searchPriority(); - } - } + $searchText = ''; + $pageService = new PageService($menu, $lang); + $title = $menu->getTitle($lang); - $searchText = rtrim($searchText, ', '); - if (! empty($title) && ! empty($searchText)) { - $result = $this->searchServer->upsertUrl($url, $title, $searchText, 'page', $lang->id, $customValues, $priority); + $searchText = $pageService->getContentForIndexer(); - if ($result->errorCode == 0) { - $this->writeDebug(" success\n"); - } else { - $this->writeDebug(" FAILED\n"); + // continue with customValues + $customValues = []; + + $class = $menu->template->filename ?? ''; + $className = 'App\Http\Controllers\Page\\'.$class.'Controller'; + $c = null; + $priority = 1; + if (class_exists($className)) { + $c = new $className(); + if (method_exists($className, 'customSearchValues')) { + $customValues = $c->customSearchValues($menu->id); + } + if (method_exists($className, 'searchPriority')) { + $priority = $c->searchPriority(); + } } - if ($this->sitemapFile) { - // update sitemap - $sitemap .= sprintf( - "%s%s\r\n", - $this->domainName, - $url - ); + $searchText = rtrim($searchText, ', '); + if (! empty($title) && ! empty($searchText)) { + $result = $this->searchServer->upsertUrl($url, $title, $searchText, 'page', $lang->url, $customValues, $priority); + + if ($result->errorCode == 0) { + $this->writeDebug(" success\n"); + } else { + $this->writeDebug(" FAILED\n"); + } + } else { + $this->writeDebug(" empty page or title\n"); } } else { - $this->writeDebug(" empty page or title\n"); + $this->writeDebug(": Does not need updating\n"); + } + + if ($this->sitemapFile) { + // update sitemap + $sitemap = sprintf( + "%s%s\r\n", + $this->domain, + $url + ); + fwrite($this->sitemapFile, $sitemap); } } @@ -207,15 +202,15 @@ private function updateSubitems($class, $lang) $success = true; if ($searchItem['isFile']) { - $success = $this->searchServer->upsertFile($url, $searchItem['title'], $searchItem['file'], $searchItem['type'], $lang->id, $searchItem['customValues'], $searchItem['priority']); + $success = $this->searchServer->upsertFile($url, $searchItem['title'], $searchItem['file'], $searchItem['type'], $lang->url, $searchItem['customValues'], $searchItem['priority']); } else { // subitem is table row - $success = $this->searchServer->upsertUrl($url, $searchItem['title'], $searchItem['content'], $searchItem['type'], $lang->id, $searchItem['customValues'], $searchItem['priority']); + $success = $this->searchServer->upsertUrl($url, $searchItem['title'], $searchItem['content'], $searchItem['type'], $lang->url, $searchItem['customValues'], $searchItem['priority']); } if ($this->sitemapFile && $searchItem['sitemap']) { $sitemap = sprintf( "%s%s\r\n", - $this->domainName, + $this->domain, $url ); } @@ -228,6 +223,15 @@ private function updateSubitems($class, $lang) } else { $this->writeDebug(": Does not need updating\n"); } + + if ($this->sitemapFile) { + $sitemap = sprintf( + "%s%s\r\n", + $this->domain, + $url + ); + fwrite($this->sitemapFile, $sitemap); + } } } } diff --git a/src/Services/Indexer/SolrIndexService.php b/src/Services/Indexer/SolrIndexService.php index 92aa1dc9..22c59b8b 100644 --- a/src/Services/Indexer/SolrIndexService.php +++ b/src/Services/Indexer/SolrIndexService.php @@ -32,7 +32,7 @@ public function urlNeedsUpdate(string $url, $updated): bool return true; } - public function upsertUrl(string $url, string $title, string $contents, string $type, int $lang, array $customValues = [], $priority = 1): object + public function upsertUrl(string $url, string $title, string $contents, string $type, string $lang, array $customValues = [], $priority = 1): object { $result = $this->solrIndex->addOrUpdateItem($this->siteUrl($url), $title, $contents, $type, $lang, $this->siteId, $customValues, $priority); $return = $this->returnvalue(); @@ -51,7 +51,7 @@ public function upsertUrl(string $url, string $title, string $contents, string $ return $return; } - public function upsertFile(string $url, string $title, string $file, string $type, int $lang, array $customValues = [], $priority = 1): object + public function upsertFile(string $url, string $title, string $file, string $type, string $lang, array $customValues = [], $priority = 1): object { $result = $this->solrIndex->addOrUpdateFile($this->siteUrl($url), $title, $file, $type, $lang, $this->siteId, $customValues, $priority); @@ -107,7 +107,6 @@ public function finishUpdate(): object { $return = $this->removeAllPending(); - $this->solrIndex->suggester = ['fulltextsuggester', 'titlesuggester']; $build = $this->solrIndex->buildSuggester(); if ($build->error) {