From 1ef0fdac936f90b3f57cc6c77f197ba61eb32fd4 Mon Sep 17 00:00:00 2001 From: Jun Kim Date: Tue, 26 Feb 2019 13:40:21 -0500 Subject: [PATCH 1/2] deleteByDistinctId added --- classes/AlgoliaEngine.inc.php | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/classes/AlgoliaEngine.inc.php b/classes/AlgoliaEngine.inc.php index 8392afb..d8ddc81 100644 --- a/classes/AlgoliaEngine.inc.php +++ b/classes/AlgoliaEngine.inc.php @@ -126,4 +126,24 @@ public function clear_index() return $e->getMessage(); } } + + /** + * Deletes an object by distictId + * @param $distinctId + * @return bool + */ + public function deleteByDistinctId($distinctId) + { + $index = $this->client->initIndex($this->index); + + try { + $index->deleteBy([ + 'filters' => 'distinctId:' . $distinctId, + ]); + } catch (AlgoliaException $e) { + return $e->getMessage(); + } + + return true; + } } \ No newline at end of file From 56297ecbb0bd1aaa78b4893d55aa4bc7900ae1af Mon Sep 17 00:00:00 2001 From: Jun Kim Date: Tue, 26 Feb 2019 13:40:45 -0500 Subject: [PATCH 2/2] clean up, bugfixes --- classes/AlgoliaService.inc.php | 57 ++++++++++++++++------------------ 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/classes/AlgoliaService.inc.php b/classes/AlgoliaService.inc.php index 6af5f9c..af4645c 100644 --- a/classes/AlgoliaService.inc.php +++ b/classes/AlgoliaService.inc.php @@ -21,12 +21,16 @@ define('ALGOLIA_INDEXINGSTATE_DIRTY', true); define('ALGOLIA_INDEXINGSTATE_CLEAN', false); -// // The max. number of articles that can -// // be indexed in a single batch. +// The max. number of articles that can +// be indexed in a single batch. define('ALGOLIA_INDEXING_MAX_BATCHSIZE', 2000); +// Number of words to split +define('ALGOLIA_WORDCOUNT_SPLIT', 250); + import('classes.search.ArticleSearch'); import('plugins.generic.algolia.classes.AlgoliaEngine'); +import('lib.pkp.classes.config.Config'); class AlgoliaService { var $indexer = null; @@ -176,7 +180,9 @@ function pushChangedArticles($batchSize = ALGOLIA_INDEXING_MAX_BATCHSIZE, $journ unset($toDelete); $this->indexer->clear_index(); }else{ - $this->indexer->delete($toDelete); + foreach($toDelete as $delete){ + $this->indexer->deleteByDistinctId($delete['distinctId']); + } } foreach($toAdd as $add){ @@ -199,7 +205,9 @@ function deleteArticleFromIndex($articleId) { $toDelete = array(); $toDelete[] = $this->buildAlgoliaObjectDelete($articleId); - $this->indexer->delete($toDelete); + foreach($toDelete as $delete){ + $this->indexer->deleteByDistinctId($delete['distinctId']); + } } /** @@ -328,13 +336,13 @@ function buildAlgoliaObjectDelete($articleOrArticleId){ if(!is_numeric($articleOrArticleId)) { return array( "objectAction" => "deleteObject", - "objectID" => $articleOrArticleId->getId(), + "distinctId" => $articleOrArticleId->getId(), ); } return array( "objectAction" => "deleteObject", - "objectID" => $articleOrArticleId, + "distinctId" => $articleOrArticleId, ); } @@ -485,27 +493,7 @@ function getGalleyHTML($article){ foreach($galleys as $galley){ if($galley->getFileType() == "text/html"){ $submissionFile = $galley->getFile(); - $contents = file_get_contents($submissionFile->getFilePath()); - - $contents = preg_replace( - '/([Ss][Rr][Cc]|[Hh][Rr][Ee][Ff]|[Dd][Aa][Tt][Aa])\s*=\s*"([^"]*' . $pattern . ')"/', - '\1="' . $fileUrl . '"', - $contents - ); - - // Replacement for Flowplayer - $contents = preg_replace( - '/[Uu][Rr][Ll]\s*\:\s*\'(' . $pattern . ')\'/', - 'url:\'' . $fileUrl . '\'', - $contents - ); - - // Replacement for other players (ested with odeo; yahoo and google player won't work w/ OJS URLs, might work for others) - $contents = preg_replace( - '/[Uu][Rr][Ll]=([^"]*' . $pattern . ')/', - 'url=' . $fileUrl , - $contents - ); + $contents .= file_get_contents($submissionFile->getFilePath()); } } @@ -515,12 +503,19 @@ function getGalleyHTML($article){ function chunkContent($content){ $data = array(); $updated_content = html_entity_decode($content); - $chunked_content = explode("

", wordwrap($updated_content, ALGOLIA_WORDCOUNT_SPLIT)); - foreach($chunked_content as $chunked){ - if($chunked){ - $data[] = strip_tags($chunked); + if($updated_content){ + $temp_content = str_replace("

", "", $updated_content); + $chunked_content = preg_split("/]*?(\/?)>/i", $temp_content); + + foreach($chunked_content as $chunked){ + if($chunked){ + $tagless_content = strip_tags($chunked); + $data[] = trim(wordwrap($tagless_content, ALGOLIA_WORDCOUNT_SPLIT)); + } } + }else{ + $data[] = trim(strip_tags($updated_content)); } return $data;