From 1ef0fdac936f90b3f57cc6c77f197ba61eb32fd4 Mon Sep 17 00:00:00 2001
From: Jun Kim
Date: Tue, 26 Feb 2019 13:40:21 -0500
Subject: [PATCH 1/2] deleteByDistinctId added
---
classes/AlgoliaEngine.inc.php | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/classes/AlgoliaEngine.inc.php b/classes/AlgoliaEngine.inc.php
index 8392afb..d8ddc81 100644
--- a/classes/AlgoliaEngine.inc.php
+++ b/classes/AlgoliaEngine.inc.php
@@ -126,4 +126,24 @@ public function clear_index()
return $e->getMessage();
}
}
+
+ /**
+ * Issues a deleteBy() request on the configured index using the filter "distinctId:<$distinctId>".
+ * @param $distinctId value appended verbatim to the "distinctId:" filter expression
+ * @return bool|string true on success; the AlgoliaException message if the request throws
+ */
+ public function deleteByDistinctId($distinctId)
+ {
+ $index = $this->client->initIndex($this->index);
+
+ try {
+ $index->deleteBy([
+ 'filters' => 'distinctId:' . $distinctId,
+ ]);
+ } catch (AlgoliaException $e) {
+ return $e->getMessage(); // failure is handed back as a string rather than re-thrown
+ }
+
+ return true;
+ }
}
\ No newline at end of file
From 56297ecbb0bd1aaa78b4893d55aa4bc7900ae1af Mon Sep 17 00:00:00 2001
From: Jun Kim
Date: Tue, 26 Feb 2019 13:40:45 -0500
Subject: [PATCH 2/2] clean up, bugfixes
---
classes/AlgoliaService.inc.php | 57 ++++++++++++++++------------------
1 file changed, 26 insertions(+), 31 deletions(-)
diff --git a/classes/AlgoliaService.inc.php b/classes/AlgoliaService.inc.php
index 6af5f9c..af4645c 100644
--- a/classes/AlgoliaService.inc.php
+++ b/classes/AlgoliaService.inc.php
@@ -21,12 +21,16 @@
define('ALGOLIA_INDEXINGSTATE_DIRTY', true);
define('ALGOLIA_INDEXINGSTATE_CLEAN', false);
-// // The max. number of articles that can
-// // be indexed in a single batch.
+// The max. number of articles that can
+// be indexed in a single batch.
define('ALGOLIA_INDEXING_MAX_BATCHSIZE', 2000);
+// Wrap width passed to wordwrap() when chunking content (counted in characters, despite the name)
+define('ALGOLIA_WORDCOUNT_SPLIT', 250);
+
import('classes.search.ArticleSearch');
import('plugins.generic.algolia.classes.AlgoliaEngine');
+import('lib.pkp.classes.config.Config');
class AlgoliaService {
var $indexer = null;
@@ -176,7 +180,9 @@ function pushChangedArticles($batchSize = ALGOLIA_INDEXING_MAX_BATCHSIZE, $journ
unset($toDelete);
$this->indexer->clear_index();
}else{
- $this->indexer->delete($toDelete);
+ foreach($toDelete as $delete){
+ $this->indexer->deleteByDistinctId($delete['distinctId']);
+ }
}
foreach($toAdd as $add){
@@ -199,7 +205,9 @@ function deleteArticleFromIndex($articleId) {
$toDelete = array();
$toDelete[] = $this->buildAlgoliaObjectDelete($articleId);
- $this->indexer->delete($toDelete);
+ foreach($toDelete as $delete){
+ $this->indexer->deleteByDistinctId($delete['distinctId']);
+ }
}
/**
@@ -328,13 +336,13 @@ function buildAlgoliaObjectDelete($articleOrArticleId){
if(!is_numeric($articleOrArticleId)) {
return array(
"objectAction" => "deleteObject",
- "objectID" => $articleOrArticleId->getId(),
+ "distinctId" => $articleOrArticleId->getId(),
);
}
return array(
"objectAction" => "deleteObject",
- "objectID" => $articleOrArticleId,
+ "distinctId" => $articleOrArticleId,
);
}
@@ -485,27 +493,7 @@ function getGalleyHTML($article){
foreach($galleys as $galley){
if($galley->getFileType() == "text/html"){
$submissionFile = $galley->getFile();
- $contents = file_get_contents($submissionFile->getFilePath());
-
- $contents = preg_replace(
- '/([Ss][Rr][Cc]|[Hh][Rr][Ee][Ff]|[Dd][Aa][Tt][Aa])\s*=\s*"([^"]*' . $pattern . ')"/',
- '\1="' . $fileUrl . '"',
- $contents
- );
-
- // Replacement for Flowplayer
- $contents = preg_replace(
- '/[Uu][Rr][Ll]\s*\:\s*\'(' . $pattern . ')\'/',
- 'url:\'' . $fileUrl . '\'',
- $contents
- );
-
- // Replacement for other players (ested with odeo; yahoo and google player won't work w/ OJS URLs, might work for others)
- $contents = preg_replace(
- '/[Uu][Rr][Ll]=([^"]*' . $pattern . ')/',
- 'url=' . $fileUrl ,
- $contents
- );
+ $contents .= file_get_contents($submissionFile->getFilePath());
}
}
@@ -515,12 +503,19 @@ function getGalleyHTML($article){
function chunkContent($content){
$data = array();
$updated_content = html_entity_decode($content);
- $chunked_content = explode("
", wordwrap($updated_content, ALGOLIA_WORDCOUNT_SPLIT));
- foreach($chunked_content as $chunked){
- if($chunked){
- $data[] = strip_tags($chunked);
+ if($updated_content){
+ $temp_content = str_replace("", "", $updated_content);
+ $chunked_content = preg_split("/]*?(\/?)>/i", $temp_content);
+
+ foreach($chunked_content as $chunked){
+ if($chunked){
+ $tagless_content = strip_tags($chunked);
+ $data[] = trim(wordwrap($tagless_content, ALGOLIA_WORDCOUNT_SPLIT));
+ }
}
+ }else{
+ $data[] = trim(strip_tags($updated_content));
}
return $data;