Skip to content

Commit

Permalink
Allow batch_size per class and index definition
Browse files Browse the repository at this point in the history
  • Loading branch information
chrispenny committed Dec 15, 2024
1 parent fc64778 commit 987de78
Show file tree
Hide file tree
Showing 15 changed files with 481 additions and 54 deletions.
8 changes: 6 additions & 2 deletions docs/en/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ SilverStripe\Forager\Service\IndexConfiguration:
myindex:
includeClasses:
SilverStripe\CMS\Model\SiteTree:
batch_size: 50
fields:
title:
property: Title
Expand All @@ -36,6 +37,9 @@ should have the `SearchServiceExtension` applied, however. This is discussed fur
* `SilverStripe\CMS\Model\SiteTree`: This class already has the necessary extension applied
to it as a default configuration from the module

* `batch_size`: This batch size definition specifically applies when we are queueing jobs for this class and index. If
no class specific batch size is defined, then the default batch size will be used

* `fields`: The fields you want to index. This is a map of the _search field name_ as the key
(how you want it to be listed in your search index) to either a boolean, or another map

Expand Down Expand Up @@ -119,8 +123,8 @@ Use cases:
* Some services include rate limits. You could use this feature to effectively "slow down" your processing of records

* Some classes can be quite process intensive (EG: Files that require you to load them into memory in order to send
them to your service provider). This "cooldown", plus `batch_sizes` should provide you with some dials to turn to try
and reduce the impact that reindexing has on your application
them to your service provider). This "cooldown", plus `batch_sizes` at a class level, should provide you with some dials
to turn to try and reduce the impact that reindexing has on your application

## Advanced configuration

Expand Down
40 changes: 40 additions & 0 deletions src/Jobs/BatchJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace SilverStripe\Forager\Jobs;

use SilverStripe\Core\Config\Configurable;
use SilverStripe\Forager\Service\IndexConfiguration;
use Symbiote\QueuedJobs\Services\AbstractQueuedJob;
use Symbiote\QueuedJobs\Services\QueuedJob;

Expand Down Expand Up @@ -30,4 +31,43 @@ protected function cooldown(): void
usleep($cooldown);
}

protected function getIndexConfigurationBatchSize(?array $onlyClasses = null, ?array $onlyIndexes = null): int
{
$indexConfiguration = IndexConfiguration::singleton();

// If no specific classes or indexes have been requested, then we should use the lowest defined batch size
// across all of our configuration
if (!$onlyClasses && !$onlyIndexes) {
return $indexConfiguration->getLowestBatchSize();
}

if ($onlyIndexes) {
// If we've requested to only reindex a specific index, then set this limitation on our IndexConfiguration
$indexConfiguration->setOnlyIndexes($onlyIndexes);
}

if (!$onlyClasses) {
// We haven't limited this request to any specific classes, so just get the lowest batch size that we can
// find within our index configuration (it will be affected by the above $onlyIndexes filter)
return $indexConfiguration->getLowestBatchSize();
}

// There is a request to only include certain classes, so let's find out what the lowest batch size is for
// those requested classes
$batchSizes = [];

foreach ($onlyClasses as $class) {
// This will get either the defined batch size for the class, or the default batch size
$batchSizes[] = $indexConfiguration->getBatchSizeForClass($class);
}

if ($batchSizes) {
// If we were able to find any batch size definitions, then return the lowest
return min($batchSizes);
}

// If all else fails, return the lowest batch size across all of our configuration
return $indexConfiguration->getLowestBatchSize();
}

}
4 changes: 2 additions & 2 deletions src/Jobs/ClearIndexJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
use SilverStripe\Core\Injector\Injector;
use SilverStripe\Forager\Interfaces\BatchDocumentRemovalInterface;
use SilverStripe\Forager\Interfaces\IndexingInterface;
use SilverStripe\Forager\Service\IndexConfiguration;
use SilverStripe\Forager\Service\Traits\ServiceAware;

/**
Expand All @@ -36,7 +35,8 @@ public function __construct(?string $indexName = null, ?int $batchSize = null)
return;
}

$batchSize = $batchSize ?: IndexConfiguration::singleton()->getBatchSize();
// Use the provided batch size, or determine batch size from our IndexConfiguration
$batchSize = $batchSize ?: $this->getIndexConfigurationBatchSize(null, [$indexName]);

$this->setIndexName($indexName);
$this->setBatchSize($batchSize);
Expand Down
4 changes: 2 additions & 2 deletions src/Jobs/IndexJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
use SilverStripe\Core\Extensible;
use SilverStripe\Core\Injector\Injectable;
use SilverStripe\Forager\Interfaces\DocumentInterface;
use SilverStripe\Forager\Service\IndexConfiguration;
use SilverStripe\Forager\Service\Indexer;
use Symbiote\QueuedJobs\Services\QueuedJob;

Expand Down Expand Up @@ -35,7 +34,8 @@ public function __construct(
?int $batchSize = null,
bool $processDependencies = true
) {
$batchSize = $batchSize ?: IndexConfiguration::singleton()->getBatchSize();
// Use the provided batch size, or determine batch size from our IndexConfiguration
$batchSize = $batchSize ?: $this->getIndexConfigurationBatchSize();

$this->setDocuments($documents);
$this->setMethod($method);
Expand Down
3 changes: 2 additions & 1 deletion src/Jobs/ReindexJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ public function __construct(?array $onlyClasses = [], ?array $onlyIndexes = [],
{
parent::__construct();

$batchSize = $batchSize ?: IndexConfiguration::singleton()->getBatchSize();
// Use the provided batch size, or determine batch size from our IndexConfiguration
$batchSize = $batchSize ?: $this->getIndexConfigurationBatchSize($onlyClasses, $onlyIndexes);

$this->setOnlyClasses($onlyClasses);
$this->setOnlyIndexes($onlyIndexes);
Expand Down
77 changes: 77 additions & 0 deletions src/Service/IndexConfiguration.php
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,83 @@ public function getFieldsForClass(string $class): ?array
return $fieldObjs;
}

public function getBatchSizeForClass(string $class, ?string $index = null): int
{
$candidate = $class;
$batchSizes = [];
// Fetch all index configurations (these might be filtered if onlyIndexes has been set)
$indexes = $this->getIndexes();

if ($index) {
// If we're requesting the batch size for a specific index, then make sure we only have that specific index
// configuration available
$indexes = array_intersect_key($indexes, array_flip([$index]));
}

while ($candidate) {
// Loop through each potential index configuration
foreach ($indexes as $config) {
$includedClasses = $config['includeClasses'] ?? [];
$spec = $includedClasses[$candidate] ?? null;

if (!$spec || !is_array($spec)) {
continue;
}

// Check to see if a batch size was defined for this class
$batchSize = $spec['batch_size'] ?? null;

if (!$batchSize) {
continue;
}

// In the case where there are multiple candidate configurations, we'll keep them all and then pick the
// lowest at the end
$batchSizes[] = $batchSize;
}

$candidate = get_parent_class($candidate);
}

if ($batchSizes) {
// Return the lowest defined batch size
return min($batchSizes);
}

return $this->getBatchSize();
}

public function getLowestBatchSize(): int
{
$batchSizes = [];
// Fetch all index configurations (these might be filtered if onlyIndexes has been set)
$indexes = $this->getIndexes();

// Loop through each potential index configuration
foreach ($indexes as $config) {
$includedClasses = $config['includeClasses'] ?? [];

foreach ($includedClasses as $spec) {
// Check to see if a batch size was defined for this class
$batchSize = $spec['batch_size'] ?? null;

if (!$batchSize) {
continue;
}

// In the case where there are multiple candidate configurations, we'll keep them all and then pick the
// lowest at the end
$batchSizes[] = $batchSize;
}
}

if ($batchSizes) {
return min($batchSizes);
}

return $this->getBatchSize();
}

public function getFieldsForIndex(string $index): array
{
$fields = [];
Expand Down
40 changes: 32 additions & 8 deletions src/Tasks/SearchReindex.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,38 @@ public function run($request): void // phpcs:ignore SlevomatCodingStandard.TypeH
Environment::increaseMemoryLimitTo();
Environment::increaseTimeLimitTo();

$targetClass = $request->getVar('onlyClass');
$targetIndex = $request->getVar('onlyIndex');
$job = ReindexJob::create($targetClass ? [$targetClass] : null, $targetIndex ? [$targetIndex] : null);

if ($this->getConfiguration()->shouldUseSyncJobs()) {
SyncJobRunner::singleton()->runJob($job, false);
} else {
QueuedJobService::singleton()->queueJob($job);
$indexConfiguration = IndexConfiguration::singleton();

$onlyClass = $request->getVar('onlyClass');
$onlyIndex = $request->getVar('onlyIndex');

if ($onlyIndex) {
// If we've requested to only reindex a specific index, then set this limitation on our IndexConfiguration
$indexConfiguration->setOnlyIndexes([$onlyIndex]);
}

// Loop through all available indexes (with the above filter applied, if relevant)
foreach (array_keys($indexConfiguration->getIndexes()) as $index) {
// If a specific class has been requested, then we'll limit ourselves to that, otherwise get all classes
// for the index
$classes = $onlyClass
? [$onlyClass]
: $indexConfiguration->getClassesForIndex($index);

foreach ($classes as $class) {
// Find our desired batch size for that class. This will either be the batch_size that you have defined
// in the class configuration, or the default batch size
$batchSize = $indexConfiguration->getBatchSizeForClass($class, $index);

// Queue a job for this class and index
$job = ReindexJob::create([$class], [$index], $batchSize);

if ($this->getConfiguration()->shouldUseSyncJobs()) {
SyncJobRunner::singleton()->runJob($job, false);
} else {
QueuedJobService::singleton()->queueJob($job);
}
}
}
}

Expand Down
31 changes: 30 additions & 1 deletion tests/Fake/IndexConfigurationFake.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace SilverStripe\Forager\Tests\Fake;

use ReflectionProperty;
use SilverStripe\Forager\Interfaces\DocumentInterface;
use SilverStripe\Forager\Service\IndexConfiguration;

Expand Down Expand Up @@ -39,7 +40,35 @@ public function shouldIncludePageHTML(): bool

public function getIndexes(): array
{
return $this->override['indexes'] ?? parent::getIndexes();
$indexes = $this->override['indexes'] ?? null;

if (!$indexes) {
return parent::getIndexes();
}

// Convert environment variable defined in YML config to its value
array_walk($indexes, function (array &$configuration): void {
$configuration = $this->environmentVariableToValue($configuration);
});

// Using reflection because we don't want this property to be part of the public API, but we need access to it
// for testing purposes
$reflectionProperty = new ReflectionProperty(IndexConfiguration::class, 'onlyIndexes');
$reflectionProperty->setAccessible(true);

$onlyIndexes = $reflectionProperty->getValue($this);

if (!$onlyIndexes) {
return $indexes;
}

foreach (array_keys($indexes) as $index) {
if (!in_array($index, $onlyIndexes)) {
unset($indexes[$index]);
}
}

return $indexes;
}

public function shouldUseSyncJobs(): bool
Expand Down
33 changes: 26 additions & 7 deletions tests/Jobs/ClearIndexJobTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,39 @@ class ClearIndexJobTest extends SearchServiceTest

public function testConstruct(): void
{
$config = $this->mockConfig();
$this->mockConfig(true);

// Batch size of 0 is the same as not specifying a batch size, so we should get the lowest batch size defined
// in our config for index1
$job = ClearIndexJob::create('index1', 0);
$this->assertSame(50, $job->getBatchSize());

// Batch size of 0 is the same as not specifying a batch size, so we should get the batch size from config
$job = ClearIndexJob::create('myindex', 0);
$this->assertSame($config->getBatchSize(), $job->getBatchSize());
// Batch size of 0 is the same as not specifying a batch size, so we should get the lowest batch size defined
// in our config for index2
$job = ClearIndexJob::create('index2', 0);
$this->assertSame(25, $job->getBatchSize());

// Same with not specifying a batch size at all
$job = ClearIndexJob::create('index1');
$this->assertSame(50, $job->getBatchSize());

// Same with not specifying a batch size at all
$job = ClearIndexJob::create('myindex');
$this->assertSame($config->getBatchSize(), $job->getBatchSize());
$job = ClearIndexJob::create('index2');
$this->assertSame(25, $job->getBatchSize());

// Check that a batch size is set when explicitly provided
$job = ClearIndexJob::create('index1', 33);
$this->assertSame(33, $job->getBatchSize());
}

public function testConstructException(): void
{
$this->mockConfig(true);

// Specifying a batch size under 0 should throw an exception
$this->expectException(InvalidArgumentException::class);
$this->expectExceptionMessage('Batch size must be greater than 0');
$job = ClearIndexJob::create('myindex', -1);
$job = ClearIndexJob::create('index1', -1);

// If no index name is provided, then other config options should not be applied
$job = ClearIndexJob::create();
Expand Down
21 changes: 21 additions & 0 deletions tests/Jobs/IndexJobTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,25 @@ public function testJob(): void
$this->assertTrue($job->jobFinished());
}

public function testConstruct(): void
{
$this->mockConfig(true);

$job = IndexJob::create();

$this->assertEquals([], $job->getDocuments());
$this->assertEquals(Indexer::METHOD_ADD, $job->getMethod());
// Should be the lowest define batch_size across our index configuration
$this->assertEquals(25, $job->getBatchSize());

$service = $this->loadIndex(20);
$docs = $service->listDocuments('test', 33);
$job = IndexJob::create($docs, Indexer::METHOD_DELETE, 33);

$this->assertEquals($docs, $job->getDocuments());
$this->assertEquals(Indexer::METHOD_DELETE, $job->getMethod());
// Should be the batch_size that was explicitly set
$this->assertEquals(33, $job->getBatchSize());
}

}
Loading

0 comments on commit 987de78

Please sign in to comment.