Skip to content

Commit

Permalink
Merge pull request #8 from emilklindt/3-use-distance-in-k-means-conve…
Browse files Browse the repository at this point in the history
…rgence-check

Use distance in k means convergence check (#3)
  • Loading branch information
emilklindt authored Aug 30, 2021
2 parents 1747885 + 1691aff commit 1fc82a3
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 1 deletion.
14 changes: 14 additions & 0 deletions config/marker-clusterer.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,20 @@

'default_maximum_iterations' => 10,

/*
|--------------------------------------------------------------------------
| Default Maximum Convergence Distance
|--------------------------------------------------------------------------
|
| The maximum distance a cluster's centroid may move between iterations for
| the cluster to count as converged, meaning that no further iteration is
| necessary. A higher value can improve performance, because fewer iterations
| are needed. A lower value will ensure that a cluster has actually converged.
|
*/

'default_convergence_maximum' => 100,

/*
|--------------------------------------------------------------------------
| Default Maximum Samples
Expand Down
1 change: 1 addition & 0 deletions src/BaseClusterer.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ private function mergeDefaultConfig(): void
'samples' => config('marker-clusterer.default_maximum_samples'),
'iterations' => config('marker-clusterer.default_maximum_iterations'),
'distanceFormula' => config('marker-clusterer.default_distance_formula'),
'convergenceMaximum' => config('marker-clusterer.default_convergence_maximum'),
];

foreach ($map as $key => $value) {
Expand Down
4 changes: 3 additions & 1 deletion src/Clusterers/KMeansClusterer.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public function validateConfig(): bool
return is_int($this->config->k)
&& is_int($this->config->iterations)
&& is_int($this->config->samples)
&& is_int($this->config->convergenceMaximum)
&& in_array($this->config->distanceFormula, DistanceFormula::getConstants());
}

Expand Down Expand Up @@ -188,7 +189,8 @@ private function hasConverged(Collection $centroids): bool
{
return $this->clusters
->every(function (Cluster $cluster, int $index) use ($centroids) {
return $cluster->centroid == $centroids->get($index);
return $this->distanceCalculator->measure($cluster->centroid, $centroids->get($index))
<= $this->config->convergenceMaximum;
});
}

Expand Down
8 changes: 8 additions & 0 deletions src/Models/Config.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ class Config extends DataTransferObject
*/
public ?int $iterations;

/**
* Maximum distance a cluster's centroid may move between iterations
* for the cluster to count as converged
*
* @see config/marker-clusterer.php
*/
public ?int $convergenceMaximum;

/**
* Maximum number of clustering samples
*
Expand Down
1 change: 1 addition & 0 deletions tests/Clusterers/KMeansClustererTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ private function getValidConfig()
'iterations' => 10,
'samples' => 10,
'distanceFormula' => DistanceFormula::HAVERSINE,
'convergenceMaximum' => 1000,
]);
}

Expand Down

0 comments on commit 1fc82a3

Please sign in to comment.