gchq · qh681248 · Feb 6, 2025 · Jan 30, 2025 · Jan 30, 2025 · Jan 31, 2025
diff --git a/.pylintrc b/.pylintrc
@@ -46,13 +46,21 @@ fail-under=10
 #from-stdin=
 
 # Files or directories to be skipped. They should be base names, not paths.
-ignore=CVS
+ignore=
+    CVS,
+    .git,
+    .venv,
+    .cache,
+    build,
 
 # Add files or directories matching the regular expressions patterns to the
 # ignore-list. The regex matches against paths and can be in Posix or Windows
 # format. Because '\\' represents the directory delimiter on Windows systems,
 # it can't be used as an escape character.
 ignore-paths=
+    # `ignore` above only matches base names, so this path is excluded here instead.
+    ^documentation/source/snippets/.*$,
+
 
 # Files or directories matching the regular expression patterns are skipped.
 # The regex matches against base names, not paths. The default value ignores

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -60,14 +60,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - **[BREAKING CHANGE]** Equinox dependency version is changed from `<0.11.8` to
   `>=0.11.5`. (https://github.com/gchq/coreax/pull/898)
 - **[BREAKING CHANGE]** The `jaxtyping` version is now lower bounded at `v0.2.31` to enable `coreax.data.Data` jaxtyping compatibility.
+- Refactored the `Coreset` types - instead of `Coreset` and `Coresubset(Coreset)`, we
+  now have `AbstractCoreset`, `PseudoCoreset(AbstractCoreset)`, and
+  `Coresubset(AbstractCoreset)`. See "Deprecated" below for more details of this change.
+  (https://github.com/gchq/coreax/pull/943)
 
 ### Removed
 
 -
 
 ### Deprecated
 
--
+- Uses of `Coreset` should be replaced with `AbstractCoreset` (for a general coreset,
+  such as in a function argument type hint), or `PseudoCoreset` (for the specific case
+  of a coreset that is not necessarily a coresubset).
+  (https://github.com/gchq/coreax/pull/943)
+- Uses of `Coreset.coreset` should be replaced with `Coreset.points`.
+  (https://github.com/gchq/coreax/pull/943)
+- Uses of `Coreset.nodes` should be replaced with `Coresubset.indices` or
+  `PseudoCoreset.points`, depending on whether the coreset is a coresubset or a
+  pseudo-coreset. (https://github.com/gchq/coreax/pull/943)
+- Passing `Array` or `tuple[Array, Array]` into coreset constructors is now deprecated -
+  either pass in `Data` or `SupervisedData` instances, or use the `build()` class
+  method which handles the conversion. (https://github.com/gchq/coreax/pull/943)
 
 
 ## [0.3.1]

diff --git a/benchmark/blobs_benchmark.py b/benchmark/blobs_benchmark.py
@@ -179,8 +179,8 @@ def compute_solver_metrics(
     coresubset, _ = solver.reduce(dataset)
 
     # Unweighted metrics
-    unweighted_mmd = float(mmd_metric.compute(dataset, coresubset.coreset))
-    unweighted_ksd = float(ksd_metric.compute(dataset, coresubset.coreset))
+    unweighted_mmd = float(mmd_metric.compute(dataset, coresubset.points))
+    unweighted_ksd = float(ksd_metric.compute(dataset, coresubset.points))
 
     # Weighted metrics
     weighted_coresubset = coresubset.solve_weights(weights_optimiser)

diff --git a/benchmark/david_benchmark.py b/benchmark/david_benchmark.py
@@ -96,7 +96,7 @@ def benchmark_coreset_algorithms(
         start_time = time.perf_counter()
         coreset, _ = eqx.filter_jit(solver.reduce)(data)
         duration = time.perf_counter() - start_time
-        coresets[solver_name] = coreset.coreset.data
+        coresets[solver_name] = coreset.points.data
         solver_times[solver_name] = duration
 
     plt.figure(figsize=(15, 10))

diff --git a/benchmark/mnist_benchmark.py b/benchmark/mnist_benchmark.py
@@ -539,7 +539,7 @@ def main() -> None:
                 # pylint: enable=duplicate-code
                 coreset, _ = eqx.filter_jit(solver.reduce)(train_data_umap)
 
-                coreset_indices = coreset.nodes.data
+                coreset_indices = coreset.indices.data
 
                 train_data_coreset = train_data_jax[coreset_indices]
                 train_targets_coreset = train_targets_jax[coreset_indices]

diff --git a/coreax/__init__.py b/coreax/__init__.py
@@ -30,7 +30,7 @@
     MonteCarloApproximateKernel,
     NystromApproximateKernel,
 )
-from coreax.coreset import Coreset, Coresubset
+from coreax.coreset import AbstractCoreset, Coresubset, PseudoCoreset
 from coreax.data import Data, SupervisedData
 from coreax.kernels import (
     LaplacianKernel,
@@ -48,8 +48,9 @@
     "ApproximateKernel",
     "MonteCarloApproximateKernel",
     "NystromApproximateKernel",
-    "Coreset",
+    "AbstractCoreset",
     "Coresubset",
+    "PseudoCoreset",
     "Data",
     "SupervisedData",
     "LaplacianKernel",