From 8fe7beb0de70c205b03727f237f59092ee7b2fa8 Mon Sep 17 00:00:00 2001 From: Olivier Binette Date: Wed, 8 Nov 2023 15:05:46 -0500 Subject: [PATCH] Prepare 2.2.1 release --- CHANGELOG.rst | 4 ++++ docs/00-overview.rst | 2 +- docs/04-error_analysis.ipynb | 4 ++-- er_evaluation/__init__.py | 2 +- setup.py | 2 +- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9eeede9..6f27536 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,10 @@ Changelog ========= +2.2.1 (November 8, 2023) +------------------------ +* Small fixes to paper and documentation. + 2.2.0 (October 26, 2023) ------------------------ * Streamline package structure diff --git a/docs/00-overview.rst b/docs/00-overview.rst index db23efa..eb927f8 100644 --- a/docs/00-overview.rst +++ b/docs/00-overview.rst @@ -43,7 +43,7 @@ Throughout this user guide and the documentation of the package, we use the foll - The output of an entity resolution system is a **predicted clustering,** i.e. an attempt at correctly clustering records/mentions according to the entity to which they refer. There may be errors in the predicted clustering, e.g. records/mentions may be incorrectly clustered together or split into multiple clusters. - A **reference** dataset, or a set of **ground truth** clusters, is a clustering of mentions/records that is assumed to be correct. -For more information on entity resolution, we refer the reader to [Binette & Steorts (2022)](https://www.science.org/doi/10.1126/sciadv.abi8021) and [Christophides et al. (2019)](https://arxiv.org/abs/1905.06397). +For more information on entity resolution, we refer the reader to `Binette & Steorts (2022) <https://www.science.org/doi/10.1126/sciadv.abi8021>`_ and `Christophides et al. (2019) <https://arxiv.org/abs/1905.06397>`_. We recommend `Splink `_ as a state-of-the-art large-scale entity resolution software. The Splink team provides a large list of `tutorials `_ and `training materials `_ on their website. 
The book `"Hands-On Entity Resolution" `_ provides an introduction to entity resolution with Splink. diff --git a/docs/04-error_analysis.ipynb b/docs/04-error_analysis.ipynb index b1098e5..270f87c 100644 --- a/docs/04-error_analysis.ipynb +++ b/docs/04-error_analysis.ipynb @@ -38,7 +38,7 @@ "- **Expected Missing Elements:** This metric represents the expected number of missing elements for each true cluster. It calculates the average number of elements that are missing from the predicted clusters compared to the true clusters.\n", "- **Expected Relative Missing Elements:** This metric represents the expected relative number of missing elements for each true cluster. It calculates the average relative number of elements that are missing from the predicted clusters compared to the true clusters.\n", "\n", - "You can find more information about these metrics, including formal mathematical definitions, in the {py:module}`er_evaluation.error_analysis` module." + "You can find more information about these metrics, including formal mathematical definitions, in the [er_evaluation.error_analysis](https://er-evaluation.readthedocs.io/en/latest/er_evaluation.error_analysis.html) module." ] }, { @@ -96,7 +96,7 @@ "source": [ "## Error Analysis with Decision Trees\n", "\n", - "To identify combinations of features leading to performance disparities, we recommend doing error analysis using decision trees. First, define features associated with each cluster and choose an error metric to target. You can use any error metric from the {py:func}`er_evaluation.error_analysis` module. We recommend using thresholded 0-1 features for interpretability." + "To identify combinations of features leading to performance disparities, we recommend doing error analysis using decision trees. First, define features associated with each cluster and choose an error metric to target. 
You can use any error metric from the [er_evaluation.error_analysis](https://er-evaluation.readthedocs.io/en/latest/er_evaluation.error_analysis.html) module. We recommend using thresholded 0-1 features for interpretability." ] }, { diff --git a/er_evaluation/__init__.py b/er_evaluation/__init__.py index f1ac90b..ca1fd92 100644 --- a/er_evaluation/__init__.py +++ b/er_evaluation/__init__.py @@ -1,4 +1,4 @@ -__version__ = "2.2.0" +__version__ = "2.2.1" import er_evaluation.data_structures import er_evaluation.datasets diff --git a/setup.py b/setup.py index 1a66793..fe91b0f 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,6 @@ name="ER-Evaluation", packages=find_packages(), url="https://github.com/OlivierBinette/er_evaluation", - version="2.2.0", + version="2.2.1", zip_safe=False, )