From 4ddef7c3136101ecd189f18715faca36e46f713f Mon Sep 17 00:00:00 2001 From: Raahul Singh Date: Sat, 6 Jun 2020 16:54:34 +0530 Subject: [PATCH 01/10] Implements ELO Ranking Algorithm --- pythia/cleaning/elo.py | 217 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 pythia/cleaning/elo.py diff --git a/pythia/cleaning/elo.py b/pythia/cleaning/elo.py new file mode 100644 index 0000000..04610ec --- /dev/null +++ b/pythia/cleaning/elo.py @@ -0,0 +1,217 @@ +import pandas as pd +import numpy as np +from collections import deque +from sunpy.util import SunpyUserWarning + + +__all__ = ['ELO'] + + +class ELO: + """ + Recreating the ELO rating algirithm for Sunspotter. + """ + + def __init__(self, score_board: pd.DataFrame, *, k_value=32, default_score=1400, + max_comparisons=50, max_score_change=32, min_score_change=16, score_memory=10, + delimiter=';', column_map={"player 0": "image_id_0", + "player 1": "image_id_1", + "score for player 0": "image0_more_complex_image1"}): + """ + Parameters + ---------- + score_board : pandas.DataFrame + DataFrame holding the scores of individual matches. + k_value : int, optional + Initial K Value to be used for calculating new ratings, by default 32 + default_score : int, optional + Initial rating, by default 1400 + max_comparisons : int, optional + Max comparisions for any player, by default 50 + max_score_change : int, optional + Upper limit on K Value updation, by default 32 + min_score_change : int, optional + Lower limit on K Value updation, by default 16 + score_memory : int, optional + Number of previous scores to consider while calculating + standard deviation and new K value, by default 10 + column_map : dict, optional + Dictionary, for mapping the column names of the score_board dataframe + to variable names used in the ELO ranking system. + by default {"player 0": "image_id_0", + "player 1": "image_id_1", + "score for player 0": "image0_more_complex_image1"} + """ + self.score_board = score_board + self.k_value = k_value + self.default_score = default_score + self.score_change = {'min': min_score_change, 'max': max_score_change} + self.max_comparisions = max_comparisons + self.score_memory = score_memory + self.column_map = column_map + + if not set(self.column_map.values()).issubset(self.score_board.columns): + missing_columns = set(self.column_map.values()) - set(self.column_map.values()).intersection(self.score_board.columns) + missing_columns = ", ".join(missing_columns) + + raise SunpyUserWarning("The following columns mentioned in the column map" + " are not present in the score board: " + + missing_columns) + + self._create_ranking() + + def _create_ranking(self): + """ + Prepares the Ranking DataFrame. + """ + image_ids = set(self.score_board[self.column_map['player 0']]).union(self.score_board[self.column_map['player 1']]) + self.rankings = pd.DataFrame(image_ids, columns=['player id']) + self.rankings.set_axis(self.rankings['player id'], inplace=True) + self.rankings['score'] = self.default_score + self.rankings['k value'] = self.k_value + self.rankings['count'] = 0 + self.rankings['std dev'] = self.score_change['max'] + self.rankings['last scores'] = str(self.default_score) + + def expected_score(self, score_image_0, score_image_1): + """ + Given two AR scores, calculates expected probability of `image_0` being more complex. 
+
+        Parameters
+        ----------
+        score_image_0 : int
+            Score for first image
+        score_image_1 : int
+            Score for second image
+
+        Returns
+        -------
+        expected_0_score : float
+            Expected probability of `image_0` being more complex.
+        """
+        expected_0_score = 1.0 / (1.0 + 10 ** ((score_image_1 - score_image_0) / 400.00))
+        return expected_0_score
+
+    def new_rating(self, rating_for_image, k_value, score_for_image, image_expected_score):
+        """
+        Calculates new rating based on the ELO algorithm.
+
+        Parameters
+        ----------
+        rating_for_image : float
+            Current rating for the image
+        k_value : float
+            Current k_value for the image
+        score_for_image : int
+            Actual result of classification of the image in a pairwise match.
+            `0` denotes less complex, `1` denotes more complex
+        image_expected_score : float
+            Expected result of classification of the image in a pairwise match
+            based on the current rating of the image.
+
+        Returns
+        -------
+        new_image_rating : float
+            New rating of the image after the classification match.
+        """
+        new_image_rating = rating_for_image + k_value * (score_for_image - image_expected_score)
+        return new_image_rating
+
+    def score_update(self, image_0, image_1, score_for_image_0):
+        """
+        Updates the ratings of the two images based on the complexity classification.
+
+        Parameters
+        ----------
+        image_0 : int
+            Image id for first image
+        image_1 : int
+            Image id for second image
+        score_for_image_0 : int
+            Actual result of classification of image 0 in a pairwise match.
+            `0` denotes less complex, `1` denotes more complex
+
+        Notes
+        -----
+        To make updates in the original rankings DataFrame, for each classification,
+        two state dictionaries need to be maintained, corresponding to the two AR images.
+        The changes are made to these state dictionaries and then the ranking DataFrame is updated.
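A quick numerical check of the two formulas above, using ratings that also appear in the test suite (a sketch for illustration only; `elo` stands for any constructed ELO instance, which is an assumption of this snippet):

    # Illustration using values from the parametrized tests.
    elo.expected_score(1400, 1400)         # -> 0.5 (even match)
    # If image 0 is voted more complex (score 1), its rating rises by K * (1 - 0.5) = 16.
    elo.new_rating(1400.0, 32, 1, 0.5)     # -> 1416.0
    # A large rating gap makes an upset worth far more:
    elo.expected_score(1600, 2208.0)       # -> ~0.0293
    elo.new_rating(1600.0, 32, 1, 0.029314241270450396)  # -> ~1631.06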
+        """
+        # state dicts
+        state_dict_0 = self.rankings.loc[image_0].to_dict()
+        state_dict_0['last scores'] = deque(map(float, state_dict_0['last scores'].split(',')), maxlen=self.score_memory)
+        state_dict_1 = self.rankings.loc[image_1].to_dict()
+        state_dict_1['last scores'] = deque(map(float, state_dict_1['last scores'].split(',')), maxlen=self.score_memory)
+
+        expected_score_0 = self.expected_score(self.rankings.loc[image_0]['score'],
+                                               self.rankings.loc[image_1]['score'])
+        expected_score_1 = 1 - expected_score_0
+
+        new_rating_0 = self.new_rating(self.rankings.loc[image_0]['score'], self.rankings.loc[image_0]['k value'],
+                                       score_for_image_0, expected_score_0)
+        new_rating_1 = self.new_rating(self.rankings.loc[image_1]['score'], self.rankings.loc[image_1]['k value'],
+                                       1 - score_for_image_0, expected_score_1)
+
+        state_dict_0['last scores'].append(new_rating_0)
+        state_dict_1['last scores'].append(new_rating_1)
+
+        new_std_dev_0 = min(np.std(state_dict_0['last scores']), 1_000_000)  # prevents Infinity
+        new_k_0 = min(max(new_std_dev_0, self.score_change['min']), self.score_change['max'])
+
+        new_std_dev_1 = min(np.std(state_dict_1['last scores']), 1_000_000)  # prevents Infinity
+        new_k_1 = min(max(new_std_dev_1, self.score_change['min']), self.score_change['max'])
+
+        # Updating Data
+        state_dict_0['score'] = new_rating_0
+        state_dict_0['std dev'] = new_std_dev_0
+        state_dict_0['k value'] = new_k_0
+        state_dict_0['count'] += 1
+        state_dict_0['last scores'] = ",".join(map(str,state_dict_0['last scores']))  # Storing the list of states as a String
+
+        state_dict_1['score'] = new_rating_1
+        state_dict_1['std dev'] = new_std_dev_1
+        state_dict_1['k value'] = new_k_1
+        state_dict_1['count'] += 1
+        state_dict_1['last scores'] = ",".join(map(str,state_dict_1['last scores']))  # Storing the list of states as a String
+
+        # Making the Update DataFrames
+        update_df = pd.DataFrame([state_dict_0, state_dict_1])
+        update_df.set_index("player id", inplace=True)
+
+        # Updating the original DataFrame
+        self.rankings.update(update_df)
+
+    def run(self, save_to_disk=True, filename='run_results.csv'):
+        """
+        Runs the ELO ranking algorithm over the entire score_board.
+
+        Parameters
+        ----------
+        save_to_disk : bool, optional
+            If True, saves the rankings in a CSV file on disk, by default True
+        filename : str, optional
+            Filename to store the results, by default 'run_results.csv'
+        """
+        for index, row in self.score_board.iterrows():
+
+            if row[self.column_map['player 0']] == row[self.column_map['player 1']]:
+                continue
+
+            self.score_update(image_0=row[self.column_map['player 0']], image_1=row[self.column_map['player 1']],
+                              score_for_image_0=row[self.column_map['score for player 0']])
+            print(f"Index {index} done!")
+
+        if save_to_disk:
+            self.save_as_csv(filename)
+
+    def save_as_csv(self, filename):
+        """
+        Saves the Ranking DataFrame to the disk as a CSV file.
+
+        Parameters
+        ----------
+        filename : str
+            Filename to store the results.
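For context, a minimal end-to-end sketch of how the class is meant to be driven (assuming the same column layout as the test classifications file; `pd` is pandas and the three-row DataFrame is invented for illustration):

    import pandas as pd

    # Three pairwise comparisons: 1 means the first image was voted more complex.
    matches = pd.DataFrame({"image_id_0": [1, 2, 3],
                            "image_id_1": [6, 7, 8],
                            "image0_more_complex_image1": [1, 0, 0]})
    elo = ELO(score_board=matches)        # default column_map matches these column names
    elo.run(save_to_disk=False)           # update ratings match by match
    print(elo.rankings[['score', 'k value', 'count']])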
+ """ + self.rankings.drop(columns=["last_scores"], inplace=True) + self.rankings.to_csv(filename) From bf01a172972198bb1a87dbeb312e2a3d6c7818a6 Mon Sep 17 00:00:00 2001 From: Raahul Singh Date: Sat, 13 Jun 2020 04:21:35 +0530 Subject: [PATCH 02/10] Adds ELO tests --- data/elo_test/test_classifications.csv | 6 +++ pythia/cleaning/tests/test_elo.py | 66 +++++++++++++++++++++++ pythia/seo/tests/test_classifications.csv | 6 +++ pythia/seo/tests/test_rankings.csv | 11 ++++ 4 files changed, 89 insertions(+) create mode 100644 data/elo_test/test_classifications.csv create mode 100644 pythia/cleaning/tests/test_elo.py create mode 100644 pythia/seo/tests/test_classifications.csv create mode 100644 pythia/seo/tests/test_rankings.csv diff --git a/data/elo_test/test_classifications.csv b/data/elo_test/test_classifications.csv new file mode 100644 index 0000000..934a11a --- /dev/null +++ b/data/elo_test/test_classifications.csv @@ -0,0 +1,6 @@ +;image_id_0;image_id_1;image0_more_complex_image1 +0;1;6;1 +1;2;7;0 +2;3;8;0 +3;4;9;1 +4;5;10;1 diff --git a/pythia/cleaning/tests/test_elo.py b/pythia/cleaning/tests/test_elo.py new file mode 100644 index 0000000..17c07ad --- /dev/null +++ b/pythia/cleaning/tests/test_elo.py @@ -0,0 +1,66 @@ +import pytest +import pandas as pd +from pythia.cleaning import ELO +from pathlib import Path +from pythia.seo import Sunspotter +from sunpy.util import SunpyUserWarning + +path = Path(__file__).resolve().parent.parent.parent.parent / "data/elo_test" + + +@pytest.fixture +def elo(): + sunspotter = Sunspotter(timesfits=path / "../all_clear/lookup_timesfits.csv", + properties=path / "../all_clear/lookup_properties.csv", + classifications=path / "test_classifications.csv", + classifications_columns=['image_id_0', 'image_id_1', + 'image0_more_complex_image1']) + column_map = {"player 0": "image_id_0", + "player 1": "image_id_1", + "score for player 0": "image0_more_complex_image1"} + + return ELO(score_board=sunspotter.classifications, column_map=column_map) + + +@pytest.mark.parametrize('rating_0,rating_1,expected_score', + [(1400, 1400.0, 0.5), + (1450, 1450.5, 0.49928044265518673), + (1500, 1602.0, 0.3572869311673796), + (1550, 1854.5, 0.14768898365874825), + (1600, 2208.0, 0.029314241270450396)]) +def test_expected_score(elo, rating_0, rating_1, expected_score): + assert elo.expected_score(rating_0, rating_1) == expected_score + + +@pytest.mark.parametrize('rating_for_image,k_value,score_for_image,image_expected_score,new_rating', + [(1400.0, 32, 1, 0.5, 1416.0), + (1450.0, 32, 0, 0.49928044265518673, 1434.023025835034), + (1500.0, 32, 0, 0.3572869311673796, 1488.5668182026438), + (1550.0, 32, 1, 0.14768898365874825, 1577.2739525229201), + (1600.0, 32, 1, 0.029314241270450396, 1631.0619442793457), + (1400.0, 32, 0, 0.5, 1384.0), + (1450.5, 32, 1, 0.5007195573448133, 1466.476974164966), + (1602.0, 32, 1, 0.6427130688326204, 1613.4331817973562), + (1854.5, 32, 0, 0.8523110163412517, 1827.2260474770799), + (2208.0, 32, 0, 0.9706857587295497, 2176.9380557206546)]) +def test_new_rating(elo, rating_for_image, k_value, score_for_image, image_expected_score, new_rating): + assert elo.new_rating(rating_for_image, k_value, score_for_image, image_expected_score) == new_rating + + +def test_column_map(elo): + assert set(elo.column_map.values()).issubset(elo.score_board.columns) + + +def test_incorrect_column_map(): + + sunspotter = Sunspotter(timesfits=path / "../all_clear/lookup_timesfits.csv", + properties=path / "../all_clear/lookup_properties.csv", + classifications=path / 
"test_classifications.csv", + classifications_columns=['image_id_0', 'image_id_1', + 'image0_more_complex_image1']) + column_map = {"player 0": "This is not player 0", + "player 1": "This is not player 1", + "score for player 0": "Player 0 is in it for the fun"} + + with pytest.raises(SunpyUserWarning): + ELO(score_board=sunspotter.classifications, column_map=column_map) diff --git a/pythia/seo/tests/test_classifications.csv b/pythia/seo/tests/test_classifications.csv new file mode 100644 index 0000000..e3e8fb4 --- /dev/null +++ b/pythia/seo/tests/test_classifications.csv @@ -0,0 +1,6 @@ +,image_id_0,image_id_1,image0_more_complex_image1 +0,1,6,1 +1,2,7,0 +2,3,8,0 +3,4,9,1 +4,5,10,1 diff --git a/pythia/seo/tests/test_rankings.csv b/pythia/seo/tests/test_rankings.csv new file mode 100644 index 0000000..bf11d4d --- /dev/null +++ b/pythia/seo/tests/test_rankings.csv @@ -0,0 +1,11 @@ +,image_id,score,k_value,count,std_dev +0,1,1400.0,32,0,32 +1,2,1450.0,32,0,32 +2,3,1500.0,32,0,32 +3,4,1550.0,32,0,32 +4,5,1600.0,32,0,32 +5,6,1400.0,32,0,32 +6,7,1450.5,32,0,32 +7,8,1602.0,32,0,32 +8,9,1854.5,32,0,32 +9,10,2208.0,32,0,32 From 6b885d9556c1f5e465850094254cd5f04e99128c Mon Sep 17 00:00:00 2001 From: Raahul Singh Date: Sat, 13 Jun 2020 04:25:19 +0530 Subject: [PATCH 03/10] Updates cleaning __init__ file --- pythia/cleaning/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pythia/cleaning/__init__.py b/pythia/cleaning/__init__.py index e69de29..f782c15 100644 --- a/pythia/cleaning/__init__.py +++ b/pythia/cleaning/__init__.py @@ -0,0 +1 @@ +from pythia.cleaning.elo import * From 7f7432ad8e8203a923b5fa6e4e4cc373b6d9c2ba Mon Sep 17 00:00:00 2001 From: Raahul Singh Date: Sat, 13 Jun 2020 04:28:32 +0530 Subject: [PATCH 04/10] Adds numpy to requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index fc6e807..a4837f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ sunpy pandas +numpy From 2f99d8ac28303af7860c67b6420ad0ff4aba993a Mon Sep 17 00:00:00 2001 From: Raahul Singh Date: Sat, 13 Jun 2020 04:28:40 +0530 Subject: [PATCH 05/10] Adds changelog and solves pep8 --- changelog/26.feature.rst | 1 + pythia/cleaning/elo.py | 4 ++-- pythia/cleaning/tests/test_elo.py | 22 +++++++++++----------- pythia/seo/tests/test_classifications.csv | 6 ------ pythia/seo/tests/test_rankings.csv | 11 ----------- 5 files changed, 14 insertions(+), 30 deletions(-) create mode 100644 changelog/26.feature.rst delete mode 100644 pythia/seo/tests/test_classifications.csv delete mode 100644 pythia/seo/tests/test_rankings.csv diff --git a/changelog/26.feature.rst b/changelog/26.feature.rst new file mode 100644 index 0000000..02e252d --- /dev/null +++ b/changelog/26.feature.rst @@ -0,0 +1 @@ +Adds a module for ELO Ranking Algorithm. diff --git a/pythia/cleaning/elo.py b/pythia/cleaning/elo.py index 04610ec..5c67f2a 100644 --- a/pythia/cleaning/elo.py +++ b/pythia/cleaning/elo.py @@ -38,8 +38,8 @@ def __init__(self, score_board: pd.DataFrame, *, k_value=32, default_score=1400, column_map : dict, optional Dictionary, for mapping the column names of the score_board dataframe to variable names used in the ELO ranking system. 
- by default {"player 0": "image_id_0", - "player 1": "image_id_1", + by default {"player 0": "image_id_0", + "player 1": "image_id_1", "score for player 0": "image0_more_complex_image1"} """ self.score_board = score_board diff --git a/pythia/cleaning/tests/test_elo.py b/pythia/cleaning/tests/test_elo.py index 17c07ad..857a171 100644 --- a/pythia/cleaning/tests/test_elo.py +++ b/pythia/cleaning/tests/test_elo.py @@ -18,7 +18,7 @@ def elo(): column_map = {"player 0": "image_id_0", "player 1": "image_id_1", "score for player 0": "image0_more_complex_image1"} - + return ELO(score_board=sunspotter.classifications, column_map=column_map) @@ -33,16 +33,16 @@ def test_expected_score(elo, rating_0, rating_1, expected_score): @pytest.mark.parametrize('rating_for_image,k_value,score_for_image,image_expected_score,new_rating', - [(1400.0, 32, 1, 0.5, 1416.0), - (1450.0, 32, 0, 0.49928044265518673, 1434.023025835034), - (1500.0, 32, 0, 0.3572869311673796, 1488.5668182026438), - (1550.0, 32, 1, 0.14768898365874825, 1577.2739525229201), - (1600.0, 32, 1, 0.029314241270450396, 1631.0619442793457), - (1400.0, 32, 0, 0.5, 1384.0), - (1450.5, 32, 1, 0.5007195573448133, 1466.476974164966), - (1602.0, 32, 1, 0.6427130688326204, 1613.4331817973562), - (1854.5, 32, 0, 0.8523110163412517, 1827.2260474770799), - (2208.0, 32, 0, 0.9706857587295497, 2176.9380557206546)]) + [(1400.0, 32, 1, 0.5, 1416.0), + (1450.0, 32, 0, 0.49928044265518673, 1434.023025835034), + (1500.0, 32, 0, 0.3572869311673796, 1488.5668182026438), + (1550.0, 32, 1, 0.14768898365874825, 1577.2739525229201), + (1600.0, 32, 1, 0.029314241270450396, 1631.0619442793457), + (1400.0, 32, 0, 0.5, 1384.0), + (1450.5, 32, 1, 0.5007195573448133, 1466.476974164966), + (1602.0, 32, 1, 0.6427130688326204, 1613.4331817973562), + (1854.5, 32, 0, 0.8523110163412517, 1827.2260474770799), + (2208.0, 32, 0, 0.9706857587295497, 2176.9380557206546)]) def test_new_rating(elo, rating_for_image, k_value, score_for_image, image_expected_score, new_rating): assert elo.new_rating(rating_for_image, k_value, score_for_image, image_expected_score) == new_rating diff --git a/pythia/seo/tests/test_classifications.csv b/pythia/seo/tests/test_classifications.csv deleted file mode 100644 index e3e8fb4..0000000 --- a/pythia/seo/tests/test_classifications.csv +++ /dev/null @@ -1,6 +0,0 @@ -,image_id_0,image_id_1,image0_more_complex_image1 -0,1,6,1 -1,2,7,0 -2,3,8,0 -3,4,9,1 -4,5,10,1 diff --git a/pythia/seo/tests/test_rankings.csv b/pythia/seo/tests/test_rankings.csv deleted file mode 100644 index bf11d4d..0000000 --- a/pythia/seo/tests/test_rankings.csv +++ /dev/null @@ -1,11 +0,0 @@ -,image_id,score,k_value,count,std_dev -0,1,1400.0,32,0,32 -1,2,1450.0,32,0,32 -2,3,1500.0,32,0,32 -3,4,1550.0,32,0,32 -4,5,1600.0,32,0,32 -5,6,1400.0,32,0,32 -6,7,1450.5,32,0,32 -7,8,1602.0,32,0,32 -8,9,1854.5,32,0,32 -9,10,2208.0,32,0,32 From 384cd1a1aae13ee4c227a59461e12f5e7308e0b1 Mon Sep 17 00:00:00 2001 From: David Perez-Suarez Date: Thu, 18 Jun 2020 21:07:30 +0100 Subject: [PATCH 06/10] =?UTF-8?q?=F0=9F=8C=9E=20drying=20the=20update=20of?= =?UTF-8?q?=20state=20dictionaries?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pythia/cleaning/elo.py | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/pythia/cleaning/elo.py b/pythia/cleaning/elo.py index 5c67f2a..5621d6f 100644 --- a/pythia/cleaning/elo.py +++ b/pythia/cleaning/elo.py @@ -147,32 +147,8 @@ def score_update(self, image_0, image_1, 
score_for_image_0):
                                 self.rankings.loc[image_1]['score'])
         expected_score_1 = 1 - expected_score_0
 
-        new_rating_0 = self.new_rating(self.rankings.loc[image_0]['score'], self.rankings.loc[image_0]['k value'],
-                                       score_for_image_0, expected_score_0)
-        new_rating_1 = self.new_rating(self.rankings.loc[image_1]['score'], self.rankings.loc[image_1]['k value'],
-                                       1 - score_for_image_0, expected_score_1)
-
-        state_dict_0['last scores'].append(new_rating_0)
-        state_dict_1['last scores'].append(new_rating_1)
-
-        new_std_dev_0 = min(np.std(state_dict_0['last scores']), 1_000_000)  # prevents Infinity
-        new_k_0 = min(max(new_std_dev_0, self.score_change['min']), self.score_change['max'])
-
-        new_std_dev_1 = min(np.std(state_dict_1['last scores']), 1_000_000)  # prevents Infinity
-        new_k_1 = min(max(new_std_dev_1, self.score_change['min']), self.score_change['max'])
-
-        # Updating Data
-        state_dict_0['score'] = new_rating_0
-        state_dict_0['std dev'] = new_std_dev_0
-        state_dict_0['k value'] = new_k_0
-        state_dict_0['count'] += 1
-        state_dict_0['last scores'] = ",".join(map(str,state_dict_0['last scores']))  # Storing the list of states as a String
-
-        state_dict_1['score'] = new_rating_1
-        state_dict_1['std dev'] = new_std_dev_1
-        state_dict_1['k value'] = new_k_1
-        state_dict_1['count'] += 1
-        state_dict_1['last scores'] = ",".join(map(str,state_dict_1['last scores']))  # Storing the list of states as a String
+        self._update_state_dict(state_dict_0, image_0, expected_score_0, score_for_image_0)
+        self._update_state_dict(state_dict_1, image_1, expected_score_1, 1 - score_for_image_0)
 
         # Making the Update DataFrames
         update_df = pd.DataFrame([state_dict_0, state_dict_1])
         update_df.set_index("player id", inplace=True)
@@ -181,6 +157,19 @@ def score_update(self, image_0, image_1, score_for_image_0):
         # Updating the original DataFrame
         self.rankings.update(update_df)
 
+    def _update_state_dict(self, state_dict, image, expected_score, score):
+        new_rating = self.new_rating(self.rankings.loc[image]['score'], self.rankings.loc[image]['k value'],
+                                     score, expected_score)
+        state_dict['last scores'].append(new_rating)
+        new_std_dev = min(np.std(state_dict['last scores']), 1_000_000)  # prevents Infinity
+        new_k = min(max(new_std_dev, self.score_change['min']), self.score_change['max'])
+        # Updating Data
+        state_dict['score'] = new_rating
+        state_dict['std dev'] = new_std_dev
+        state_dict['k value'] = new_k
+        state_dict['count'] += 1
+        state_dict['last scores'] = ",".join(map(str, state_dict['last scores']))  # Storing the list of states as a String
+
     def run(self, save_to_disk=True, filename='run_results.csv'):
         """
         Runs the ELO ranking algorithm over the entire score_board.
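The adaptive K value computed in `_update_state_dict` is simply the standard deviation of the recent ratings, clamped between `min_score_change` and `max_score_change`. A small standalone sketch of that rule (hypothetical helper name, for illustration only; not part of the patch):

    import numpy as np

    def clamped_k(last_scores, k_min=16, k_max=32):
        # Standard deviation of the recent ratings, capped to avoid runaway values.
        std_dev = min(np.std(last_scores), 1_000_000)
        # Volatile images keep a large K; stable images settle towards k_min.
        return min(max(std_dev, k_min), k_max)

    clamped_k([1400.0, 1416.0])           # -> 16 (std dev 8 is below the floor)
    clamped_k([1400.0, 1500.0, 1430.0])   # -> 32 (std dev ~42 is above the cap)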
From 061a6a104d9035b66afe153d2cdc42d01e02d979 Mon Sep 17 00:00:00 2001 From: Raahul Singh Date: Tue, 30 Jun 2020 15:53:55 +0530 Subject: [PATCH 07/10] codestyle --- pythia/cleaning/elo.py | 6 +++--- pythia/cleaning/tests/test_elo.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pythia/cleaning/elo.py b/pythia/cleaning/elo.py index 5c67f2a..c8adda4 100644 --- a/pythia/cleaning/elo.py +++ b/pythia/cleaning/elo.py @@ -1,8 +1,8 @@ -import pandas as pd -import numpy as np from collections import deque -from sunpy.util import SunpyUserWarning +import numpy as np +import pandas as pd +from sunpy.util import SunpyUserWarning __all__ = ['ELO'] diff --git a/pythia/cleaning/tests/test_elo.py b/pythia/cleaning/tests/test_elo.py index 857a171..24b6013 100644 --- a/pythia/cleaning/tests/test_elo.py +++ b/pythia/cleaning/tests/test_elo.py @@ -1,7 +1,7 @@ +from pathlib import Path + import pytest -import pandas as pd from pythia.cleaning import ELO -from pathlib import Path from pythia.seo import Sunspotter from sunpy.util import SunpyUserWarning From e54264138079b0850686193207402983a60507f3 Mon Sep 17 00:00:00 2001 From: Raahul Singh Date: Tue, 30 Jun 2020 16:00:56 +0530 Subject: [PATCH 08/10] Use pytest approx --- pythia/cleaning/tests/test_elo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pythia/cleaning/tests/test_elo.py b/pythia/cleaning/tests/test_elo.py index 24b6013..f59cb0f 100644 --- a/pythia/cleaning/tests/test_elo.py +++ b/pythia/cleaning/tests/test_elo.py @@ -29,7 +29,7 @@ def elo(): (1550, 1854.5, 0.14768898365874825), (1600, 2208.0, 0.029314241270450396)]) def test_expected_score(elo, rating_0, rating_1, expected_score): - assert elo.expected_score(rating_0, rating_1) == expected_score + assert pytest.approx(elo.expected_score(rating_0, rating_1)) == expected_score @pytest.mark.parametrize('rating_for_image,k_value,score_for_image,image_expected_score,new_rating', @@ -44,7 +44,7 @@ def test_expected_score(elo, rating_0, rating_1, expected_score): (1854.5, 32, 0, 0.8523110163412517, 1827.2260474770799), (2208.0, 32, 0, 0.9706857587295497, 2176.9380557206546)]) def test_new_rating(elo, rating_for_image, k_value, score_for_image, image_expected_score, new_rating): - assert elo.new_rating(rating_for_image, k_value, score_for_image, image_expected_score) == new_rating + assert pytest.approx(elo.new_rating(rating_for_image, k_value, score_for_image, image_expected_score)), new_rating def test_column_map(elo): From 7f827d59317eee91bb9695329284ca17ccacf151 Mon Sep 17 00:00:00 2001 From: Raahul Singh Date: Tue, 30 Jun 2020 16:11:02 +0530 Subject: [PATCH 09/10] Moves test file around --- .../cleaning/tests}/test_classifications.csv | 0 pythia/cleaning/tests/test_elo.py | 14 +++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) rename {data/elo_test => pythia/cleaning/tests}/test_classifications.csv (100%) diff --git a/data/elo_test/test_classifications.csv b/pythia/cleaning/tests/test_classifications.csv similarity index 100% rename from data/elo_test/test_classifications.csv rename to pythia/cleaning/tests/test_classifications.csv diff --git a/pythia/cleaning/tests/test_elo.py b/pythia/cleaning/tests/test_elo.py index f59cb0f..a3f8bc0 100644 --- a/pythia/cleaning/tests/test_elo.py +++ b/pythia/cleaning/tests/test_elo.py @@ -5,14 +5,14 @@ from pythia.seo import Sunspotter from sunpy.util import SunpyUserWarning -path = Path(__file__).resolve().parent.parent.parent.parent / "data/elo_test" +path = Path(__file__).parent.parent.parent.parent 
/ "data/all_clear" @pytest.fixture def elo(): - sunspotter = Sunspotter(timesfits=path / "../all_clear/lookup_timesfits.csv", - properties=path / "../all_clear/lookup_properties.csv", - classifications=path / "test_classifications.csv", + sunspotter = Sunspotter(timesfits=path / "lookup_timesfits.csv", + properties=path / "lookup_properties.csv", + classifications= Path(__file__).parent / "test_classifications.csv", classifications_columns=['image_id_0', 'image_id_1', 'image0_more_complex_image1']) column_map = {"player 0": "image_id_0", @@ -53,9 +53,9 @@ def test_column_map(elo): def test_incorrect_column_map(): - sunspotter = Sunspotter(timesfits=path / "../all_clear/lookup_timesfits.csv", - properties=path / "../all_clear/lookup_properties.csv", - classifications=path / "test_classifications.csv", + sunspotter = Sunspotter(timesfits=path / "lookup_timesfits.csv", + properties=path / "lookup_properties.csv", + classifications=Path(__file__).parent / "test_classifications.csv", classifications_columns=['image_id_0', 'image_id_1', 'image0_more_complex_image1']) column_map = {"player 0": "This is not player 0", From f7581c256d4435913e70215b97fc29a9369132cb Mon Sep 17 00:00:00 2001 From: Raahul Singh Date: Tue, 30 Jun 2020 16:17:53 +0530 Subject: [PATCH 10/10] Update pythia/cleaning/elo.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: David Pérez-Suárez --- pythia/cleaning/elo.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pythia/cleaning/elo.py b/pythia/cleaning/elo.py index 5621d6f..5c9f020 100644 --- a/pythia/cleaning/elo.py +++ b/pythia/cleaning/elo.py @@ -55,8 +55,7 @@ def __init__(self, score_board: pd.DataFrame, *, k_value=32, default_score=1400, missing_columns = ", ".join(missing_columns) raise SunpyUserWarning("The following columns mentioned in the column map" - " are not present in the score board: " + - missing_columns) + f" are not present in the score board: {missing_columns}") self._create_ranking()
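The parametrized tests above pin the rating maths to precomputed values; with floating-point results the comparison is usually written with `pytest.approx` on the expected side, roughly like this (a sketch reusing the `elo` fixture, not the exact assertion used in the patch):

    import pytest

    def test_new_rating_example(elo):
        # A 1400-rated image wins an even match: the rating should rise by K/2 = 16 points.
        assert elo.new_rating(1400.0, 32, 1, 0.5) == pytest.approx(1416.0)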