diff --git a/swaglyrics_backend/issue_maker.py b/swaglyrics_backend/issue_maker.py index 391c800..ecd0f7c 100644 --- a/swaglyrics_backend/issue_maker.py +++ b/swaglyrics_backend/issue_maker.py @@ -158,10 +158,7 @@ def genius_stripper(song: str, artist: str) -> Optional[str]: title = re.sub(alg, '', title) logging.info(f'stripped title: {title}') - words = title.split() - max_err = len(words) // 2 - - # allow half length mismatch + max_err = len(title.split()) // 2 # allow half length mismatch logging.info(f'max_err is set to {max_err}') if r.status_code == 200: @@ -169,13 +166,15 @@ def genius_stripper(song: str, artist: str) -> Optional[str]: if data['meta']['status'] == 200: hits = data['response']['hits'] for hit in hits: - full_title = hit['result']['full_title'] - logging.info(f' full title: {full_title}') + g_title = hit['result']['full_title'] + g_song = hit['result']['title'] + g_artist = hit['result']['primary_artist']['name'] + logging.info(f' full title: {g_title}') # remove punctuation before comparison - full_title = re.sub(alg, '', full_title) - logging.info(f' stripped full title: {full_title}') + g_title = re.sub(alg, '', g_title) + logging.info(f' stripped full title: {g_title}') - if not is_title_mismatched(words, full_title, max_err): + if not is_title_mismatched(title, song, artist, g_song, g_artist, max_err): # return stripper as no mismatch if path := gstr.search(hit['result']['path']): stripper = path.group() @@ -188,10 +187,21 @@ def genius_stripper(song: str, artist: str) -> Optional[str]: @log_args(max_chars=-1) -def is_title_mismatched(words: List[str], full_title: str, max_err: int) -> bool: - mismatch = [word for word in words if word.lower() not in full_title.lower()] +def is_title_mismatched(title: str, song: str, artist: str, g_song: str, g_artist: str, max_err: int) -> bool: + g_title = f"{g_song} by {g_artist}".lower() + mismatch = [word for word in title.split() if word.lower() not in g_title.split()] logging.debug(f"broke on {mismatch}") - return len(mismatch) > max_err + + mismatched = len(mismatch) > max_err + + # handle discord cases where single word song, artist so max_err is 1 + # TODO: update tests + if not len(g_title.split()) == 3: + # check for artist same but song different false positive + if not mismatched and artist.split() in g_artist.split(): + mismatched = song.split() in g_song.split() + + return mismatched def create_issue(song: str, artist: str, version: str, stripper: str = 'not supported yet') -> JSONDict: @@ -510,8 +520,8 @@ def delete_line(): @limiter.exempt # disable limiter for firehose def github_webhook(): """ - `github_webhook` function handles all notification from GitHub relating to the org. Documentation for the webhooks can - be found at https://developer.github.com/webhooks/ + `github_webhook` function handles all notification from GitHub relating to the org. Documentation for the webhooks + can be found at https://developer.github.com/webhooks/ """ if request.method != 'POST': return 'OK' diff --git a/tests/sample_genius_data.json b/tests/sample_genius_data.json index 86a94b7..7062917 100644 --- a/tests/sample_genius_data.json +++ b/tests/sample_genius_data.json @@ -54,7 +54,7 @@ "id": 4317921, "lyrics_owner_id": 7918137, "lyrics_state": "complete", - "path": "/Caravan-palace-miracle-traduction-francaise-lyrics", + "path": "/Caravan-palace-miracle-traduction-francaise-aaaaafake", "pyongs_count": null, "song_art_image_thumbnail_url": "https://images.genius.com/ca9236bb9c4f5bd7065c3b9b8684aea5.300x300x1.jpg", "song_art_image_url": "https://images.genius.com/ca9236bb9c4f5bd7065c3b9b8684aea5.1000x1000x1.jpg", @@ -192,13 +192,13 @@ "result": { "annotation_count": 1, "api_path": "/songs/1259613", - "full_title": "Caravan by Utopia", + "full_title": "Not Caravan by Utopia", "header_image_thumbnail_url": "https://images.genius.com/f7e576e0d12167b15d3f51ba1fc33e2d.300x300x1.jpg", "header_image_url": "https://images.genius.com/f7e576e0d12167b15d3f51ba1fc33e2d.1000x1000x1.jpg", "id": 1259613, "lyrics_owner_id": 5593635, "lyrics_state": "complete", - "path": "/Utopia-caravan-lyrics", + "path": "/Utopia-caravan-annotated", "pyongs_count": null, "song_art_image_thumbnail_url": "https://images.genius.com/f7e576e0d12167b15d3f51ba1fc33e2d.300x300x1.jpg", "song_art_image_url": "https://images.genius.com/f7e576e0d12167b15d3f51ba1fc33e2d.1000x1000x1.jpg", @@ -206,7 +206,7 @@ "unreviewed_annotations": 0, "hot": false }, - "title": "Caravan", + "title": "Not Caravan", "title_with_featured": "Caravan", "url": "https://genius.com/Utopia-caravan-lyrics", "primary_artist": { diff --git a/tests/test_issue_maker.py b/tests/test_issue_maker.py index 19dfd76..3610332 100644 --- a/tests/test_issue_maker.py +++ b/tests/test_issue_maker.py @@ -244,6 +244,8 @@ def test_that_genius_stripper_checks_for_stripper_format(self, mock_get): fake_json['response']['hits'][0]['result']['path'] = "/Caravan-palace-miracle-annotated" # no lyrics at end # adjust titles so none match fake_json['response']['hits'][1]['result']["full_title"] = "fake title" + fake_json['response']['hits'][1]['result']["name"] = "fake song" + fake_json['response']['hits'][1]['result']["primary_artist"]["name"] = "fake name" fake_json['response']['hits'][4]['result']["full_title"] = "fake title" fake_json['response']['hits'][5]['result']["full_title"] = "fake title" @@ -263,15 +265,18 @@ def test_that_check_stripper_checks_stripper(self, fake_get): def test_that_title_mismatches(self): from swaglyrics_backend.issue_maker import is_title_mismatched - assert is_title_mismatched(["Bohemian", "Rhapsody", "by", "Queen"], "Miracle by Caravan Palace", 2) + assert is_title_mismatched("Bohemian Rhapsody by Queen", "Miracle", "Caravan Palace", "Miracle", + "Caravan Palace", 2) def test_that_title_not_mismatches(self): from swaglyrics_backend.issue_maker import is_title_mismatched - assert not is_title_mismatched(["Bohemian", "Rhapsody", "by", "Queen"], "bohemian rhapsody by queen", 2) + assert not is_title_mismatched("Bohemian Rhapsody by Queen", "bohemian rhapsody", "queen", "Bohemian Rhapsody", + "Queen", 2) def test_that_title_not_mismatches_with_one_error(self): from swaglyrics_backend.issue_maker import is_title_mismatched - assert not is_title_mismatched(["BoHemIaN", "RhaPsoDy", "2011", "bY", "queen"], "bohemian RHAPSODY By QUEEN", 2) + assert not is_title_mismatched("Bohemian Rhapsody by Queen", "bohemian RHAPSODY", "QUEEN", "Bohemian Rhapsody", + "Queen", 2) @patch('swaglyrics_backend.issue_maker.get_github_token', return_value='fake token') @patch('swaglyrics_backend.issue_maker.requests.post')