-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_lyrics.py
56 lines (45 loc) · 1.68 KB
/
get_lyrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import requests
import sys
from bs4 import BeautifulSoup
import pandas as pd
import config
def get_lyrics(song_title, artist_name):
    """Search Genius for a song and return the lyrics scraped from its page.

    The Genius search endpoint is queried with the title and artist. The
    first hit whose primary artist name contains ``artist_name``
    (case-insensitive) is used. If no hit matches and the title has more
    than one word, the search is retried using only the first word of the
    title.

    Args:
        song_title: Title of the song; any '?' characters are stripped.
        artist_name: Name of the artist; any '?' characters are stripped.

    Returns:
        Whatever ``scrap_song_url`` returns for the matching song page, or
        ``None`` when no hit matches the artist.
    """
    print(song_title)
    song_title = song_title.replace('?', '')
    artist_name = artist_name.replace('?', '')

    # https://github.com/willamesoares/lyrics-crawler
    base_url = 'https://api.genius.com'
    headers = {'Authorization': 'Bearer ' + config.genius_api_token}
    search_url = base_url + '/search'
    query = {'q': song_title + ' ' + artist_name}
    # The search term is a query-string parameter: pass it via ``params``
    # so it ends up in the URL rather than in the body of a GET request.
    response = requests.get(search_url, params=query, headers=headers)
    payload = response.json()

    wanted_artist = artist_name.lower()
    for hit in payload['response']['hits']:
        if wanted_artist in hit['result']['primary_artist']['name'].lower():
            return scrap_song_url(hit['result']['url'])

    # No hit matched the artist: retry with only the first word of the title.
    title_words = song_title.split()
    if len(title_words) > 1:
        return get_lyrics(title_words[0], artist_name)
    return None
def scrap_song_url(url):
    """Download a song page and return the text of its lyrics container.

    Args:
        url: Address of the song page to fetch.

    Returns:
        The text content of the first ``<div class="lyrics">`` element, or
        ``None`` when the page contains no such element.
    """
    # https://github.com/willamesoares/lyrics-crawler
    page = requests.get(url)
    html = BeautifulSoup(page.text, 'html.parser')
    # Drop <script> elements so their contents never leak into the text.
    for script in html('script'):
        script.extract()
    lyrics_div = html.find('div', class_='lyrics')
    if lyrics_div is None:
        # find() returns None when nothing matches; report "no lyrics"
        # instead of failing with AttributeError on .get_text().
        return None
    return lyrics_div.get_text()
if __name__ == "__main__":
    # Usage: python get_lyrics.py <songs.csv>
    # Reads <name>.csv, adds a "lyrics" column built from the "name" and
    # "artists" columns, and writes the result to <name>_lyrics.csv.
    import os

    if len(sys.argv) < 2:
        print("No filename provided")
        # sys.exit is always available (the bare exit() helper comes from
        # the site module); a non-zero status signals the usage error.
        sys.exit(1)
    # splitext only strips a real extension of the final path component,
    # so paths such as "./songs" or "data.v2/songs" are left intact.
    fname = os.path.splitext(sys.argv[1])[0]
    songs = pd.read_csv(fname + ".csv")
    songs["lyrics"] = songs.apply(
        lambda row: get_lyrics(row["name"], row["artists"]), axis=1
    )
    songs.to_csv(fname + "_lyrics.csv")