diff --git a/poetry.lock b/poetry.lock index 62fb982..4621b01 100644 --- a/poetry.lock +++ b/poetry.lock @@ -87,6 +87,17 @@ files = [ [package.extras] tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] +[[package]] +name = "async-lru" +version = "2.0.4" +description = "Simple LRU cache for asyncio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "async-lru-2.0.4.tar.gz", hash = "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627"}, + {file = "async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224"}, +] + [[package]] name = "attrs" version = "23.2.0" @@ -1758,4 +1769,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "cde2194f4da1bd7402b4ffaed566937745f18e0758784876a62b1914faec9c00" +content-hash = "6eb629b08089983b830419b9b143f4ebcbb7e824ed4110cc3a4aba25e13e5df7" diff --git a/pyproject.toml b/pyproject.toml index 5c5eae5..173eb0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ frida = "^16.2.1" tenacity = "^8.2.3" prompt-toolkit = "^3.0.43" mitmproxy = "^10.3.0" +async-lru = "^2.0.4" [build-system] requires = ["poetry-core"] diff --git a/src/api.py b/src/api.py index fade0e9..afa6227 100644 --- a/src/api.py +++ b/src/api.py @@ -5,6 +5,7 @@ import httpcore import httpx import regex +from async_lru import alru_cache from loguru import logger from tenacity import retry, retry_if_exception_type, stop_after_attempt, before_sleep_log @@ -12,181 +13,226 @@ from src.models.song_data import Datum client: httpx.AsyncClient -lock: asyncio.Semaphore +download_lock: asyncio.Semaphore +request_lock: asyncio.Semaphore user_agent_browser = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" user_agent_itunes = "iTunes/12.11.3 (Windows; Microsoft Windows 10 x64 Professional Edition (Build 19041); x64) AppleWebKit/7611.1022.4001.1 (dt:2)" 
user_agent_app = "Music/5.7 Android/10 model/Pixel6GR1YH build/1234 (dt:66)" def init_client_and_lock(proxy: str, parallel_num: int): - global client, lock + global client, download_lock, request_lock if proxy: client = httpx.AsyncClient(proxy=proxy) else: client = httpx.AsyncClient() - lock = asyncio.Semaphore(parallel_num) + download_lock = asyncio.Semaphore(parallel_num) + request_lock = asyncio.Semaphore(64) async def get_m3u8_from_api(endpoint: str, song_id: str) -> str: - resp = (await client.get(endpoint, params={"songid": song_id})).text - if resp == "no_found": - return "" - return resp + async with request_lock: + resp = (await client.get(endpoint, params={"songid": song_id})).text + if resp == "no_found": + return "" + return resp async def upload_m3u8_to_api(endpoint: str, m3u8_url: str, song_info: Datum): - await client.post(endpoint, json={ - "method": "add_m3u8", - "params": { - "songid": song_info.id, - "song_title": f"Disk {song_info.attributes.discNumber} Track {song_info.attributes.trackNumber} - {song_info.attributes.name}", - "albumid": song_info.relationships.albums.data[0].id, - "album_title": song_info.attributes.albumName, - "m3u8": m3u8_url, - } - }) + async with request_lock: + await client.post(endpoint, json={ + "method": "add_m3u8", + "params": { + "songid": song_info.id, + "song_title": f"Disk {song_info.attributes.discNumber} Track {song_info.attributes.trackNumber} - {song_info.attributes.name}", + "albumid": song_info.relationships.albums.data[0].id, + "album_title": song_info.attributes.albumName, + "m3u8": m3u8_url, + } + }) @retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)), stop=stop_after_attempt(5), before_sleep=before_sleep_log(logger, logging.WARN)) async def get_token(): - req = await client.get("https://beta.music.apple.com") - index_js_uri = regex.findall(r"/assets/index-legacy-[^/]+\.js", req.text)[0] - js_req = await 
client.get("https://beta.music.apple.com" + index_js_uri) - token = regex.search(r'eyJh([^"]*)', js_req.text)[0] - return token + async with request_lock: + req = await client.get("https://beta.music.apple.com") + index_js_uri = regex.findall(r"/assets/index-legacy-[^/]+\.js", req.text)[0] + js_req = await client.get("https://beta.music.apple.com" + index_js_uri) + token = regex.search(r'eyJh([^"]*)', js_req.text)[0] + return token -@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)), +@alru_cache +@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, + httpcore.RemoteProtocolError)), stop=stop_after_attempt(5), before_sleep=before_sleep_log(logger, logging.WARN)) async def download_song(url: str) -> bytes: - async with lock: + async with download_lock: return (await client.get(url)).content -@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)), - stop=stop_after_attempt(5), - before_sleep=before_sleep_log(logger, logging.WARN)) +@alru_cache +@retry(retry=retry_if_exception_type( + (httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)), + stop=stop_after_attempt(5), + before_sleep=before_sleep_log(logger, logging.WARN)) async def get_album_info(album_id: str, token: str, storefront: str, lang: str): - req = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/albums/{album_id}", - params={"omit[resource]": "autos", "include": "tracks,artists,record-labels", - "include[songs]": "artists", "fields[artists]": "name", - "fields[albums:albums]": "artistName,artwork,name,releaseDate,url", - "fields[record-labels]": "name", "l": lang}, - headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, - "Origin": "https://music.apple.com"}) - return AlbumMeta.model_validate(req.json()) - - 
-@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)), - stop=stop_after_attempt(5), - before_sleep=before_sleep_log(logger, logging.WARN)) + async with request_lock: + req = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/albums/{album_id}", + params={"omit[resource]": "autos", "include": "tracks,artists,record-labels", + "include[songs]": "artists", "fields[artists]": "name", + "fields[albums:albums]": "artistName,artwork,name,releaseDate,url", + "fields[record-labels]": "name", "l": lang}, + headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, + "Origin": "https://music.apple.com"}) + return AlbumMeta.model_validate(req.json()) + + +@alru_cache +@retry(retry=retry_if_exception_type( + (httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)), + stop=stop_after_attempt(5), + before_sleep=before_sleep_log(logger, logging.WARN)) async def get_playlist_info_and_tracks(playlist_id: str, token: str, storefront: str, lang: str): - resp = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/playlists/{playlist_id}", - params={"l": lang}, - headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, - "Origin": "https://music.apple.com"}) - playlist_info_obj = PlaylistInfo.parse_obj(resp.json()) - if playlist_info_obj.data[0].relationships.tracks.next: - all_tracks = await get_playlist_tracks(playlist_id, token, storefront, lang) - playlist_info_obj.data[0].relationships.tracks.data = all_tracks - return playlist_info_obj - - -@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)), - stop=stop_after_attempt(5), - before_sleep=before_sleep_log(logger, logging.WARN)) + async with request_lock: + resp = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/playlists/{playlist_id}", + 
params={"l": lang}, + headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, + "Origin": "https://music.apple.com"}) + playlist_info_obj = PlaylistInfo.parse_obj(resp.json()) + if playlist_info_obj.data[0].relationships.tracks.next: + all_tracks = await get_playlist_tracks(playlist_id, token, storefront, lang) + playlist_info_obj.data[0].relationships.tracks.data = all_tracks + return playlist_info_obj + + +@alru_cache +@retry(retry=retry_if_exception_type( + (httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)), + stop=stop_after_attempt(5), + before_sleep=before_sleep_log(logger, logging.WARN)) async def get_playlist_tracks(playlist_id: str, token: str, storefront: str, lang: str, offset: int = 0): - resp = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/playlists/{playlist_id}/tracks", - params={"l": lang, "offset": offset}, - headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, - "Origin": "https://music.apple.com"}) - playlist_tracks = PlaylistTracks.parse_obj(resp.json()) - tracks = playlist_tracks.data - if playlist_tracks.next: - next_tracks = await get_playlist_info_and_tracks(playlist_id, token, storefront, lang, offset + 100) - tracks.extend(next_tracks) - return tracks - - -@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)), - stop=stop_after_attempt(5), - before_sleep=before_sleep_log(logger, logging.WARN)) + async with request_lock: + resp = await client.get( + f"https://amp-api.music.apple.com/v1/catalog/{storefront}/playlists/{playlist_id}/tracks", + params={"l": lang, "offset": offset}, + headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, + "Origin": "https://music.apple.com"}) + playlist_tracks = PlaylistTracks.parse_obj(resp.json()) + tracks = playlist_tracks.data + if playlist_tracks.next: + next_tracks = await 
get_playlist_tracks(playlist_id, token, storefront, lang, offset + 100) + tracks.extend(next_tracks) + return tracks + + +@alru_cache +@retry(retry=retry_if_exception_type( + (httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)), + stop=stop_after_attempt(5), + before_sleep=before_sleep_log(logger, logging.WARN)) async def get_cover(url: str, cover_format: str, cover_size: str): - formatted_url = regex.sub('bb.jpg', f'bb.{cover_format}', url) - req = await client.get(formatted_url.replace("{w}x{h}", cover_size), - headers={"User-Agent": user_agent_browser}) - return req.content - - -@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)), - stop=stop_after_attempt(5), - before_sleep=before_sleep_log(logger, logging.WARN)) + async with request_lock: + formatted_url = regex.sub('bb.jpg', f'bb.{cover_format}', url) + req = await client.get(formatted_url.replace("{w}x{h}", cover_size), + headers={"User-Agent": user_agent_browser}) + return req.content + + +@alru_cache +@retry(retry=retry_if_exception_type( + (httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)), + stop=stop_after_attempt(5), + before_sleep=before_sleep_log(logger, logging.WARN)) async def get_song_info(song_id: str, token: str, storefront: str, lang: str): - req = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/songs/{song_id}", - params={"extend": "extendedAssetUrls", "include": "albums", "l": lang}, - headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_itunes, - "Origin": "https://music.apple.com"}) - song_data_obj = SongData.model_validate(req.json()) - for data in song_data_obj.data: - if data.id == song_id: - return data - return None - - -@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)), - 
stop=stop_after_attempt(5), - before_sleep=before_sleep_log(logger, logging.WARN)) + async with request_lock: + req = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/songs/{song_id}", + params={"extend": "extendedAssetUrls", "include": "albums", "l": lang}, + headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_itunes, + "Origin": "https://music.apple.com"}) + song_data_obj = SongData.model_validate(req.json()) + for data in song_data_obj.data: + if data.id == song_id: + return data + return None + + +@alru_cache +@retry(retry=retry_if_exception_type( + (httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)), + stop=stop_after_attempt(5), + before_sleep=before_sleep_log(logger, logging.WARN)) async def get_song_lyrics(song_id: str, storefront: str, token: str, dsid: str, account_token: str, lang: str) -> str: - req = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/songs/{song_id}/lyrics", - params={"l": lang}, - headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_app, - "X-Dsid": dsid}, - cookies={f"mz_at_ssl-{dsid}": account_token}) - result = SongLyrics.model_validate(req.json()) - return result.data[0].attributes.ttml - - -@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)), - stop=stop_after_attempt(5), - before_sleep=before_sleep_log(logger, logging.WARN)) + async with request_lock: + req = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/songs/{song_id}/lyrics", + params={"l": lang}, + headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_app, + "X-Dsid": dsid}, + cookies={f"mz_at_ssl-{dsid}": account_token}) + result = SongLyrics.model_validate(req.json()) + return result.data[0].attributes.ttml + + +@alru_cache +@retry(retry=retry_if_exception_type( + (httpx.TimeoutException, httpcore.ConnectError, SSLError, 
FileNotFoundError, httpcore.RemoteProtocolError)), + stop=stop_after_attempt(5), + before_sleep=before_sleep_log(logger, logging.WARN)) async def get_albums_from_artist(artist_id: str, storefront: str, token: str, lang: str, offset: int = 0): - resp = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/artists/{artist_id}/albums", - params={"l": lang, "offset": offset}, - headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, - "Origin": "https://music.apple.com"}) - artist_album = ArtistAlbums.parse_obj(resp.json()) - albums = [album.attributes.url for album in artist_album.data] - if artist_album.next: - next_albums = await get_albums_from_artist(artist_id, storefront, token, lang, offset + 25) - albums.extend(next_albums) - return list(set(albums)) - - -@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)), - stop=stop_after_attempt(5), - before_sleep=before_sleep_log(logger, logging.WARN)) + async with request_lock: + resp = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/artists/{artist_id}/albums", + params={"l": lang, "offset": offset}, + headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, + "Origin": "https://music.apple.com"}) + artist_album = ArtistAlbums.parse_obj(resp.json()) + albums = [album.attributes.url for album in artist_album.data] + if artist_album.next: + next_albums = await get_albums_from_artist(artist_id, storefront, token, lang, offset + 25) + albums.extend(next_albums) + return list(set(albums)) + + +@alru_cache +@retry(retry=retry_if_exception_type( + (httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)), + stop=stop_after_attempt(5), + before_sleep=before_sleep_log(logger, logging.WARN)) async def get_songs_from_artist(artist_id: str, storefront: str, token: str, lang: str, offset: int = 0): - resp = await 
client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/artists/{artist_id}/songs", - params={"l": lang, "offset": offset}, - headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, - "Origin": "https://music.apple.com"}) - artist_song = ArtistSongs.parse_obj(resp.json()) - songs = [song.attributes.url for song in artist_song.data] - if artist_song.next: - next_songs = await get_songs_from_artist(artist_id, storefront, token, lang, offset + 20) - songs.extend(next_songs) - return list[set(songs)] - - -@retry(retry=retry_if_exception_type((httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError)), - stop=stop_after_attempt(5), - before_sleep=before_sleep_log(logger, logging.WARN)) + async with request_lock: + resp = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/artists/{artist_id}/songs", + params={"l": lang, "offset": offset}, + headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, + "Origin": "https://music.apple.com"}) + artist_song = ArtistSongs.parse_obj(resp.json()) + songs = [song.attributes.url for song in artist_song.data] + if artist_song.next: + next_songs = await get_songs_from_artist(artist_id, storefront, token, lang, offset + 20) + songs.extend(next_songs) + return list(set(songs)) + + +@alru_cache +@retry(retry=retry_if_exception_type( + (httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)), + stop=stop_after_attempt(5), + before_sleep=before_sleep_log(logger, logging.WARN)) async def get_artist_info(artist_id: str, storefront: str, token: str, lang: str): - resp = await client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/artists/{artist_id}", - params={"l": lang}, - headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, - "Origin": "https://music.apple.com"}) - return ArtistInfo.parse_obj(resp.json()) + async with request_lock: + resp = await 
client.get(f"https://amp-api.music.apple.com/v1/catalog/{storefront}/artists/{artist_id}", + params={"l": lang}, + headers={"Authorization": f"Bearer {token}", "User-Agent": user_agent_browser, + "Origin": "https://music.apple.com"}) + return ArtistInfo.parse_obj(resp.json()) + + +@alru_cache +@retry(retry=retry_if_exception_type( + (httpx.TimeoutException, httpcore.ConnectError, SSLError, FileNotFoundError, httpcore.RemoteProtocolError)), + stop=stop_after_attempt(5), + before_sleep=before_sleep_log(logger, logging.WARN)) +async def download_m3u8(m3u8_url: str) -> str: + async with request_lock: + return (await client.get(m3u8_url)).text diff --git a/src/mp4.py b/src/mp4.py index af79ebd..d79e994 100644 --- a/src/mp4.py +++ b/src/mp4.py @@ -1,16 +1,17 @@ import subprocess import uuid +from datetime import datetime from io import BytesIO from pathlib import Path from tempfile import TemporaryDirectory from typing import Tuple -from datetime import datetime import m3u8 import regex from bs4 import BeautifulSoup from loguru import logger +from src.api import download_m3u8 from src.exceptions import CodecNotFoundException from src.metadata import SongMetadata from src.types import * @@ -18,7 +19,7 @@ async def get_available_codecs(m3u8_url: str) -> Tuple[list[str], list[str]]: - parsed_m3u8 = m3u8.load(m3u8_url) + parsed_m3u8 = m3u8.loads(await download_m3u8(m3u8_url), uri=m3u8_url) codec_ids = [playlist.stream_info.audio for playlist in parsed_m3u8.playlists] codecs = [get_codec_from_codec_id(codec_id) for codec_id in codec_ids] return codecs, codec_ids @@ -26,7 +27,7 @@ async def get_available_codecs(m3u8_url: str) -> Tuple[list[str], list[str]]: async def extract_media(m3u8_url: str, codec: str, song_metadata: SongMetadata, codec_priority: list[str], alternative_codec: bool = False) -> Tuple[str, list[str]]: - parsed_m3u8 = m3u8.load(m3u8_url) + parsed_m3u8 = m3u8.loads(await download_m3u8(m3u8_url), uri=m3u8_url) specifyPlaylist = find_best_codec(parsed_m3u8, 
codec) if not specifyPlaylist and alternative_codec: logger.warning(f"Codec {codec} of song: {song_metadata.artist} - {song_metadata.title} did not found") @@ -39,7 +40,7 @@ async def extract_media(m3u8_url: str, codec: str, song_metadata: SongMetadata, raise CodecNotFoundException selected_codec = specifyPlaylist.media[0].group_id logger.info(f"Selected codec: {selected_codec} for song: {song_metadata.artist} - {song_metadata.title}") - stream = m3u8.load(specifyPlaylist.absolute_uri) + stream = m3u8.loads(await download_m3u8(specifyPlaylist.absolute_uri), uri=specifyPlaylist.absolute_uri) skds = [key.uri for key in stream.keys if regex.match('(skd?://[^"]*)', key.uri)] keys = [prefetchKey] key_suffix = CodecKeySuffix.KeySuffixDefault