From 2357cba3c14e6baad58a2436e5bd8818a6295b82 Mon Sep 17 00:00:00 2001 From: Glyphack Date: Tue, 14 Nov 2023 21:53:03 +0100 Subject: [PATCH] Make fetch old indexes async for speedup --- pytse_client/scraper/symbol_scraper.py | 59 ++++++++++++--------- pytse_client/scripts/update_symbols_json.py | 13 ++++- 2 files changed, 45 insertions(+), 27 deletions(-) diff --git a/pytse_client/scraper/symbol_scraper.py b/pytse_client/scraper/symbol_scraper.py index 52beca1..904b8f5 100644 --- a/pytse_client/scraper/symbol_scraper.py +++ b/pytse_client/scraper/symbol_scraper.py @@ -4,12 +4,13 @@ from dataclasses import dataclass from typing import List +import aiohttp import requests from bs4 import BeautifulSoup +from requests import HTTPError + from pytse_client import config, tse_settings -from pytse_client.utils import requests_retry_session from pytse_client.utils.persian import replace_arabic -from requests import HTTPError logger = logging.getLogger(config.LOGGER_NAME) @@ -105,7 +106,7 @@ def get_market_symbols_from_market_watch_page() -> List[MarketSymbol]: return market_symbols -def add_old_indexes_to_market_symbols( +async def add_old_indexes_to_market_symbols( symbols: List[MarketSymbol], ) -> List[MarketSymbol]: """ @@ -123,9 +124,11 @@ def add_old_indexes_to_market_symbols( """ market_symbols = [] + conn = aiohttp.TCPConnector(limit=5) + session = aiohttp.ClientSession(connector=conn) for symbol in symbols: - index, old_ids = get_symbol_ids(symbol.symbol) + index, old_ids = await get_symbol_ids(symbol.symbol, session) if index is None: index = symbol.index market_symbols.append( @@ -138,28 +141,34 @@ def add_old_indexes_to_market_symbols( ) ) + await session.close() + return market_symbols -def get_symbol_ids(symbol_name: str): +async def get_symbol_ids(symbol_name: str, session: aiohttp.ClientSession): url = tse_settings.TSE_SYMBOL_ID_URL.format(symbol_name.strip()) - response = requests_retry_session().get(url, timeout=10) - try: - response.raise_for_status() - except HTTPError: - raise Exception(f"{symbol_name}: Sorry, tse server did not respond") - - symbols = response.text.split(";") - index = None - old_ids = [] - for symbol_full_info in symbols: - if symbol_full_info.strip() == "": - continue - symbol_full_info = symbol_full_info.split(",") - if replace_arabic(symbol_full_info[0]) == symbol_name: - if symbol_full_info[7] == "1": - index = symbol_full_info[2] # active symbol id - else: - old_ids.append(symbol_full_info[2]) # old symbol id - - return index, old_ids + print(f"get_symbol_ids url: {url}") + + timeout = aiohttp.ClientTimeout(total=20) + async with session.get(url, timeout=timeout) as response: + if response.status != 200: + raise HTTPError( + f"get_symbol_ids failed with status code: {response.status}" + ) + + response_text = await response.text() + symbols = response_text.split(";") + index = None + old_ids = [] + for symbol_full_info in symbols: + if symbol_full_info.strip() == "": + continue + symbol_full_info = symbol_full_info.split(",") + if replace_arabic(symbol_full_info[0]) == symbol_name: + if symbol_full_info[7] == "1": + index = symbol_full_info[2] # active symbol id + else: + old_ids.append(symbol_full_info[2]) # old symbol id + + return index, old_ids diff --git a/pytse_client/scripts/update_symbols_json.py b/pytse_client/scripts/update_symbols_json.py index 53f3c32..4fb6051 100644 --- a/pytse_client/scripts/update_symbols_json.py +++ b/pytse_client/scripts/update_symbols_json.py @@ -1,3 +1,4 @@ +import asyncio import json import locale from pathlib import Path @@ -31,20 +32,28 @@ def write_symbols_to_json( json.dump(data, file, ensure_ascii=False, indent=2) -if __name__ == "__main__": +async def main(): # the sum order is important # https://github.com/Glyphack/pytse-client/issues/123 market_symbols = ( get_market_symbols_from_market_watch_page() + get_market_symbols_from_symbols_list_page() ) + print("finished fetching symbols") + print(f"Total symbols: {len(market_symbols)}") deduplicated_market_symbols = list(set(market_symbols)) + print(f"Total deduplicated symbols: {len(deduplicated_market_symbols)}") # fetch old indexes of symbols - deduplicated_market_symbols = add_old_indexes_to_market_symbols( + deduplicated_market_symbols = await add_old_indexes_to_market_symbols( deduplicated_market_symbols ) + print("finished fetching old indexes") # sort by sybmol sorted_market_symbols = sorted(deduplicated_market_symbols) write_symbols_to_json( sorted_market_symbols, "symbols_name.json", f"{config.pytse_dir}/data" ) + + +if __name__ == "__main__": + asyncio.run(main())