Skip to content

Commit

Permalink
Make fetch old indexes async for speedup
Browse files Browse the repository at this point in the history
  • Loading branch information
Glyphack committed Nov 14, 2023
1 parent 4103c0d commit f9f6d3a
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 29 deletions.
59 changes: 34 additions & 25 deletions pytse_client/scraper/symbol_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
from dataclasses import dataclass
from typing import List

import aiohttp
import requests
from bs4 import BeautifulSoup
from requests import HTTPError

from pytse_client import config, tse_settings
from pytse_client.utils import requests_retry_session
from pytse_client.utils.persian import replace_arabic
from requests import HTTPError

logger = logging.getLogger(config.LOGGER_NAME)

Expand Down Expand Up @@ -105,7 +106,7 @@ def get_market_symbols_from_market_watch_page() -> List[MarketSymbol]:
return market_symbols


def add_old_indexes_to_market_symbols(
async def add_old_indexes_to_market_symbols(
symbols: List[MarketSymbol],
) -> List[MarketSymbol]:
"""
Expand All @@ -123,9 +124,11 @@ def add_old_indexes_to_market_symbols(
"""
market_symbols = []
conn = aiohttp.TCPConnector(limit=5)
session = aiohttp.ClientSession(connector=conn)

for symbol in symbols:
index, old_ids = get_symbol_ids(symbol.symbol)
index, old_ids = await get_symbol_ids(symbol.symbol, session)
if index is None:
index = symbol.index
market_symbols.append(
Expand All @@ -138,28 +141,34 @@ def add_old_indexes_to_market_symbols(
)
)

await session.close()

return market_symbols


def get_symbol_ids(symbol_name: str):
async def get_symbol_ids(symbol_name: str, session: aiohttp.ClientSession):
url = tse_settings.TSE_SYMBOL_ID_URL.format(symbol_name.strip())
response = requests_retry_session().get(url, timeout=10)
try:
response.raise_for_status()
except HTTPError:
raise Exception(f"{symbol_name}: Sorry, tse server did not respond")

symbols = response.text.split(";")
index = None
old_ids = []
for symbol_full_info in symbols:
if symbol_full_info.strip() == "":
continue
symbol_full_info = symbol_full_info.split(",")
if replace_arabic(symbol_full_info[0]) == symbol_name:
if symbol_full_info[7] == "1":
index = symbol_full_info[2] # active symbol id
else:
old_ids.append(symbol_full_info[2]) # old symbol id

return index, old_ids
print(f"get_symbol_ids url: {url}")

timeout = aiohttp.ClientTimeout(total=20)
async with session.get(url, timeout=timeout) as response:
if response.status != 200:
raise HTTPError(
f"get_symbol_ids failed with status code: {response.status}"
)

response_text = await response.text()
symbols = response_text.split(";")
index = None
old_ids = []
for symbol_full_info in symbols:
if symbol_full_info.strip() == "":
continue
symbol_full_info = symbol_full_info.split(",")
if replace_arabic(symbol_full_info[0]) == symbol_name:
if symbol_full_info[7] == "1":
index = symbol_full_info[2] # active symbol id
else:
old_ids.append(symbol_full_info[2]) # old symbol id

return index, old_ids
15 changes: 11 additions & 4 deletions pytse_client/scripts/update_symbols_json.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
import json
import locale
from pathlib import Path
Expand All @@ -11,7 +12,7 @@
get_market_symbols_from_symbols_list_page,
)

locale.setlocale(locale.LC_COLLATE, "fa_IR.UTF-8")
# locale.setlocale(locale.LC_COLLATE, "fa_IR.UTF-8")


def write_symbols_to_json(
Expand All @@ -30,21 +31,27 @@ def write_symbols_to_json(
}
json.dump(data, file, ensure_ascii=False, indent=2)


if __name__ == "__main__":
async def main():
# the sum order is important
# https://github.com/Glyphack/pytse-client/issues/123
market_symbols = (
get_market_symbols_from_market_watch_page()
+ get_market_symbols_from_symbols_list_page()
)
print("finished fetching symbols")
print(f"Total symbols: {len(market_symbols)}")
deduplicated_market_symbols = list(set(market_symbols))
print(f"Total deduplicated symbols: {len(deduplicated_market_symbols)}")
# fetch old indexes of symbols
deduplicated_market_symbols = add_old_indexes_to_market_symbols(
deduplicated_market_symbols = await add_old_indexes_to_market_symbols(
deduplicated_market_symbols
)
print("finished fetching old indexes")
# sort by symbol
sorted_market_symbols = sorted(deduplicated_market_symbols)
write_symbols_to_json(
sorted_market_symbols, "symbols_name.json", f"{config.pytse_dir}/data"
)

if __name__ == "__main__":
asyncio.run(main())

0 comments on commit f9f6d3a

Please sign in to comment.