Skip to content

Commit

Permalink
feat: uploader: checking IA S3 load average
Browse files Browse the repository at this point in the history
  • Loading branch information
yzqzss committed Sep 19, 2023
1 parent bae4a78 commit 45960e5
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 0 deletions.
23 changes: 23 additions & 0 deletions wikiteam3/uploader/uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
from datetime import datetime
import json
import os
import random
import re
import shutil
from dataclasses import dataclass
import sys
import time
import traceback
from typing import Dict, List, Optional, Tuple, Union
Expand All @@ -22,6 +24,7 @@
from wikiteam3.uploader.socketLock import NoLock, SocketLockServer
from wikiteam3.utils import url2prefix_from_config, sha1sum
from wikiteam3.uploader.compresser import ZstdCompressor, SevenZipCompressor
from wikiteam3.utils.ia_checker import ia_s3_tasks_load_avg
from wikiteam3.utils.util import ALL_DUMPED_MARK, UPLOADED_MARK, mark_as_done, is_markfile_exists

DEFAULT_COLLECTION = 'opensource'
Expand Down Expand Up @@ -380,6 +383,26 @@ def upload(arg: Args):
print("=== Preparing metadata ===")
metadata, logo_url = prepare_item_metadata(wikidump_dir, config, arg)

print("=== Checking IA S3 load average (optional) ===")

try:
avg_load = ia_s3_tasks_load_avg(session=item.session) # check IA load
print(f"IA S3 load: {avg_load * 100:.4f}%")
if avg_load > 0.99:
print("WARNING: IA S3 is heavily overloaded, upload may fail")
print("Deciding whether to continue even if IA S3 is heavily overloaded... (20% chance to continue, random)")
if random.random() < 0.8:
print("To prevent IA S3 from being overloaded further, please try uploading later, exiting...")
sys.exit(99)
print("Continuing anyway...")
elif avg_load > 0.9:
print("WARNING: IA S3 is overloaded, upload may fail")
except Exception as e:
traceback.print_exc()
print(f"Failed to get IA S3 load average: {e}")
print("Don't worry, it's optional.")


if arg.dry_run:
print("=== Dry run, exiting ===")
return
Expand Down
12 changes: 12 additions & 0 deletions wikiteam3/utils/ia_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@
IA_MAX_RETRY = 5
logger = logging.getLogger(__name__)


def ia_s3_tasks_load_avg(session: ArchiveSession) -> float:
    """Return the IA S3 task-queue load as a fraction of its global limit.

    Sends a GET request for the S3 ``check_limit`` endpoint through
    ``session`` (16 second timeout), then divides the
    ``total_tasks_queued`` value by the ``total_global_limit`` value found
    under the ``detail`` key of the JSON response.

    Args:
        session: Session object used to perform the HTTP request.

    Returns:
        ``total_tasks_queued / total_global_limit``; the caller compares
        this against thresholds such as 0.9 and 0.99.

    Raises:
        Whatever ``session.get`` / ``raise_for_status`` / ``json`` raise,
        ``KeyError`` if an expected key is missing from the response, and
        ``ZeroDivisionError`` if the reported limit is zero (assuming the
        values are plain numbers -- TODO confirm against the API).
    """
    response = session.get("https://s3.us.archive.org/?check_limit=1", timeout=16)
    # Fail early on an HTTP error status instead of parsing an error body.
    response.raise_for_status()

    payload = response.json()
    queued = payload["detail"]["total_tasks_queued"]
    limit = payload["detail"]["total_global_limit"]

    logger.info(f"ia_s3_load_avg(): {queued} / {limit}")
    return queued / limit


def search_ia(apiurl: Optional[str] = None, indexurl: Optional[str] = None, addeddate_intervals: Optional[List[str]] = None):
if apiurl is None:
apiurl = 'api.php'.join(indexurl.rsplit('index.php', 1)) if indexurl else None
Expand Down

0 comments on commit 45960e5

Please sign in to comment.