From 45c0acb50f5a56a054e67fa6b87a845fde623e61 Mon Sep 17 00:00:00 2001 From: Fitti Date: Thu, 1 Jul 2021 13:15:41 +0200 Subject: [PATCH 1/4] Change flags to accept lists --- clipper.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/clipper.py b/clipper.py index b9c885a..48211b8 100644 --- a/clipper.py +++ b/clipper.py @@ -96,8 +96,9 @@ def dl_progress(count, block_size, total_size): if __name__ == "__main__": parser = ArgumentParser() - parser.add_argument("streamer", - help="name of the streamer to pull clips from", + parser.add_argument("streamers", + help="names of the streamers to pull clips from", + nargs="+", type=str) parser.add_argument("--start_date", help="first day to start looking " @@ -125,16 +126,18 @@ def dl_progress(count, block_size, total_size): help="store clips locally (only necessary " "if credentials.txt for Google Drive is present)", action="store_true") - parser.add_argument("--clipper", - help="only download clips made by this person", + parser.add_argument("--clippers", + help="only download clips made by these accounts", metavar="username", + nargs="*", type=str) - parser.add_argument("--category", - help="only download clips from this category/game " + parser.add_argument("--categories", + help="only download clips from these categorys/games " "(some non-game categories like Just Chatting " "don't get reported by the API, type \"NOGAME\" " "for these if you notice they're missing)", - metavar="game", + metavar="games", + nargs="*", type=str) parser.add_argument("--regex", help="only download clips matching the regular " From 1cc6ce434fae95feabb1de2d6d7cbd688303a86d Mon Sep 17 00:00:00 2001 From: Fitti Date: Thu, 1 Jul 2021 13:18:44 +0200 Subject: [PATCH 2/4] Make last metavar plural --- clipper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clipper.py b/clipper.py index 48211b8..0a1dc9d 100644 --- a/clipper.py +++ b/clipper.py @@ -128,7 +128,7 @@ def dl_progress(count, block_size, total_size): action="store_true") parser.add_argument("--clippers", help="only download clips made by these accounts", - metavar="username", + metavar="usernames", nargs="*", type=str) parser.add_argument("--categories", From ec69a8c63ee6541e06828456c2a6888170754013 Mon Sep 17 00:00:00 2001 From: Fitti Date: Thu, 1 Jul 2021 13:40:58 +0200 Subject: [PATCH 3/4] Handle streamers start time and clippers/categories filter --- clipper.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/clipper.py b/clipper.py index 0a1dc9d..ae8ed80 100644 --- a/clipper.py +++ b/clipper.py @@ -48,10 +48,12 @@ def get_gdrive_files(credentials, clips, staging): def get_urls(twitch, start, end, b_id, pagination=None, - clipper=None, category=None, regex=None, + clippers=None, categories=None, regex=None, flags=[]): clips_list = [] + clippers = [clipper.lower() for clipper in clippers] + categories = [category.lower() for category in categories] global game_ids clips = twitch.get_clips(broadcaster_id=b_id, first=100, @@ -75,8 +77,8 @@ def get_urls(twitch, start, end, b_id, pagination=None, title = clip["created_at"] + " _ " + game + " _ " + c_title title += " _ " + creator + " _ " + clip["id"] if ( - (clipper and clipper.lower() != creator.lower()) or - (category and category.lower() != game.lower()) or + (clippers and creator.lower() not in clippers) or + (categories and game.lower() not in categories) or (regex and not re.search(regex, c_title, *flags)) ): pass @@ -195,26 +197,30 @@ def dl_progress(count, block_size, total_size): raise FileNotFoundError(e_msg) game_ids = {} + b_ids = [] + start = None twitch = Twitch(t_id, t_t) twitch.authenticate_app([]) - try: - streamer = twitch.get_users(logins=args.streamer)["data"][0] - except IndexError: - raise Exception("Streamer not found!") - b_id = streamer["id"] + for streamer in args.streamers: + try: + streamer = twitch.get_users(logins=args.streamer)["data"][0] + year, month, day = streamer["created_at"].split("-") + day = day.split("T")[0] + new_start = datetime(*map(int, [year, month, day])) + if not start or new_start < start: + start = new_start + except IndexError: + raise Exception("Streamer not found: " + streamer) + b_ids.append(streamer["id"]) if args.start_date: try: year, month, day = [int(num) for num in args.start_date.split("/")] + start = datetime(year, month, day) except Exception: raise Exception("Please provice a correct start date in the " + "format YYYY/MM/DD") - else: - year, month, day = streamer["created_at"].split("-") - day = day.split("T")[0] - year, month, day = [int(num) for num in [year, month, day]] - start = datetime(year, month, day) if args.end_date: try: @@ -248,10 +254,10 @@ def dl_progress(count, block_size, total_size): new_urls, pagination = get_urls(twitch=twitch, start=start, end=start + timedelta(days=1), - b_id=b_id, + b_ids=b_ids, pagination=pagination, - clipper=args.clipper, - category=args.category, + clipper=args.clippers, + category=args.categories, regex=args.regex, flags=[re.I] if args.case_insensitive else []) From b2c8074dd0f6981aa3a27bc9743c6c3097333c3b Mon Sep 17 00:00:00 2001 From: Fitti Date: Thu, 1 Jul 2021 13:52:43 +0200 Subject: [PATCH 4/4] Add multi-streamer support --- clipper.py | 142 +++++++++++++++++++++++++++-------------------------- 1 file changed, 73 insertions(+), 69 deletions(-) diff --git a/clipper.py b/clipper.py index ae8ed80..6d642f4 100644 --- a/clipper.py +++ b/clipper.py @@ -52,8 +52,10 @@ def get_urls(twitch, start, end, b_id, pagination=None, flags=[]): clips_list = [] - clippers = [clipper.lower() for clipper in clippers] - categories = [category.lower() for category in categories] + clippers = [clipper.lower() for clipper in clippers] if clippers else None + categories = [ + category.lower() for category in categories + ] if categories else None global game_ids clips = twitch.get_clips(broadcaster_id=b_id, first=100, @@ -197,22 +199,22 @@ def dl_progress(count, block_size, total_size): raise FileNotFoundError(e_msg) game_ids = {} - b_ids = [] + b_ids = {} start = None twitch = Twitch(t_id, t_t) twitch.authenticate_app([]) for streamer in args.streamers: try: - streamer = twitch.get_users(logins=args.streamer)["data"][0] - year, month, day = streamer["created_at"].split("-") + _streamer = twitch.get_users(logins=streamer)["data"][0] + year, month, day = _streamer["created_at"].split("-") day = day.split("T")[0] new_start = datetime(*map(int, [year, month, day])) if not start or new_start < start: start = new_start except IndexError: raise Exception("Streamer not found: " + streamer) - b_ids.append(streamer["id"]) + b_ids[streamer] = _streamer["id"] if args.start_date: try: @@ -244,68 +246,70 @@ def dl_progress(count, block_size, total_size): "same settings.") break - all_urls = [] - pagination = None - total = 0 - datestring = start.strftime("%a, %Y/%B/%d") - - while pagination != "DONE": - last_pagination = pagination - new_urls, pagination = get_urls(twitch=twitch, - start=start, - end=start + timedelta(days=1), - b_ids=b_ids, - pagination=pagination, - clipper=args.clippers, - category=args.categories, - regex=args.regex, - flags=[re.I] if - args.case_insensitive else []) - all_urls += new_urls - print(f"Clips created on {datestring}: " + str(len(all_urls)), - end="\r") - - print(f"Clips created on {datestring}: " + str(len(all_urls))) - base_path = pjoin(filedir, "clips", args.streamer) - if not isdir(base_path): - makedirs(base_path, exist_ok=True) - exist_clips = listdir(base_path) - exist_ids = [filename.split(" _ ")[-1] for filename in exist_clips] - - for url in all_urls: - total += 1 - dl_url = url[1] - file_name = url[0] + ".mp4" - clip_id = file_name.split(" _ ")[-1] - if sys.platform.startswith("win"): - file_name = file_name.strip().replace(" ", "_") - file_name = re.sub(r'(?u)[^-\w.]', "", file_name) - fullpath = pjoin(base_path, file_name) - if gdrive and clip_id in files: - continue - elif clip_id in exist_ids and not gdrive: - continue - try: - print(str(total) + "/" + str(len(all_urls)) + "\t" + - fullpath) - dl.urlretrieve(dl_url, fullpath, - reporthook=dl_progress) - if gdrive: - upload = drive.CreateFile({'title': file_name, - 'parents': [{ - 'id': staging_folder - }]}) - upload.SetContentFile(fullpath) - upload.Upload() - remove(fullpath) - print() - except Exception as e: - print(e) - if not isfile(base_path + "failed.txt"): - with open("failed.txt", "w"): - pass - with open("failed.txt", "a") as failed_file: - failed_file.write(url[0] + " - " + url[1]) - print(file_name + ": FAILED!") + for streamer, b_id in b_ids.items(): + print(f"\n\tStreamer: {streamer}\n") + total = 0 + pagination = None + all_urls = [] + datestring = start.strftime("%a, %Y/%B/%d") + + while pagination != "DONE": + last_pagination = pagination + new_urls, pagination = get_urls(twitch=twitch, + start=start, + end=start + timedelta(days=1), + b_id=b_id, + pagination=pagination, + clippers=args.clippers, + categories=args.categories, + regex=args.regex, + flags=[re.I] if + args.case_insensitive else []) + all_urls += new_urls + print(f"Clips created on {datestring}: " + str(len(all_urls)), + end="\r") + + print(f"Clips created on {datestring}: " + str(len(all_urls))) + base_path = pjoin(filedir, "clips", streamer) + if not isdir(base_path): + makedirs(base_path, exist_ok=True) + exist_clips = listdir(base_path) + exist_ids = [filename.split(" _ ")[-1] for filename in exist_clips] + + for url in all_urls: + total += 1 + dl_url = url[1] + file_name = url[0] + ".mp4" + clip_id = file_name.split(" _ ")[-1] + if sys.platform.startswith("win"): + file_name = file_name.strip().replace(" ", "_") + file_name = re.sub(r'(?u)[^-\w.]', "", file_name) + fullpath = pjoin(base_path, file_name) + if gdrive and clip_id in files: + continue + elif clip_id in exist_ids and not gdrive: + continue + try: + print(str(total) + "/" + str(len(all_urls)) + "\t" + + fullpath) + dl.urlretrieve(dl_url, fullpath, + reporthook=dl_progress) + if gdrive: + upload = drive.CreateFile({'title': file_name, + 'parents': [{ + 'id': staging_folder + }]}) + upload.SetContentFile(fullpath) + upload.Upload() + remove(fullpath) + print() + except Exception as e: + print(e) + if not isfile(base_path + "failed.txt"): + with open("failed.txt", "w"): + pass + with open("failed.txt", "a") as failed_file: + failed_file.write(url[0] + " - " + url[1]) + print(file_name + ": FAILED!") start += timedelta(days=1)