Merge pull request #4 from Fittiboy/multi
Allow filtering by multiple streamers + categories + clippers
Fittiboy authored Jul 1, 2021
2 parents 8014d7d + b2c8074 commit 52371a9
Showing 1 changed file with 97 additions and 84 deletions.
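
A minimal sketch of how the new multi-value arguments parse, reusing the
argparse definitions from this diff; the streamer, clipper, and category
values below are placeholders:

    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument("streamers", nargs="+", type=str)
    parser.add_argument("--clippers", metavar="usernames", nargs="*", type=str)
    parser.add_argument("--categories", metavar="games", nargs="*", type=str)

    # Placeholder command line: two streamers, two clippers, two categories.
    args = parser.parse_args([
        "streamer_a", "streamer_b",
        "--clippers", "alice", "bob",
        "--categories", "Just Chatting", "NOGAME",
    ])
    print(args.streamers)   # ['streamer_a', 'streamer_b']
    print(args.clippers)    # ['alice', 'bob']
    print(args.categories)  # ['Just Chatting', 'NOGAME']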
181 changes: 97 additions & 84 deletions clipper.py
@@ -48,10 +48,14 @@ def get_gdrive_files(credentials, clips, staging):
 
 
 def get_urls(twitch, start, end, b_id, pagination=None,
-             clipper=None, category=None, regex=None,
+             clippers=None, categories=None, regex=None,
              flags=[]):
 
     clips_list = []
+    clippers = [clipper.lower() for clipper in clippers] if clippers else None
+    categories = [
+        category.lower() for category in categories
+    ] if categories else None
     global game_ids
 
     clips = twitch.get_clips(broadcaster_id=b_id, first=100,
@@ -75,8 +79,8 @@ def get_urls(twitch, start, end, b_id, pagination=None,
         title = clip["created_at"] + " _ " + game + " _ " + c_title
         title += " _ " + creator + " _ " + clip["id"]
         if (
-            (clipper and clipper.lower() != creator.lower()) or
-            (category and category.lower() != game.lower()) or
+            (clippers and creator.lower() not in clippers) or
+            (categories and game.lower() not in categories) or
             (regex and not re.search(regex, c_title, *flags))
         ):
             pass
@@ -96,8 +100,9 @@ def dl_progress(count, block_size, total_size):
 
 if __name__ == "__main__":
     parser = ArgumentParser()
-    parser.add_argument("streamer",
-                        help="name of the streamer to pull clips from",
+    parser.add_argument("streamers",
+                        help="names of the streamers to pull clips from",
+                        nargs="+",
                         type=str)
     parser.add_argument("--start_date",
                         help="first day to start looking "
@@ -125,16 +130,18 @@ def dl_progress(count, block_size, total_size):
                         help="store clips locally (only necessary "
                              "if credentials.txt for Google Drive is present)",
                         action="store_true")
-    parser.add_argument("--clipper",
-                        help="only download clips made by this person",
-                        metavar="username",
+    parser.add_argument("--clippers",
+                        help="only download clips made by these accounts",
+                        metavar="usernames",
+                        nargs="*",
                         type=str)
-    parser.add_argument("--category",
-                        help="only download clips from this category/game "
+    parser.add_argument("--categories",
+                        help="only download clips from these categories/games "
                              "(some non-game categories like Just Chatting "
                              "don't get reported by the API, type \"NOGAME\" "
                              "for these if you notice they're missing)",
-                        metavar="game",
+                        metavar="games",
+                        nargs="*",
                         type=str)
     parser.add_argument("--regex",
                         help="only download clips matching the regular "
@@ -192,26 +199,30 @@ def dl_progress(count, block_size, total_size):
         raise FileNotFoundError(e_msg)
 
     game_ids = {}
+    b_ids = {}
+    start = None
 
     twitch = Twitch(t_id, t_t)
     twitch.authenticate_app([])
-    try:
-        streamer = twitch.get_users(logins=args.streamer)["data"][0]
-    except IndexError:
-        raise Exception("Streamer not found!")
-    b_id = streamer["id"]
+    for streamer in args.streamers:
+        try:
+            _streamer = twitch.get_users(logins=streamer)["data"][0]
+            year, month, day = _streamer["created_at"].split("-")
+            day = day.split("T")[0]
+            new_start = datetime(*map(int, [year, month, day]))
+            if not start or new_start < start:
+                start = new_start
+        except IndexError:
+            raise Exception("Streamer not found: " + streamer)
+        b_ids[streamer] = _streamer["id"]
 
     if args.start_date:
         try:
             year, month, day = [int(num) for num in args.start_date.split("/")]
             start = datetime(year, month, day)
         except Exception:
             raise Exception("Please provide a correct start date in the " +
                             "format YYYY/MM/DD")
-    else:
-        year, month, day = streamer["created_at"].split("-")
-        day = day.split("T")[0]
-        year, month, day = [int(num) for num in [year, month, day]]
-        start = datetime(year, month, day)
 
     if args.end_date:
         try:
@@ -235,68 +246,70 @@ def dl_progress(count, block_size, total_size):
                   "same settings.")
             break
 
-        all_urls = []
-        pagination = None
-        total = 0
-        datestring = start.strftime("%a, %Y/%B/%d")
-
-        while pagination != "DONE":
-            last_pagination = pagination
-            new_urls, pagination = get_urls(twitch=twitch,
-                                            start=start,
-                                            end=start + timedelta(days=1),
-                                            b_id=b_id,
-                                            pagination=pagination,
-                                            clipper=args.clipper,
-                                            category=args.category,
-                                            regex=args.regex,
-                                            flags=[re.I] if
-                                            args.case_insensitive else [])
-            all_urls += new_urls
-            print(f"Clips created on {datestring}: " + str(len(all_urls)),
-                  end="\r")
-
-        print(f"Clips created on {datestring}: " + str(len(all_urls)))
-        base_path = pjoin(filedir, "clips", args.streamer)
-        if not isdir(base_path):
-            makedirs(base_path, exist_ok=True)
-        exist_clips = listdir(base_path)
-        exist_ids = [filename.split(" _ ")[-1] for filename in exist_clips]
-
-        for url in all_urls:
-            total += 1
-            dl_url = url[1]
-            file_name = url[0] + ".mp4"
-            clip_id = file_name.split(" _ ")[-1]
-            if sys.platform.startswith("win"):
-                file_name = file_name.strip().replace(" ", "_")
-                file_name = re.sub(r'(?u)[^-\w.]', "", file_name)
-            fullpath = pjoin(base_path, file_name)
-            if gdrive and clip_id in files:
-                continue
-            elif clip_id in exist_ids and not gdrive:
-                continue
-            try:
-                print(str(total) + "/" + str(len(all_urls)) + "\t" +
-                      fullpath)
-                dl.urlretrieve(dl_url, fullpath,
-                               reporthook=dl_progress)
-                if gdrive:
-                    upload = drive.CreateFile({'title': file_name,
-                                               'parents': [{
-                                                   'id': staging_folder
-                                               }]})
-                    upload.SetContentFile(fullpath)
-                    upload.Upload()
-                    remove(fullpath)
-                print()
-            except Exception as e:
-                print(e)
-                if not isfile(base_path + "failed.txt"):
-                    with open("failed.txt", "w"):
-                        pass
-                with open("failed.txt", "a") as failed_file:
-                    failed_file.write(url[0] + " - " + url[1])
-                print(file_name + ": FAILED!")
+        for streamer, b_id in b_ids.items():
+            print(f"\n\tStreamer: {streamer}\n")
+            total = 0
+            pagination = None
+            all_urls = []
+            datestring = start.strftime("%a, %Y/%B/%d")
+
+            while pagination != "DONE":
+                last_pagination = pagination
+                new_urls, pagination = get_urls(twitch=twitch,
+                                                start=start,
+                                                end=start + timedelta(days=1),
+                                                b_id=b_id,
+                                                pagination=pagination,
+                                                clippers=args.clippers,
+                                                categories=args.categories,
+                                                regex=args.regex,
+                                                flags=[re.I] if
+                                                args.case_insensitive else [])
+                all_urls += new_urls
+                print(f"Clips created on {datestring}: " + str(len(all_urls)),
+                      end="\r")
+
+            print(f"Clips created on {datestring}: " + str(len(all_urls)))
+            base_path = pjoin(filedir, "clips", streamer)
+            if not isdir(base_path):
+                makedirs(base_path, exist_ok=True)
+            exist_clips = listdir(base_path)
+            exist_ids = [filename.split(" _ ")[-1] for filename in exist_clips]
+
+            for url in all_urls:
+                total += 1
+                dl_url = url[1]
+                file_name = url[0] + ".mp4"
+                clip_id = file_name.split(" _ ")[-1]
+                if sys.platform.startswith("win"):
+                    file_name = file_name.strip().replace(" ", "_")
+                    file_name = re.sub(r'(?u)[^-\w.]', "", file_name)
+                fullpath = pjoin(base_path, file_name)
+                if gdrive and clip_id in files:
+                    continue
+                elif clip_id in exist_ids and not gdrive:
+                    continue
+                try:
+                    print(str(total) + "/" + str(len(all_urls)) + "\t" +
+                          fullpath)
+                    dl.urlretrieve(dl_url, fullpath,
+                                   reporthook=dl_progress)
+                    if gdrive:
+                        upload = drive.CreateFile({'title': file_name,
+                                                   'parents': [{
+                                                       'id': staging_folder
+                                                   }]})
+                        upload.SetContentFile(fullpath)
+                        upload.Upload()
+                        remove(fullpath)
+                    print()
+                except Exception as e:
+                    print(e)
+                    if not isfile(base_path + "failed.txt"):
+                        with open("failed.txt", "w"):
+                            pass
+                    with open("failed.txt", "a") as failed_file:
+                        failed_file.write(url[0] + " - " + url[1])
+                    print(file_name + ": FAILED!")
 
         start += timedelta(days=1)
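
The filtering change above replaces the old single-value equality checks
with case-insensitive membership tests. A small standalone sketch of that
logic, with made-up clip data:

    clippers = ["Alice", "BOB"]          # hypothetical --clippers values
    categories = ["Just Chatting"]       # hypothetical --categories value

    # get_urls lower-cases the filter lists once up front...
    clippers = [clipper.lower() for clipper in clippers] if clippers else None
    categories = [
        category.lower() for category in categories
    ] if categories else None

    # ...so each clip needs only a lower-cased membership test.
    creator, game = "bob", "Just Chatting"
    skip = (
        (clippers and creator.lower() not in clippers) or
        (categories and game.lower() not in categories)
    )
    print(skip)  # False: this clip passes both filters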
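
With several streamers, the default start date is now the earliest
account-creation date among them, so no streamer's clips are missed. A
sketch of that computation, assuming the "YYYY-MM-DDTHH:MM:SSZ" timestamp
shape the loop above splits apart; the dates are placeholders:

    from datetime import datetime

    created_ats = ["2016-05-10T14:32:11Z", "2014-11-02T09:05:44Z"]

    start = None
    for created_at in created_ats:
        year, month, day = created_at.split("-")
        day = day.split("T")[0]                  # drop the time-of-day part
        new_start = datetime(*map(int, [year, month, day]))
        if not start or new_start < start:
            start = new_start

    print(start)  # 2014-11-02 00:00:00, the older account wins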
