From 598f2895f7b3b8e05934438cd9857c74c547cb3c Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 5 Dec 2024 13:36:06 -0500 Subject: [PATCH] get-directors: append if output file is present --- src/letsrolld/cmd/get_directors.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/letsrolld/cmd/get_directors.py b/src/letsrolld/cmd/get_directors.py index bf13587..26bb84a 100644 --- a/src/letsrolld/cmd/get_directors.py +++ b/src/letsrolld/cmd/get_directors.py @@ -1,11 +1,13 @@ import argparse import csv +import os import sys import time import traceback from letsrolld import film from letsrolld import filmlist +from letsrolld.directorlist import read_director_list _SEC_WAIT_ON_FAIL = 5 @@ -23,8 +25,9 @@ def get_directors_by_films(film_list): directors[director.base_url] = director yield director break - except Exception as e: - # TODO: deduplicate error handling with update script + except ( + Exception + ) as e: # TODO: deduplicate error handling with update script traceback.print_exception(e) print(f"Retrying in {_SEC_WAIT_ON_FAIL} seconds...") time.sleep(_SEC_WAIT_ON_FAIL) @@ -42,11 +45,26 @@ def main(): film_list = list(filmlist.read_film_list(args.input)) - with open(args.output, "w", newline="") as csvfile: + directors = set() + if os.path.exists(args.output): + print(f"Output file {args.output} already exists, appending to it...") + sys.stdout.flush() + + directors = {d.uri for d in read_director_list(args.output)} + + mode = "a" if directors else "w" + with open(args.output, mode, newline="") as csvfile: writer = csv.writer(csvfile, dialect=csv.unix_dialect) - writer.writerow(["Name", "Letterboxd URI"]) + if mode == "w": + writer.writerow(["Name", "Letterboxd URI"]) + else: + csvfile.seek(0, os.SEEK_END) for i, director_ in enumerate(get_directors_by_films(film_list), start=1): + if director_.base_url in directors: + print(f"Skipping director #{i}: {director_.name}") + sys.stdout.flush() + continue print(f"Adding director #{i}: {director_.name}") sys.stdout.flush() writer.writerow([director_.name, director_.base_url])