From 59ea42ccdbde26c2c8837645b7f7f63d8049ac1e Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka
Date: Thu, 5 Dec 2024 13:24:57 -0500
Subject: [PATCH] Add tool to convert a list of films into directors

---
Review notes on this revision:
- Makefile: declare the new get-directors target in .PHONY.
- get_directors.py: write the CSV as UTF-8, flush every progress message,
  retry only the director lookup (not the yield), track seen URLs in a set,
  add docstrings.
- The blob ids on the "index" lines were not regenerated for this revision.

 Makefile                           |  7 ++-
 pyproject.toml                     |  1 +
 src/letsrolld/cmd/get_directors.py | 80 ++++++++++++++++++++++++++++++
 src/letsrolld/director.py          | 23 ---------
 4 files changed, 86 insertions(+), 25 deletions(-)
 create mode 100644 src/letsrolld/cmd/get_directors.py

diff --git a/Makefile b/Makefile
index 7909197..a5da576 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ DIRECTORS_FILE?=directors.csv
 RUN_LOG?=run.log
 RUN_LOG_CMD?=ts | tee -a $(RUN_LOG)
 
-.PHONY: install lint test populate run-update-directors run-update-films run-update-offers run-cleanup run-all run-db-upgrade webapp ui swagger swagger-py swagger-js swagger-ts swagger-all get-dirs get-films
+.PHONY: install lint test get-directors populate-directors run-update-directors run-update-films run-update-offers run-cleanup run-all run-db-upgrade webapp ui swagger swagger-py swagger-js swagger-ts swagger-all get-dirs get-films
 
 install:
 	pdm install -vd
@@ -16,7 +16,10 @@ lint: install swagger
 test: lint
 	pdm run pytest
 
-populate:
+get-directors:
+	pdm run get-directors -i ./data/lists/everything.csv -o ./directors.csv | $(RUN_LOG_CMD)
+
+populate-directors:
 	pdm run populate-directors -d ${DIRECTORS_FILE} -n ${DIRECTORS_NUMBER}
 
 run-update-directors:
diff --git a/pyproject.toml b/pyproject.toml
index 240a729..5b3346d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,7 @@ build-backend = "pdm.backend"
 distribution = true
 
 [project.scripts]
+get-directors = "letsrolld.cmd.get_directors:main"
 populate-directors = "letsrolld.cmd.populate_directors:main"
 update-directors = "letsrolld.cmd.update:directors_main"
 update-films = "letsrolld.cmd.update:films_main"
diff --git a/src/letsrolld/cmd/get_directors.py b/src/letsrolld/cmd/get_directors.py
new file mode 100644
index 0000000..bf13587
--- /dev/null
+++ b/src/letsrolld/cmd/get_directors.py
@@ -0,0 +1,80 @@
+"""Convert a list of films into a list of their directors."""
+
+import argparse
+import csv
+import sys
+import time
+import traceback
+
+from letsrolld import film
+from letsrolld import filmlist
+
+_SEC_WAIT_ON_FAIL = 5
+
+
+def _log(message):
+    """Print a progress message and flush stdout immediately.
+
+    Stdout is block-buffered when it is piped (the Makefile target pipes it
+    through ``ts | tee``); flushing keeps progress visible and in order with
+    the tracebacks written to stderr.
+    """
+    print(message)
+    sys.stdout.flush()
+
+
+def get_directors_by_films(film_list):
+    """Yield every distinct director of the given films, in first-seen order.
+
+    Directors are de-duplicated by their ``base_url``. When looking up the
+    directors of a film raises, the traceback is printed and the lookup is
+    retried every ``_SEC_WAIT_ON_FAIL`` seconds until it succeeds.
+    """
+    film_list = list(film_list)
+    total = len(film_list)
+
+    seen_urls = set()
+    for i, film_ in enumerate(film_list, start=1):
+        movie = film.Film(film_.uri)
+        while True:
+            try:
+                # Materialize inside the try so only the lookup is retried; the
+                # yield below stays outside so that an exception thrown into
+                # the generator by its consumer is not swallowed and retried.
+                film_directors = list(movie.directors)
+                break
+            except Exception:
+                # TODO: deduplicate error handling with update script
+                traceback.print_exc()
+                _log(f"Retrying in {_SEC_WAIT_ON_FAIL} seconds...")
+                time.sleep(_SEC_WAIT_ON_FAIL)
+
+        for director in film_directors:
+            if director.base_url not in seen_urls:
+                seen_urls.add(director.base_url)
+                yield director
+        _log(f"Processed {i}/{total} films")
+
+
+def main():
+    """Read the input film list and write its distinct directors as CSV."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--input", help="input movie list file", required=True)
+    parser.add_argument(
+        "-o", "--output", help="output director list file", required=True
+    )
+    args = parser.parse_args()
+
+    film_list = list(filmlist.read_film_list(args.input))
+
+    # Director names may contain non-ASCII characters, so do not depend on the
+    # locale's default encoding.
+    with open(args.output, "w", newline="", encoding="utf-8") as csvfile:
+        writer = csv.writer(csvfile, dialect=csv.unix_dialect)
+        writer.writerow(["Name", "Letterboxd URI"])
+
+        for i, director_ in enumerate(get_directors_by_films(film_list), start=1):
+            _log(f"Adding director #{i}: {director_.name}")
+            writer.writerow([director_.name, director_.base_url])
+            # Flush each row so partial results survive an interrupted run.
+            csvfile.flush()
diff --git a/src/letsrolld/director.py b/src/letsrolld/director.py
index 0bb8ce9..ab3d6a6 100644
--- a/src/letsrolld/director.py
+++ b/src/letsrolld/director.py
@@ -1,6 +1,5 @@
 import functools
 import os.path
-import random
 
 from letsrolld.base import BaseObject
 from letsrolld import film
@@ -37,25 +36,3 @@ def film_urls(self):
     def films(self):
         for url in self.film_urls:
             yield film.Film(url)
-
-
-def get_directors_by_films(film_list):
-    film_list = film_list[:]
-    random.shuffle(film_list)
-
-    directors = {}
-    for film_ in film_list:
-        movie = film.Film(film_.uri)
-        for director in movie.directors:
-            if director.base_url not in directors:
-                directors[director.base_url] = director
-                yield director
-
-
-def get_directors_by_urls(director_list):
-    director_list = director_list[:]
-    random.shuffle(director_list)
-
-    for director_ in director_list:
-        # assume unique entries in the input list
-        yield Director(director_.uri)