Skip to content

Commit

Permalink
Merge pull request #159 from booxter/dirs
Browse files Browse the repository at this point in the history
Add tool to convert a list of films into directors
  • Loading branch information
booxter authored Dec 5, 2024
2 parents ce64b8b + 59ea42c commit 3ab8167
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 25 deletions.
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ DIRECTORS_FILE?=directors.csv
RUN_LOG?=run.log
RUN_LOG_CMD?=ts | tee -a $(RUN_LOG)

.PHONY: install lint test populate run-update-directors run-update-films run-update-offers run-cleanup run-all run-db-upgrade webapp ui swagger swagger-py swagger-js swagger-ts swagger-all get-dirs get-films
# Every target listed here is a command, not a file; declaring it phony keeps a
# same-named file (e.g. one called `get-directors`) from making the rule look up to date.
.PHONY: install lint test get-directors populate-directors run-update-directors run-update-films run-update-offers run-cleanup run-all run-db-upgrade webapp ui swagger swagger-py swagger-js swagger-ts swagger-all get-dirs get-films

install:
pdm install -vd
Expand All @@ -16,7 +16,10 @@ lint: install swagger
# Run the pytest suite through pdm; `lint` is a prerequisite, so it runs first.
test: lint
pdm run pytest

populate:
# Derive the directors CSV from the master film list and write it to
# DIRECTORS_FILE (the same file populate-directors reads), so overriding
# DIRECTORS_FILE keeps both targets in agreement.
# NOTE: output is piped through RUN_LOG_CMD; unless the recipe shell enables
# pipefail, make sees the exit status of the last command in the pipeline,
# not that of the get-directors script.
get-directors:
	pdm run get-directors -i ./data/lists/everything.csv -o $(DIRECTORS_FILE) | $(RUN_LOG_CMD)

# Invoke the populate-directors script, passing DIRECTORS_FILE as -d and
# DIRECTORS_NUMBER as -n.
populate-directors:
	pdm run populate-directors -d $(DIRECTORS_FILE) -n $(DIRECTORS_NUMBER)

run-update-directors:
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ build-backend = "pdm.backend"
distribution = true

[project.scripts]
get-directors = "letsrolld.cmd.get_directors:main"
populate-directors = "letsrolld.cmd.populate_directors:main"
update-directors = "letsrolld.cmd.update:directors_main"
update-films = "letsrolld.cmd.update:films_main"
Expand Down
53 changes: 53 additions & 0 deletions src/letsrolld/cmd/get_directors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import argparse
import csv
import sys
import time
import traceback

from letsrolld import film
from letsrolld import filmlist

_SEC_WAIT_ON_FAIL = 5


def get_directors_by_films(film_list):
    """Yield each distinct director of the given films, in first-seen order.

    Args:
        film_list: iterable of entries exposing a ``uri`` attribute. It is
            materialized into a list up front, so generators are accepted and
            the caller's sequence is never mutated.

    Yields:
        The director objects found on ``film.Film(entry.uri).directors``.
        A director is yielded only once; uniqueness is keyed on ``base_url``.

    Any exception raised while reading a film's directors is printed and the
    same film is retried after ``_SEC_WAIT_ON_FAIL`` seconds, with no upper
    bound on the number of attempts. Directors already yielded before the
    failure are not yielded again on retry.
    """
    # list() instead of [:] so that any iterable works and len() is available
    # for the progress message.
    films = list(film_list)
    total = len(films)

    seen_urls = set()
    for position, entry in enumerate(films, start=1):
        movie = film.Film(entry.uri)
        while True:
            try:
                for director in movie.directors:
                    if director.base_url not in seen_urls:
                        seen_urls.add(director.base_url)
                        yield director
                break
            except Exception as e:
                # TODO: deduplicate error handling with update script
                traceback.print_exception(e)
                # flush: stdout may be a pipe (block-buffered), and the
                # message should be visible before the sleep starts.
                print(f"Retrying in {_SEC_WAIT_ON_FAIL} seconds...", flush=True)
                time.sleep(_SEC_WAIT_ON_FAIL)
        print(f"Processed {position}/{total} films", flush=True)


def main():
    """Command-line entry point: turn a film list into a director CSV.

    Reads the film list named by ``-i/--input`` via
    ``filmlist.read_film_list`` and writes one row per distinct director
    (columns ``Name`` and ``Letterboxd URI``) to the file named by
    ``-o/--output``. The input is fully read before the output file is opened,
    so a failure while reading it does not truncate an existing output file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", help="input movie list file", required=True)
    parser.add_argument(
        "-o", "--output", help="output director list file", required=True
    )
    args = parser.parse_args()

    film_list = list(filmlist.read_film_list(args.input))

    # Explicit UTF-8: director names may contain non-ASCII characters, and the
    # default encoding of open() depends on the locale.
    with open(args.output, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile, dialect=csv.unix_dialect)
        writer.writerow(["Name", "Letterboxd URI"])

        for i, director_ in enumerate(get_directors_by_films(film_list), start=1):
            print(f"Adding director #{i}: {director_.name}")
            sys.stdout.flush()
            writer.writerow([director_.name, director_.base_url])
            # Flush after every row so partial results survive an interruption.
            csvfile.flush()
23 changes: 0 additions & 23 deletions src/letsrolld/director.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import functools
import os.path
import random

from letsrolld.base import BaseObject
from letsrolld import film
Expand Down Expand Up @@ -37,25 +36,3 @@ def film_urls(self):
def films(self):
for url in self.film_urls:
yield film.Film(url)


def get_directors_by_films(film_list):
film_list = film_list[:]
random.shuffle(film_list)

directors = {}
for film_ in film_list:
movie = film.Film(film_.uri)
for director in movie.directors:
if director.base_url not in directors:
directors[director.base_url] = director
yield director


def get_directors_by_urls(director_list):
director_list = director_list[:]
random.shuffle(director_list)

for director_ in director_list:
# assume unique entries in the input list
yield Director(director_.uri)

0 comments on commit 3ab8167

Please sign in to comment.