Skip to content

Commit

Permalink
Merge pull request #163 from booxter/dirs
Browse files Browse the repository at this point in the history
Ignore known movies from db when fetching directors
  • Loading branch information
booxter authored Dec 6, 2024
2 parents 840d555 + 40dbd95 commit 020786d
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 3 deletions.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ test: lint

# One can use e.g. https://letterboxd.com/hershwin/list/all-the-movies/ as the base list
# Passes --new-only, so films already found in the db are skipped before fetching.
fetch-directors:
pdm run fetch-directors --new-only -i ./data/lists/everything.csv -o ${DIRECTORS_FILE} | $(RUN_LOG_CMD)

# Same invocation as fetch-directors but without --new-only: every film in the input list is processed.
fetch-directors-all:
pdm run fetch-directors -i ./data/lists/everything.csv -o ${DIRECTORS_FILE} | $(RUN_LOG_CMD)

populate-directors:
Expand Down
34 changes: 31 additions & 3 deletions src/letsrolld/cmd/fetch_directors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,15 @@
import time
import traceback

from sqlalchemy.orm import sessionmaker

from letsrolld import db
from letsrolld.db import models
from letsrolld import film
from letsrolld import filmlist
from letsrolld.directorlist import read_director_list

# TODO: deduplicate error handling with update script
_SEC_WAIT_ON_FAIL = 5


Expand All @@ -25,14 +30,29 @@ def get_directors_by_films(film_list):
directors[director.base_url] = director
yield director
break
except (
Exception
) as e: # TODO: deduplicate error handling with update script
except Exception as e:
traceback.print_exception(e)
print(f"Retrying in {_SEC_WAIT_ON_FAIL} seconds...")
sys.stdout.flush()
time.sleep(_SEC_WAIT_ON_FAIL)
continue
print(f"Processed {i + 1}/{len(film_list)} films")
sys.stdout.flush()


def is_known_film(film_):
    """Report whether a film with the same title and year is already in the db.

    Queries ``models.Film`` for a row whose ``title`` equals ``film_.name``
    and whose ``year`` equals ``film_.year``. When such a row exists, a
    "Skipping known film" line is printed and stdout is flushed.

    Args:
        film_: object exposing ``name`` and ``year`` attributes.

    Returns:
        True when a matching row is found, False otherwise.
    """
    # NOTE(review): an engine and session are still built on every call;
    # consider sharing one session across the whole film list.
    session = sessionmaker(bind=db.create_engine())()
    try:
        # Named ``known`` rather than ``film`` so the imported ``film``
        # module is not shadowed inside this function.
        known = (
            session.query(models.Film)
            .filter(models.Film.title == film_.name)
            .filter(models.Film.year == film_.year)
            .first()
        )
    finally:
        # This runs once per input film; without an explicit close every
        # call would leave its session (and connection) open.
        session.close()

    if known is None:
        return False

    print(f"Skipping known film: {film_.name} ({film_.year})")
    sys.stdout.flush()
    return True


def main():
Expand All @@ -41,9 +61,17 @@ def main():
parser.add_argument(
"-o", "--output", help="output director list file", required=True
)
parser.add_argument(
"-N",
"--new-only",
action="store_true",
help="whether to ignore (probably) known movies",
)
args = parser.parse_args()

film_list = list(filmlist.read_film_list(args.input))
if args.new_only:
film_list = [f for f in film_list if not is_known_film(f)]

directors = set()
if os.path.exists(args.output):
Expand Down

0 comments on commit 020786d

Please sign in to comment.