Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ignore known movies from db when fetching directors #163

Merged
merged 2 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ test: lint

# One can use e.g. https://letterboxd.com/hershwin/list/all-the-movies/ as the base list
# Fetch directors for the films in everything.csv, passing --new-only so that
# films already known to the database are skipped.
# NOTE(review): DIRECTORS_FILE and RUN_LOG_CMD are defined elsewhere in this Makefile.
fetch-directors:
pdm run fetch-directors --new-only -i ./data/lists/everything.csv -o ${DIRECTORS_FILE} | $(RUN_LOG_CMD)

# Fetch directors for every film in everything.csv (no --new-only filter).
# NOTE(review): DIRECTORS_FILE and RUN_LOG_CMD are defined elsewhere in this Makefile.
fetch-directors-all:
pdm run fetch-directors -i ./data/lists/everything.csv -o ${DIRECTORS_FILE} | $(RUN_LOG_CMD)

populate-directors:
Expand Down
34 changes: 31 additions & 3 deletions src/letsrolld/cmd/fetch_directors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,15 @@
import time
import traceback

from sqlalchemy.orm import sessionmaker

from letsrolld import db
from letsrolld.db import models
from letsrolld import film
from letsrolld import filmlist
from letsrolld.directorlist import read_director_list

# TODO: deduplicate error handling with update script
_SEC_WAIT_ON_FAIL = 5


Expand All @@ -25,14 +30,29 @@ def get_directors_by_films(film_list):
directors[director.base_url] = director
yield director
break
except (
Exception
) as e: # TODO: deduplicate error handling with update script
except Exception as e:
traceback.print_exception(e)
print(f"Retrying in {_SEC_WAIT_ON_FAIL} seconds...")
sys.stdout.flush()
time.sleep(_SEC_WAIT_ON_FAIL)
continue
print(f"Processed {i + 1}/{len(film_list)} films")
sys.stdout.flush()


def is_known_film(film_):
    """Return True if the database already holds a film with this title and year.

    The lookup matches ``models.Film.title`` against ``film_.name`` and
    ``models.Film.year`` against ``film_.year``. A match is only "probably"
    the same film: two distinct films sharing a title and a year are
    indistinguishable here.

    Args:
        film_: an object exposing ``name`` and ``year`` attributes.

    Returns:
        True when a matching row exists (a "Skipping known film" line is
        printed and stdout is flushed), False otherwise.
    """
    # NOTE(review): an engine is built on every call; if db.create_engine()
    # is not cached, consider creating it once and reusing it.
    session = sessionmaker(bind=db.create_engine())()
    try:
        # `known` rather than `film`, to avoid shadowing the imported
        # `letsrolld.film` module inside this function.
        known = (
            session.query(models.Film)
            .filter(models.Film.title == film_.name)
            .filter(models.Film.year == film_.year)
            .first()
        )
    finally:
        # This runs once per film in the input list, so always release the
        # session's connection instead of leaving it open.
        session.close()

    if known is None:
        return False

    print(f"Skipping known film: {film_.name} ({film_.year})")
    sys.stdout.flush()
    return True


def main():
Expand All @@ -41,9 +61,17 @@ def main():
parser.add_argument(
"-o", "--output", help="output director list file", required=True
)
parser.add_argument(
"-N",
"--new-only",
action="store_true",
help="whether to ignore (probably) known movies",
)
args = parser.parse_args()

film_list = list(filmlist.read_film_list(args.input))
if args.new_only:
film_list = [f for f in film_list if not is_known_film(f)]

directors = set()
if os.path.exists(args.output):
Expand Down
Loading