Skip to content

Commit

Permalink
Merge pull request #34 from dataforgoodfr/udpate-loader
Browse files Browse the repository at this point in the history
Update movies data loader
  • Loading branch information
kaaloo authored Apr 29, 2024
2 parents b87f679 + acd2201 commit 50631bc
Show file tree
Hide file tree
Showing 23 changed files with 331 additions and 197 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,19 @@ https://observatoire-des-imaginaires.observablehq.cloud/questionnaire
This repo includes invoke for pythonic task execution. To see the
list of available tasks you can run:

```bash
invoke -l
```

### Running the Observable site

To run the observable site in development mode you can run:

```bash
invoke dev
```

# Updating the Movie Database
### Updating the Movie Database

The [French regional TMDB Movies Dataset](https://huggingface.co/datasets/DataForGood/observatoire_des_imaginaires_movies)
on Hugging Face can be updated using the following command:
Expand Down
2 changes: 1 addition & 1 deletion site-observable/.gitignore → observable/.gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.DS_Store
dist/
docs/.observablehq/cache/
src/.observablehq/cache/
node_modules/
yarn-error.log

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// See https://observablehq.com/framework/config for documentation.
export default {
root: "src",
// The project’s title; used in the sidebar and webpage titles.
title: "L'observatoire des imaginaires",
sidebar: false,
Expand Down
2 changes: 1 addition & 1 deletion site-observable/package.json → observable/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"observable": "observable"
},
"dependencies": {
"@observablehq/framework": "1.5.1",
"@observablehq/framework": "1.7.0",
"d3-dsv": "^3.0.1",
"d3-time-format": "^4.1.0"
},
Expand Down
File renamed without changes.
79 changes: 79 additions & 0 deletions observable/src/data/films.sqlite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import os
import shutil
import sqlite3
import sys
import tempfile
from datetime import datetime

from observatoire.tmdb.movies.hf import load_movies_dataset

# Fetch the movies dataset as a DataFrame
df = load_movies_dataset()

# Drop adult titles
adult_flag = df["adult"]
df = df[adult_flag.eq(False)]

# Drop every title whose genres mention "Documentary"
mentions_documentary = df["genres"].str.contains("Documentary")
df = df[mentions_documentary.eq(False)]

# Drop titles that are not released yet; release dates are compared to today's
# date as "YYYY-MM-DD" strings
now = datetime.now().strftime("%Y-%m-%d")
already_released = df["release_date"] < now
df = df[already_released]

# Original-language codes accepted for movies whose revenue is zero
eu_languages = [
    "cs",
    "da",
    "de",
    "en",
    "es",
    "et",
    "fi",
    "fr",
    "hr",
    "hu",
    "is",
    "it",
    "lt",
    "lv",
    "nl",
    "no",
    "pl",
    "pt",
    "ro",
    "sl",
    "sv",
]

# Keep a movie when it has a positive revenue, or when its revenue is zero
# and its original language is one of the codes listed above
has_revenue = df["revenue"] > 0
zero_revenue = df["revenue"] == 0
in_eu_language = df["original_language"].isin(eu_languages)
df = df[has_revenue | (zero_revenue & in_eu_language)]

# Derive production_year from the first four characters of release_date
df["production_year"] = df["release_date"].str.slice(stop=4)

# Narrow the frame down to the exported columns
exported_columns = [
    "id",
    "title",
    "original_title",
    "production_year",
    "poster_path",
]
df = df[exported_columns]

# Blank out original_title wherever it merely repeats title
same_as_title = df["title"] == df["original_title"]
df["original_title"] = df["original_title"].mask(same_as_title, "")

# Save the dataframe to a SQLite database.
# Only the path of the temporary file is needed: the handle is closed right
# away so that SQLite is the sole writer of the file.
with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as temp_file:
    temp_filename = temp_file.name

try:
    conn = sqlite3.connect(temp_filename)
    try:
        # `with conn` only commits or rolls back the transaction; the
        # connection itself must be closed explicitly before the file is read.
        with conn:
            df.to_sql("films", conn, index=False)
    finally:
        conn.close()

    # Print db file to stdout: stream the raw bytes directly instead of
    # spawning a shell running `cat` on an interpolated path.
    with open(temp_filename, "rb") as db_file:
        shutil.copyfileobj(db_file, sys.stdout.buffer)
    sys.stdout.buffer.flush()
finally:
    # The file was created with delete=False, so it has to be removed here.
    os.remove(temp_filename)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
121 changes: 114 additions & 7 deletions site-observable/yarn.lock → observable/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -261,15 +261,23 @@
wrap-ansi "^8.1.0"
wrap-ansi-cjs "npm:wrap-ansi@^7.0.0"

"@observablehq/[email protected]":
version "1.5.1"
resolved "https://registry.yarnpkg.com/@observablehq/framework/-/framework-1.5.1.tgz#35c0713693be42e55d71347294d26b45c9f549e3"
integrity sha512-nZaZ3/2xM0NUkahna5SGy1KaYf/4QmaBf9f7Gabcwzi/8pxRD8fwy3ffmW947C6ejNJAmIftSfPcq7mnb7S2OA==
"@jridgewell/sourcemap-codec@^1.4.15":
version "1.4.15"
resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz#d7c6e6755c78567a951e04ab52ef0fd26de59f32"
integrity sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==

"@observablehq/[email protected]":
version "1.7.0"
resolved "https://registry.yarnpkg.com/@observablehq/framework/-/framework-1.7.0.tgz#170e82dac37cc9e84c63b29cedcc742fb072f197"
integrity sha512-2+2+ZXyAyv9Z+DPlIZqqbXU+6aFXoeOoTy8BHAEnUnQSFMsfGis2tUZYZVLpm6TTY8182bVJYHTZ7gkoHvPCNg==
dependencies:
"@clack/prompts" "^0.7.0"
"@observablehq/inputs" "^0.10.6"
"@observablehq/runtime" "^5.9.4"
"@rollup/plugin-commonjs" "^25.0.7"
"@rollup/plugin-json" "^6.1.0"
"@rollup/plugin-node-resolve" "^15.2.3"
"@rollup/plugin-virtual" "^3.0.2"
acorn "^8.11.2"
acorn-walk "^8.3.0"
ci-info "^4.0.0"
Expand All @@ -290,6 +298,7 @@
mime "^4.0.0"
minisearch "^6.3.0"
open "^10.1.0"
pkg-dir "^8.0.0"
rollup "^4.6.0"
rollup-plugin-esbuild "^6.1.0"
semver "^7.5.4"
Expand Down Expand Up @@ -337,6 +346,25 @@
resolved "https://registry.yarnpkg.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz#a77ea742fab25775145434eb1d2328cf5013ac33"
integrity sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==

"@rollup/plugin-commonjs@^25.0.7":
version "25.0.7"
resolved "https://registry.yarnpkg.com/@rollup/plugin-commonjs/-/plugin-commonjs-25.0.7.tgz#145cec7589ad952171aeb6a585bbeabd0fd3b4cf"
integrity sha512-nEvcR+LRjEjsaSsc4x3XZfCCvZIaSMenZu/OiwOKGN2UhQpAYI7ru7czFvyWbErlpoGjnSX3D5Ch5FcMA3kRWQ==
dependencies:
"@rollup/pluginutils" "^5.0.1"
commondir "^1.0.1"
estree-walker "^2.0.2"
glob "^8.0.3"
is-reference "1.2.1"
magic-string "^0.30.3"

"@rollup/plugin-json@^6.1.0":
version "6.1.0"
resolved "https://registry.yarnpkg.com/@rollup/plugin-json/-/plugin-json-6.1.0.tgz#fbe784e29682e9bb6dee28ea75a1a83702e7b805"
integrity sha512-EGI2te5ENk1coGeADSIwZ7G2Q8CJS2sF120T7jLw4xFw9n7wIOXHo+kIYRAoVpJAN+kmqZSoO3Fp4JtoNF4ReA==
dependencies:
"@rollup/pluginutils" "^5.1.0"

"@rollup/plugin-node-resolve@^15.2.3":
version "15.2.3"
resolved "https://registry.yarnpkg.com/@rollup/plugin-node-resolve/-/plugin-node-resolve-15.2.3.tgz#e5e0b059bd85ca57489492f295ce88c2d4b0daf9"
Expand All @@ -349,7 +377,12 @@
is-module "^1.0.0"
resolve "^1.22.1"

"@rollup/pluginutils@^5.0.1", "@rollup/pluginutils@^5.0.5":
"@rollup/plugin-virtual@^3.0.2":
version "3.0.2"
resolved "https://registry.yarnpkg.com/@rollup/plugin-virtual/-/plugin-virtual-3.0.2.tgz#17e17eeecb4c9fa1c0a6e72c9e5f66382fddbb82"
integrity sha512-10monEYsBp3scM4/ND4LNH5Rxvh3e/cVeL3jWTgZ2SrQ+BmUoQcopVQvnaMcOnykb1VkxUFuDAN+0FnpTFRy2A==

"@rollup/pluginutils@^5.0.1", "@rollup/pluginutils@^5.0.5", "@rollup/pluginutils@^5.1.0":
version "5.1.0"
resolved "https://registry.yarnpkg.com/@rollup/pluginutils/-/pluginutils-5.1.0.tgz#7e53eddc8c7f483a4ad0b94afb1f7f5fd3c771e0"
integrity sha512-XTIWOPPcpvyKI6L1NHo0lFlCyznUEyPmPY1mc3KpPVDYulHSTvyeLNVW00QTLIAFNhR3kYnJTQHeGqU4M3n09g==
Expand Down Expand Up @@ -423,7 +456,7 @@
resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.12.1.tgz#cd8d175e001c212d5ac71c7827ef1d5c5e14494c"
integrity sha512-n+vkrSyphvmU0qkQ6QBNXCGr2mKjhP08mPRM/Xp5Ck2FV4NrHU+y6axzDeixUrCBHVUS51TZhjqrKBBsHLKb2Q==

"@types/[email protected]", "@types/estree@^1.0.0":
"@types/estree@*", "@types/estree@1.0.5", "@types/estree@^1.0.0":
version "1.0.5"
resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.5.tgz#a6ce3e556e00fd9895dd872dd172ad0d4bd687f4"
integrity sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==
Expand Down Expand Up @@ -552,6 +585,11 @@ commander@7:
resolved "https://registry.yarnpkg.com/commander/-/commander-7.2.0.tgz#a36cb57d0b501ce108e4d20559a150a391d97ab7"
integrity sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==

commondir@^1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/commondir/-/commondir-1.0.1.tgz#ddd800da0c66127393cca5950ea968a3aaf1253b"
integrity sha512-W9pAhw0ja1Edb5GVdIF1mjZw/ASI0AlShXM83UUGe2DVr5TdAPEA1OA8m/g8zWp9x6On7gqufY+FatDbC3MDQg==

core-util-is@~1.0.0:
version "1.0.3"
resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.3.tgz#a6042d3634c2b27e9328f837b965fac83808db85"
Expand Down Expand Up @@ -815,6 +853,11 @@ fast-fifo@^1.1.0, fast-fifo@^1.2.0:
resolved "https://registry.yarnpkg.com/fast-fifo/-/fast-fifo-1.3.2.tgz#286e31de96eb96d38a97899815740ba2a4f3640c"
integrity sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==

find-up-simple@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/find-up-simple/-/find-up-simple-1.0.0.tgz#21d035fde9fdbd56c8f4d2f63f32fd93a1cfc368"
integrity sha512-q7Us7kcjj2VMePAa02hDAF6d+MzsdsAWEwYyOpwUtlerRBkOEPBCRZrAV4XfcSN8fHAgaD0hP7miwoay6DCprw==

foreground-child@^3.1.0:
version "3.1.1"
resolved "https://registry.yarnpkg.com/foreground-child/-/foreground-child-3.1.1.tgz#1d173e776d75d2772fed08efe4a0de1ea1b12d0d"
Expand All @@ -837,6 +880,11 @@ [email protected]:
resolved "https://registry.yarnpkg.com/fresh/-/fresh-0.5.2.tgz#3d8cadd90d976569fa835ab1f8e4b23a105605a7"
integrity sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==

fs.realpath@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f"
integrity sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==

fsevents@~2.3.2, fsevents@~2.3.3:
version "2.3.3"
resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6"
Expand Down Expand Up @@ -870,6 +918,17 @@ glob@^10.3.7:
minipass "^5.0.0 || ^6.0.2 || ^7.0.0"
path-scurry "^1.10.1"

glob@^8.0.3:
version "8.1.0"
resolved "https://registry.yarnpkg.com/glob/-/glob-8.1.0.tgz#d388f656593ef708ee3e34640fdfb99a9fd1c33e"
integrity sha512-r8hpEjiQEYlF2QU0df3dS+nxxSIreXQS1qRhMJM0Q5NDdR386C7jb7Hwwod8Fgiuex+k0GFjgft18yvxm5XoCQ==
dependencies:
fs.realpath "^1.0.0"
inflight "^1.0.4"
inherits "2"
minimatch "^5.0.1"
once "^1.3.0"

gray-matter@^4.0.3:
version "4.0.3"
resolved "https://registry.yarnpkg.com/gray-matter/-/gray-matter-4.0.3.tgz#e893c064825de73ea1f5f7d88c7a9f7274288798"
Expand Down Expand Up @@ -948,7 +1007,15 @@ immediate@~3.0.5:
resolved "https://registry.yarnpkg.com/immediate/-/immediate-3.0.6.tgz#9db1dbd0faf8de6fbe0f5dd5e56bb606280de69b"
integrity sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==

inherits@2.0.4, inherits@~2.0.3:
inflight@^1.0.4:
version "1.0.6"
resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9"
integrity sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==
dependencies:
once "^1.3.0"
wrappy "1"

inherits@2, inherits@2.0.4, inherits@~2.0.3:
version "2.0.4"
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
Expand Down Expand Up @@ -1004,6 +1071,13 @@ is-potential-custom-element-name@^1.0.1:
resolved "https://registry.yarnpkg.com/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz#171ed6f19e3ac554394edf78caa05784a45bebb5"
integrity sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==

is-reference@1.2.1:
version "1.2.1"
resolved "https://registry.yarnpkg.com/is-reference/-/is-reference-1.2.1.tgz#8b2dac0b371f4bc994fdeaba9eb542d03002d0b7"
integrity sha512-U82MsXXiFIrjCK4otLT+o2NA2Cd2g5MLoOVXUZjIOhLurrRxpEXzI8O0KZHr3IjLvlAH1kTPYSuqer5T9ZVBKQ==
dependencies:
"@types/estree" "*"

is-wsl@^3.1.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-3.1.0.tgz#e1c657e39c10090afcbedec61720f6b924c3cbd2"
Expand Down Expand Up @@ -1111,6 +1185,13 @@ lru-cache@^6.0.0:
resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-10.2.0.tgz#0bd445ca57363465900f4d1f9bd8db343a4d95c3"
integrity sha512-2bIM8x+VAf6JT4bKAljS1qUWgMsqZRPGJS6FSahIMPVvctcNhyVp7AJu7quxOW9jwkryBReKZY5tY5JYv2n/7Q==

magic-string@^0.30.3:
version "0.30.10"
resolved "https://registry.yarnpkg.com/magic-string/-/magic-string-0.30.10.tgz#123d9c41a0cb5640c892b041d4cfb3bd0aa4b39e"
integrity sha512-iIRwTIf0QKV3UAnYK4PU8uiEc4SRh5jX0mwpIwETPpHdhVM4f53RSwS/vXvN1JhGX+Cs7B8qIq3d6AH49O5fAQ==
dependencies:
"@jridgewell/sourcemap-codec" "^1.4.15"

markdown-it-anchor@^8.6.7:
version "8.6.7"
resolved "https://registry.yarnpkg.com/markdown-it-anchor/-/markdown-it-anchor-8.6.7.tgz#ee6926daf3ad1ed5e4e3968b1740eef1c6399634"
Expand Down Expand Up @@ -1155,6 +1236,13 @@ mime@^4.0.0:
resolved "https://registry.yarnpkg.com/mime/-/mime-4.0.1.tgz#ad7563d1bfe30253ad97dedfae2b1009d01b9470"
integrity sha512-5lZ5tyrIfliMXzFtkYyekWbtRXObT9OWa8IwQ5uxTBDHucNNwniRqo0yInflj+iYi5CBa6qxadGzGarDfuEOxA==

minimatch@^5.0.1:
version "5.1.6"
resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-5.1.6.tgz#1cfcb8cf5522ea69952cd2af95ae09477f122a96"
integrity sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==
dependencies:
brace-expansion "^2.0.1"

minimatch@^9.0.1:
version "9.0.3"
resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-9.0.3.tgz#a6e00c3de44c3a542bfaae70abfc22420a6da825"
Expand Down Expand Up @@ -1199,6 +1287,13 @@ [email protected]:
dependencies:
ee-first "1.1.1"

once@^1.3.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
integrity sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==
dependencies:
wrappy "1"

open@^10.1.0:
version "10.1.0"
resolved "https://registry.yarnpkg.com/open/-/open-10.1.0.tgz#a7795e6e5d519abe4286d9937bb24b51122598e1"
Expand Down Expand Up @@ -1249,6 +1344,13 @@ picomatch@^2.3.1:
resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42"
integrity sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==

pkg-dir@^8.0.0:
version "8.0.0"
resolved "https://registry.yarnpkg.com/pkg-dir/-/pkg-dir-8.0.0.tgz#8f3de8ba83d46b72a05c80bfd4e579f060fa91e2"
integrity sha512-4peoBq4Wks0riS0z8741NVv+/8IiTvqnZAr8QGgtdifrtpdXbNw/FxRS1l6NFqm4EMzuS0EDqNNx4XGaz8cuyQ==
dependencies:
find-up-simple "^1.0.0"

process-nextick-args@~2.0.0:
version "2.0.1"
resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2"
Expand Down Expand Up @@ -1673,6 +1775,11 @@ wrap-ansi@^9.0.0:
string-width "^7.0.0"
strip-ansi "^7.1.0"

wrappy@1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
integrity sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==

ws@^8.14.2, ws@^8.16.0:
version "8.16.0"
resolved "https://registry.yarnpkg.com/ws/-/ws-8.16.0.tgz#d1cd774f36fbc07165066a60e40323eab6446fd4"
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
from tqdm import tqdm

from observatoire.tmdb.config import TMDB_BATCH_SIZE
from observatoire.tmdb.data import transform_movie_json
from observatoire.tmdb.helpers import merge
from observatoire.tmdb.hf import load_movies_dataset, save_movies_dataset
from observatoire.tmdb.logger import setup_logger
from observatoire.tmdb.tmdb import get_latest_movie_id, get_movie_data
from observatoire.tmdb.movies.data import transform_movie_json
from observatoire.tmdb.movies.helpers import merge
from observatoire.tmdb.movies.hf import load_movies_dataset, save_movies_dataset
from observatoire.tmdb.movies.tmdb import get_latest_movie_id, get_movie_data


def executor() -> None:
Expand Down
File renamed without changes.
Loading

0 comments on commit 50631bc

Please sign in to comment.