From 2d8c3c72635ac6810de948003c0cf7448a8fafd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rodrigo=20Gir=C3=A3o=20Serr=C3=A3o?= <5621605+rodrigogiraoserrao@users.noreply.github.com> Date: Thu, 10 Oct 2024 09:44:09 +0100 Subject: [PATCH] Download data in the Python script. I was downloading the data in a separate hook but there is a test that just runs all of the Python scripts in src/python and apparently it's not trivial to get the data download to trigger just once, when the appropriate Python script test is running, so it's just easier to move the data download to the script that actually uses it. We also add a comment to the Rust script to direct users to the Python script for the data location. --- docs/source/_build/scripts/prep_data.py | 28 ------------------- .../user-guide/transformations/joins.py | 25 +++++++++++++++++ .../rust/user-guide/transformations/joins.rs | 3 ++ mkdocs.yml | 1 - 4 files changed, 28 insertions(+), 29 deletions(-) delete mode 100644 docs/source/_build/scripts/prep_data.py diff --git a/docs/source/_build/scripts/prep_data.py b/docs/source/_build/scripts/prep_data.py deleted file mode 100644 index 71e949671027..000000000000 --- a/docs/source/_build/scripts/prep_data.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -Downloads data once when serving the docs so that subsequent -subsequent rebuilds do not have to access remote resources again. 
-""" - -import requests - - -DATA = [ - ( - "https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/data/monopoly_props_groups.csv", - "docs/assets/data/monopoly_props_groups.csv", - ), - ( - "https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/data/monopoly_props_prices.csv", - "docs/assets/data/monopoly_props_prices.csv", - ), -] - - -for url, dest in DATA: - with open(dest, "wb") as f: - try: - f.write(requests.get(url, timeout=10).content) - except Exception as e: - print(f"WARNING: failed to download file {dest} ({e})") - else: - print(f"INFO: downloaded {dest}") diff --git a/docs/source/src/python/user-guide/transformations/joins.py b/docs/source/src/python/user-guide/transformations/joins.py index e8d13c1dad43..57bb6dd81649 100644 --- a/docs/source/src/python/user-guide/transformations/joins.py +++ b/docs/source/src/python/user-guide/transformations/joins.py @@ -1,3 +1,28 @@ +import requests + + +DATA = [ + ( + "https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/data/monopoly_props_groups.csv", + "docs/assets/data/monopoly_props_groups.csv", + ), + ( + "https://raw.githubusercontent.com/pola-rs/polars-static/refs/heads/master/data/monopoly_props_prices.csv", + "docs/assets/data/monopoly_props_prices.csv", + ), +] + + +for url, dest in DATA: + with open(dest, "wb") as f: + try: + f.write(requests.get(url, timeout=10).content) + except Exception as e: + print(f"WARNING: failed to download file {dest} ({e})") + else: + print(f"INFO: downloaded {dest}") + + # --8<-- [start:props_groups] import polars as pl diff --git a/docs/source/src/rust/user-guide/transformations/joins.rs b/docs/source/src/rust/user-guide/transformations/joins.rs index 621a8538afae..5d1c50f733b1 100644 --- a/docs/source/src/rust/user-guide/transformations/joins.rs +++ b/docs/source/src/rust/user-guide/transformations/joins.rs @@ -3,6 +3,9 @@ use polars::prelude::*; // --8<-- [end:setup] fn main() -> Result<(), Box<dyn std::error::Error>> { + // NOTE: 
This assumes the data has been downloaded and is available. + // See the corresponding Python script for the remote location of the data. + // --8<-- [start:props_groups] let props_groups = CsvReadOptions::default() .with_has_header(true) diff --git a/mkdocs.yml b/mkdocs.yml index b128adca6502..c180bbfc6b8e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -173,7 +173,6 @@ markdown_extensions: hooks: - docs/source/_build/scripts/people.py - - docs/source/_build/scripts/prep_data.py plugins: - search: