From fddbef744ae434124474e099078a5fa5358f143a Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Sat, 6 Jan 2024 09:26:17 +0100 Subject: [PATCH 1/3] Add arrow 49 support --- .github/workflows/release.yml | 60 ++++++------ .github/workflows/test.yml | 60 ++++++------ Cargo.lock | 92 ++++++++++++++---- serde_arrow/Cargo.toml | 19 +++- serde_arrow/benches/groups/impls.rs | 6 +- serde_arrow/build.rs | 3 + serde_arrow/src/lib.rs | 33 ++++--- x.py | 143 +++++++++++++++++----------- 8 files changed, 269 insertions(+), 147 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9cad8cbc..781f3e03 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -32,69 +32,73 @@ "name": "Check", "run": "cargo check" }, - { - "name": "Check arrow2-0-16", - "run": "cargo check --features arrow2-0-16" - }, { "name": "Check arrow2-0-17", "run": "cargo check --features arrow2-0-17" }, { - "name": "Check arrow-37", - "run": "cargo check --features arrow-37" + "name": "Check arrow2-0-16", + "run": "cargo check --features arrow2-0-16" }, { - "name": "Check arrow-38", - "run": "cargo check --features arrow-38" + "name": "Check arrow-49", + "run": "cargo check --features arrow-49" }, { - "name": "Check arrow-39", - "run": "cargo check --features arrow-39" + "name": "Check arrow-48", + "run": "cargo check --features arrow-48" }, { - "name": "Check arrow-40", - "run": "cargo check --features arrow-40" + "name": "Check arrow-47", + "run": "cargo check --features arrow-47" }, { - "name": "Check arrow-41", - "run": "cargo check --features arrow-41" + "name": "Check arrow-46", + "run": "cargo check --features arrow-46" }, { - "name": "Check arrow-42", - "run": "cargo check --features arrow-42" + "name": "Check arrow-45", + "run": "cargo check --features arrow-45" + }, + { + "name": "Check arrow-44", + "run": "cargo check --features arrow-44" }, { "name": "Check arrow-43", "run": "cargo check --features arrow-43" }, { - "name": "Check arrow-44", - "run": "cargo check --features arrow-44" + "name": "Check arrow-42", + "run": "cargo check --features arrow-42" }, { - "name": "Check arrow-45", - "run": "cargo check --features arrow-45" + "name": "Check arrow-41", + "run": "cargo check --features arrow-41" }, { - "name": "Check arrow-46", - "run": "cargo check --features arrow-46" + "name": "Check arrow-40", + "run": "cargo check --features arrow-40" }, { - "name": "Check arrow-47", - "run": "cargo check --features arrow-47" + "name": "Check arrow-39", + "run": "cargo check --features arrow-39" }, { - "name": "Check arrow-48", - "run": "cargo check --features arrow-48" + "name": "Check arrow-38", + "run": "cargo check --features arrow-38" + }, + { + "name": "Check arrow-37", + "run": "cargo check --features arrow-37" }, { "name": "Build", - "run": "cargo build --features arrow2-0-17,arrow-48" + "run": "cargo build --features arrow2-0-17,arrow-49" }, { "name": "Test", - "run": "cargo test --features arrow2-0-17,arrow-48" + "run": "cargo test --features arrow2-0-17,arrow-49" }, { "name": "Publish to crates.io", diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 963ae2e7..85e6de82 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -37,69 +37,73 @@ "name": "Check", "run": "cargo check" }, - { - "name": "Check arrow2-0-16", - "run": "cargo check --features arrow2-0-16" - }, { "name": "Check arrow2-0-17", "run": "cargo check --features arrow2-0-17" }, { - "name": "Check arrow-37", - "run": "cargo check --features arrow-37" + "name": "Check arrow2-0-16", + "run": "cargo check --features arrow2-0-16" }, { - "name": "Check arrow-38", - "run": "cargo check --features arrow-38" + "name": "Check arrow-49", + "run": "cargo check --features arrow-49" }, { - "name": "Check arrow-39", - "run": "cargo check --features arrow-39" + "name": "Check arrow-48", + "run": "cargo check --features arrow-48" }, { - "name": "Check arrow-40", - "run": "cargo check --features arrow-40" + "name": "Check arrow-47", + "run": "cargo check --features arrow-47" }, { - "name": "Check arrow-41", - "run": "cargo check --features arrow-41" + "name": "Check arrow-46", + "run": "cargo check --features arrow-46" }, { - "name": "Check arrow-42", - "run": "cargo check --features arrow-42" + "name": "Check arrow-45", + "run": "cargo check --features arrow-45" + }, + { + "name": "Check arrow-44", + "run": "cargo check --features arrow-44" }, { "name": "Check arrow-43", "run": "cargo check --features arrow-43" }, { - "name": "Check arrow-44", - "run": "cargo check --features arrow-44" + "name": "Check arrow-42", + "run": "cargo check --features arrow-42" }, { - "name": "Check arrow-45", - "run": "cargo check --features arrow-45" + "name": "Check arrow-41", + "run": "cargo check --features arrow-41" }, { - "name": "Check arrow-46", - "run": "cargo check --features arrow-46" + "name": "Check arrow-40", + "run": "cargo check --features arrow-40" }, { - "name": "Check arrow-47", - "run": "cargo check --features arrow-47" + "name": "Check arrow-39", + "run": "cargo check --features arrow-39" }, { - "name": "Check arrow-48", - "run": "cargo check --features arrow-48" + "name": "Check arrow-38", + "run": "cargo check --features arrow-38" + }, + { + "name": "Check arrow-37", + "run": "cargo check --features arrow-37" }, { "name": "Build", - "run": "cargo build --features arrow2-0-17,arrow-48" + "run": "cargo build --features arrow2-0-17,arrow-49" }, { "name": "Test", - "run": "cargo test --features arrow2-0-17,arrow-48" + "run": "cargo test --features arrow2-0-17,arrow-49" } ] } diff --git a/Cargo.lock b/Cargo.lock index add77fb5..6ecdd49a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -241,6 +241,22 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-array" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d" +dependencies = [ + "ahash", + "arrow-buffer 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", + "chrono", + "half 2.2.1", + "hashbrown 0.14.0", + "num", +] + [[package]] name = "arrow-buffer" version = "37.0.0" @@ -364,17 +380,29 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c" +dependencies = [ + "bytes", + "half 2.2.1", + "num", +] + [[package]] name = "arrow-cast" -version = "48.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af01fc1a06f6f2baf31a04776156d47f9f31ca5939fe6d00cd7a059f95a46ff1" +checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a" dependencies = [ - "arrow-array 48.0.0", - "arrow-buffer 48.0.0", - "arrow-data 48.0.0", - "arrow-schema 48.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", "arrow-select", + "base64", "chrono", "half 2.2.1", "lexical-core", @@ -525,6 +553,18 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-data" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634" +dependencies = [ + "arrow-buffer 49.0.0", + "arrow-schema 49.0.0", + "half 2.2.1", + "num", +] + [[package]] name = "arrow-format" version = "0.8.1" @@ -537,15 +577,15 @@ dependencies = [ [[package]] name = "arrow-json" -version = "48.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ac346bc84846ab425ab3c8c7b6721db90643bc218939677ed7e071ccbfb919d" +checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee" dependencies = [ - "arrow-array 48.0.0", - "arrow-buffer 48.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", "arrow-cast", - "arrow-data 48.0.0", - "arrow-schema 48.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", "chrono", "half 2.2.1", "indexmap 2.0.0", @@ -627,17 +667,23 @@ version = "48.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d7a8c3f97f5ef6abd862155a6f39aaba36b029322462d72bbcfa69782a50614" +[[package]] +name = "arrow-schema" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167" + [[package]] name = "arrow-select" -version = "48.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f868f4a5001429e20f7c1994b5cd1aa68b82e3db8cf96c559cdb56dc8be21410" +checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036" dependencies = [ "ahash", - "arrow-array 48.0.0", - "arrow-buffer 48.0.0", - "arrow-data 48.0.0", - "arrow-schema 48.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", "num", ] @@ -723,6 +769,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "base64" +version = "0.21.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" + [[package]] name = "bitflags" version = "1.3.2" @@ -1649,6 +1701,7 @@ dependencies = [ "arrow-array 46.0.0", "arrow-array 47.0.0", "arrow-array 48.0.0", + "arrow-array 49.0.0", "arrow-buffer 37.0.0", "arrow-buffer 38.0.0", "arrow-buffer 39.0.0", @@ -1661,6 +1714,7 @@ dependencies = [ "arrow-buffer 46.0.0", "arrow-buffer 47.0.0", "arrow-buffer 48.0.0", + "arrow-buffer 49.0.0", "arrow-data 37.0.0", "arrow-data 38.0.0", "arrow-data 39.0.0", @@ -1673,6 +1727,7 @@ dependencies = [ "arrow-data 46.0.0", "arrow-data 47.0.0", "arrow-data 48.0.0", + "arrow-data 49.0.0", "arrow-json", "arrow-schema 37.0.0", "arrow-schema 38.0.0", @@ -1686,6 +1741,7 @@ dependencies = [ "arrow-schema 46.0.0", "arrow-schema 47.0.0", "arrow-schema 48.0.0", + "arrow-schema 49.0.0", "arrow2 0.16.0", "arrow2 0.17.0", "arrow2_convert", diff --git a/serde_arrow/Cargo.toml b/serde_arrow/Cargo.toml index 27b04594..6c747daf 100644 --- a/serde_arrow/Cargo.toml +++ b/serde_arrow/Cargo.toml @@ -13,15 +13,19 @@ bench = false [[bench]] name = "arrow2" -required-features = ["arrow2-0-17", "arrow-48"] +# arrow-version:replace: required-features = ["arrow2-0-17", "arrow-{version}"] +required-features = ["arrow2-0-17", "arrow-49"] harness = false [package.metadata.docs.rs] -features = ["arrow2-0-17", "arrow-48"] +# arrow-version:replace: features = ["arrow2-0-17", "arrow-{version}"] +features = ["arrow2-0-17", "arrow-49"] [features] default = [] +# arrow-version:insert: arrow-{version} = ["dep:arrow-array-{version}", "dep:arrow-schema-{version}", "dep:arrow-data-{version}", "dep:arrow-buffer-{version}"] +arrow-49 = ["dep:arrow-array-49", "dep:arrow-schema-49", "dep:arrow-data-49", "dep:arrow-buffer-49"] arrow-48 = ["dep:arrow-array-48", "dep:arrow-schema-48", "dep:arrow-data-48", "dep:arrow-buffer-48"] arrow-47 = ["dep:arrow-array-47", "dep:arrow-schema-47", "dep:arrow-data-47", "dep:arrow-buffer-47"] arrow-46 = ["dep:arrow-array-46", "dep:arrow-schema-46", "dep:arrow-data-46", "dep:arrow-buffer-46"] @@ -42,6 +46,12 @@ chrono = "0.4" half = { version = "2", features = ["bytemuck"] } serde = { version = "1.0", features = ["derive"] } +# arrow-version:insert: arrow-array-{version} = {{ package = "arrow-array", version = "{version}", optional = true }}{\n}arrow-buffer-{version} = {{ package = "arrow-buffer", version = "{version}", optional = true }}{\n}arrow-data-{version} = {{ package = "arrow-data", version="{version}", optional = true }}{\n}arrow-schema-{version} = {{ package = "arrow-schema", version = "{version}", optional = true }}{\n} +arrow-array-49 = { package = "arrow-array", version = "49", optional = true } +arrow-buffer-49 = { package = "arrow-buffer", version = "49", optional = true } +arrow-data-49 = { package = "arrow-data", version="49", optional = true } +arrow-schema-49 = { package = "arrow-schema", version = "49", optional = true } + arrow-array-48 = { package = "arrow-array", version = "48", optional = true } arrow-buffer-48 = { package = "arrow-buffer", version = "48", optional = true } arrow-data-48 = { package = "arrow-data", version="48", optional = true } @@ -113,6 +123,7 @@ serde_json = "1" rand = "0.8" # for benchmarks -arrow-json-48 = { package = "arrow-json", version = "48" } +# arrow-version:replace: arrow-json-{version} = {{ package = "arrow-json", version = "{version}" }} +arrow-json-49 = { package = "arrow-json", version = "49" } criterion = "0.4" -arrow2_convert = "0.5.0" +arrow2_convert = "0.5.0" \ No newline at end of file diff --git a/serde_arrow/benches/groups/impls.rs b/serde_arrow/benches/groups/impls.rs index 6cdb7a83..1932eaf6 100644 --- a/serde_arrow/benches/groups/impls.rs +++ b/serde_arrow/benches/groups/impls.rs @@ -94,8 +94,10 @@ pub mod arrow { use std::sync::Arc; - use arrow_json_48::ReaderBuilder; - use arrow_schema_48::Schema; + // arrow-version:replace: use arrow_json_{version}::ReaderBuilder; + use arrow_json_49::ReaderBuilder; + // arrow-version:replace: use arrow_schema_{version}::Schema; + use arrow_schema_49::Schema; use serde::Serialize; diff --git a/serde_arrow/build.rs b/serde_arrow/build.rs index 463dcd8b..353b0532 100644 --- a/serde_arrow/build.rs +++ b/serde_arrow/build.rs @@ -14,6 +14,9 @@ fn main() { } let max_arrow_version: Option = [ + // arrow-version:insert: #[cfg(feature = "arrow-{version}")]{\n}{version}, + #[cfg(feature = "arrow-49")] + 49, #[cfg(feature = "arrow-48")] 48, #[cfg(feature = "arrow-47")] diff --git a/serde_arrow/src/lib.rs b/serde_arrow/src/lib.rs index e1a9e197..b46ebba2 100644 --- a/serde_arrow/src/lib.rs +++ b/serde_arrow/src/lib.rs @@ -109,19 +109,24 @@ //! //! Available features: //! -//! | Arrow Feature | Arrow Version | | Arrow2 version | Arrow2 Version | -//! |---------------|---------------|---|----------------|----------------| -//! | `arrow-46` | `arrow=46` | | `arrow2-0-17` | `arrow2=0.17`  | -//! | `arrow-45` | `arrow=45` | | `arrow2-0-16` | `arrow2=0.16` | -//! | `arrow-44` | `arrow=44` | | | | -//! | `arrow-43` | `arrow=43` | | | | -//! | `arrow-42` | `arrow=42` | | | | -//! | `arrow-41` | `arrow=41` | | | | -//! | `arrow-40` | `arrow=40` | | | | -//! | `arrow-39` | `arrow=39` | | | | -//! | `arrow-38` | `arrow=38` | | | | -//! | `arrow-37` | `arrow=37` | | | | -//! +//! | Arrow Feature | Arrow Version | +//! |---------------|---------------| +// arrow-version:insert: //! | `arrow-{version}` | `arrow={version}` | +//! | `arrow-49` | `arrow=49` | +//! | `arrow-48` | `arrow=48` | +//! | `arrow-47` | `arrow=47` | +//! | `arrow-46` | `arrow=46` | +//! | `arrow-45` | `arrow=45` | +//! | `arrow-44` | `arrow=44` | +//! | `arrow-43` | `arrow=43` | +//! | `arrow-42` | `arrow=42` | +//! | `arrow-41` | `arrow=41` | +//! | `arrow-40` | `arrow=40` | +//! | `arrow-39` | `arrow=39` | +//! | `arrow-38` | `arrow=38` | +//! | `arrow-37` | `arrow=37` | +//! | `arrow2-0-17` | `arrow2=0.17` | +//! | `arrow2-0-16` | `arrow2=0.16` | // be more forgiving without any active implementation #[cfg_attr(all(not(has_arrow), not(has_arrow2)), allow(unused))] @@ -188,6 +193,8 @@ pub mod _impl { }; } + // arrow-version:insert: #[cfg(has_arrow_{version})] build_arrow_crate!(arrow_array_{version}, arrow_buffer_{version}, arrow_data_{version}, arrow_schema_{version}); +#[cfg(has_arrow_49)] build_arrow_crate!(arrow_array_49, arrow_buffer_49, arrow_data_49, arrow_schema_49); #[cfg(has_arrow_48)] build_arrow_crate!(arrow_array_48, arrow_buffer_48, arrow_data_48, arrow_schema_48); #[cfg(has_arrow_47)] build_arrow_crate!(arrow_array_47, arrow_buffer_47, arrow_data_47, arrow_schema_47); #[cfg(has_arrow_46)] build_arrow_crate!(arrow_array_46, arrow_buffer_46, arrow_data_46, arrow_schema_46); diff --git a/x.py b/x.py index 85fd56c1..0b37461b 100644 --- a/x.py +++ b/x.py @@ -1,25 +1,28 @@ self_path = __import__("pathlib").Path(__file__).parent.resolve() +python = __import__("shlex").quote(__import__("sys").executable) __effect = lambda effect: lambda func: [func, effect(func.__dict__)][0] cmd = lambda **kw: __effect(lambda d: d.setdefault("@cmd", {}).update(kw)) arg = lambda *a, **kw: __effect(lambda d: d.setdefault("@arg", []).append((a, kw))) all_arrow_features = [ - "arrow-37", - "arrow-38", - "arrow-39", - "arrow-40", - "arrow-41", - "arrow-42", - "arrow-43", - "arrow-44", - "arrow-45", - "arrow-46", - "arrow-47", + # arrow-version:insert: "arrow-{version}", + "arrow-49", "arrow-48", + "arrow-47", + "arrow-46", + "arrow-45", + "arrow-44", + "arrow-43", + "arrow-42", + "arrow-41", + "arrow-40", + "arrow-39", + "arrow-38", + "arrow-37", ] -all_arrow2_features = ["arrow2-0-16", "arrow2-0-17"] -default_features = f"{all_arrow2_features[-1]},{all_arrow_features[-1]}" +all_arrow2_features = ["arrow2-0-17", "arrow2-0-16"] +default_features = f"{all_arrow2_features[0]},{all_arrow_features[0]}" CHECKS_PLACEHOLDER = "<<< checks >>>" @@ -85,7 +88,7 @@ def precommit(backtrace=False): update_workflows() - fmt() + format() lint() test(backtrace=backtrace) example() @@ -145,31 +148,27 @@ def _generate_workflow_check_steps(): @cmd(help="Format the code") -def fmt(): - python("-m", "black", __file__) - cargo("fmt") +def format(): + _sh(f"{python} -m black {_q(__file__)}") + _sh("cargo fmt") @cmd(help="Run the linting") @arg("--fast", action="store_true") def lint(fast=False): check_cargo_toml() - cargo("check", "--features", default_features) - cargo("clippy", "--features", default_features) + _sh(f"cargo check --features {default_features}") + _sh(f"cargo clippy --features {default_features}") if not fast: for arrow2_feature in (*all_arrow2_features, *all_arrow_features): - cargo( - "check", - "--features", - arrow2_feature, - ) + _sh(f"cargo check --features {arrow2_feature}") @cmd(help="Run the example") def example(): - cargo("run", "-p", "example") - python("-c", 'import polars as pl; print(pl.read_ipc("example.ipc"))') + _sh("cargo run -p example") + _sh(f"{python} -c 'import polars as pl; print(pl.read_ipc(\"example.ipc\"))'") @cmd(help="Run the tests") @@ -179,24 +178,20 @@ def test(backtrace=False, full=False): import os if not full: - flag_combinations = [["--features", default_features]] + feature_selections = [f"--features {default_features}"] else: - flag_combinations = [] - for arrow_feature in [[], *([feat] for feat in all_arrow_features)]: - for arrow2_feature in [[], *([feat] for feat in all_arrow2_features)]: - if not arrow_feature and not arrow2_feature: - flag_combinations.append([]) + feature_selections = [ + f"--features {', '.join(arrow_feature + arrow2_feature)}" + if arrow_feature or arrow2_feature + else "" + for arrow_feature in [[], *([feat] for feat in all_arrow_features)] + for arrow2_feature in [[], *([feat] for feat in all_arrow2_features)] + ] - else: - flag_combinations.append( - ["--features", ",".join(arrow_feature + arrow2_feature)] - ) - - for flags in flag_combinations: - cargo( - "test", - *flags, + for feature_selection in feature_selections: + _sh( + f"cargo test {feature_selection}", env=dict(os.environ, RUST_BACKTRACE="1" if backtrace else "0"), ) @@ -268,7 +263,7 @@ def check_cargo_toml(): @cmd(help="Run the benchmarks") def bench(): - cargo("bench", "--features", default_features) + _sh(f"cargo bench --features {default_features}") summarize_bench() @@ -483,29 +478,69 @@ def flatten(i): yield from ii -@cmd() +@cmd(help="Generate the documentation") @arg("--private", action="store_true", default=False) def doc(private=False): - cargo( - "doc", - "--features", - default_features, - *(["--document-private-items"] if private else []), + _sh( + f"cargo doc --features {default_features} {'--document-private-items' if private else ''}", cwd=self_path / "serde_arrow", ) -cargo = lambda *a, **kw: run("cargo", *a, **kw) -python = lambda *a, **kw: run(__import__("sys").executable, *a, **kw) -run = lambda *a, **kw: __import__("subprocess").run( - [str(aa) for aa in a], - executable=print("::", *a), +@cmd(help="Add a new arrow version") +@arg("version") +def add_arrow_version(version): + import re + + if _sh("git diff-files --quiet", check=False).returncode != 0: + print( + "WARNING: potentially destructive changes. " + "Please stage or commit the working tree first." + ) + raise SystemExit(1) + + for p in [ + self_path / "x.py", + *self_path.glob("serde_arrow/**/*.rs"), + *self_path.glob("serde_arrow/**/*.toml"), + ]: + content = p.read_text() + if "arrow-version" not in content: + continue + + print(f"process {p}") + new_content = [] + include_next = True + for line in content.splitlines(): + if ( + m := re.match(r"^.*(//|#) arrow-version:(replace|insert): (.*)$", line) + ) is not None: + new_content.append(line) + new_content.append( + m.group(3).format_map({"version": version, "\\n": "\n"}) + ) + include_next = m.group(2) != "replace" + + else: + if include_next: + new_content.append(line) + + include_next = True + + p.write_text("\n".join(new_content)) + + format() + + +_sh = lambda c, **kw: __import__("subprocess").run( + [args := __import__("shlex").split(c.replace("\n", " ")), print("::", *args)][0], **{"check": True, "cwd": self_path, "encoding": "utf-8", **kw}, ) +_q = lambda arg: __import__("shlex").quote(str(arg)) if __name__ == "__main__": _sps = (_p := __import__("argparse").ArgumentParser()).add_subparsers() - for _f in (f for f in list(globals().values()) if hasattr(f, "@cmd")): + for _f in (f for _, f in sorted(globals().items()) if hasattr(f, "@cmd")): _kw = {"name": _f.__name__.replace("_", "-"), **getattr(_f, "@cmd")} (_sp := _sps.add_parser(**_kw)).set_defaults(_=_f) [_sp.add_argument(*a, **kw) for a, kw in reversed(getattr(_f, "@arg", []))] From 4a7a5f2f95dae7726d4dfc4922bce5f86315203c Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Sat, 6 Jan 2024 09:26:41 +0100 Subject: [PATCH 2/3] Fix clippy lints --- .../src/arrow2_impl/deserialization.rs | 80 ++++++++----------- serde_arrow/src/arrow2_impl/schema.rs | 13 ++- serde_arrow/src/arrow_impl/deserialization.rs | 56 ++++++------- serde_arrow/src/arrow_impl/schema.rs | 13 ++- serde_arrow/src/internal/common/mod.rs | 1 - .../src/internal/deserialization/mod.rs | 6 +- serde_arrow/src/internal/schema.rs | 2 +- .../src/internal/serialization/compiler.rs | 18 ++--- 8 files changed, 83 insertions(+), 106 deletions(-) diff --git a/serde_arrow/src/arrow2_impl/deserialization.rs b/serde_arrow/src/arrow2_impl/deserialization.rs index 1199bdfb..0c0862b6 100644 --- a/serde_arrow/src/arrow2_impl/deserialization.rs +++ b/serde_arrow/src/arrow2_impl/deserialization.rs @@ -82,10 +82,9 @@ impl BufferExtract for dyn Array { macro_rules! convert_list { ($offset_type:ty, $variant:ident, $push_func:ident) => {{ - let typed = self - .as_any() - .downcast_ref::>() - .ok_or_else(|| error!("cannot interpret array as LargeList array"))?; + let Some(typed) = self.as_any().downcast_ref::>() else { + fail!("cannot interpret array as LargeList array"); + }; let validity = get_validity(typed); let offsets = typed.offsets(); @@ -96,10 +95,9 @@ impl BufferExtract for dyn Array { let offsets = buffers.$push_func(offsets)?; let validity = validity.map(|v| buffers.push_u1(v)); - let item_field = field - .children - .get(0) - .ok_or_else(|| error!("cannot get first child of list array"))?; + let Some(item_field) = field.children.first() else { + fail!("cannot get first child of list array") + }; let item = typed.values().extract_buffers(item_field, buffers)?; Ok(M::$variant { @@ -182,36 +180,28 @@ impl BufferExtract for dyn Array { }) } T::Map => { - let entries_field = field - .children - .get(0) - .ok_or_else(|| error!("cannot get children of map"))?; - let keys_field = entries_field - .children - .get(0) - .ok_or_else(|| error!("cannot get keys field"))?; - let values_field = entries_field - .children - .get(1) - .ok_or_else(|| error!("cannot get values field"))?; - - let typed = self - .as_any() - .downcast_ref::() - .ok_or_else(|| error!("cannot convert array into map array"))?; - let typed_entries = typed - .field() - .as_any() - .downcast_ref::() - .ok_or_else(|| error!("cannot convert map field into struct array"))?; - let typed_keys = typed_entries - .values() - .get(0) - .ok_or_else(|| error!("cannot get keys array of map entries"))?; - let typed_values = typed_entries - .values() - .get(1) - .ok_or_else(|| error!("cannot get keys array of map entries"))?; + let Some(entries_field) = field.children.first() else { + fail!("cannot get children of map"); + }; + let Some(keys_field) = entries_field.children.first() else { + fail!("cannot get keys field"); + }; + let Some(values_field) = entries_field.children.get(1) else { + fail!("cannot get values field"); + }; + let Some(typed) = self.as_any().downcast_ref::() else { + fail!("cannot convert array into map array"); + }; + let Some(typed_entries) = typed.field().as_any().downcast_ref::() + else { + fail!("cannot convert map field into struct array"); + }; + let Some(typed_keys) = typed_entries.values().first() else { + fail!("cannot get keys array of map entries"); + }; + let Some(typed_values) = typed_entries.values().get(1) else { + fail!("cannot get keys array of map entries"); + }; let offsets = typed.offsets().as_slice(); let validity = get_validity(typed); @@ -237,14 +227,12 @@ impl BufferExtract for dyn Array { }) } T::Dictionary => { - let keys_field = field - .children - .get(0) - .ok_or_else(|| error!("cannot get key field of dictionary"))?; - let values_field = field - .children - .get(1) - .ok_or_else(|| error!("cannot get values field"))?; + let Some(keys_field) = field.children.first() else { + fail!("cannot get key field of dictionary"); + }; + let Some(values_field) = field.children.get(1) else { + fail!("cannot get values field"); + }; macro_rules! convert_dictionary { ($key_type:ty, $variant:ident) => {{ diff --git a/serde_arrow/src/arrow2_impl/schema.rs b/serde_arrow/src/arrow2_impl/schema.rs index da360c05..32e0b236 100644 --- a/serde_arrow/src/arrow2_impl/schema.rs +++ b/serde_arrow/src/arrow2_impl/schema.rs @@ -220,14 +220,14 @@ impl TryFrom<&GenericField> for Field { GenericDataType::List => DataType::List(Box::new( value .children - .get(0) + .first() .ok_or_else(|| error!("List must a single child"))? .try_into()?, )), GenericDataType::LargeList => DataType::LargeList(Box::new( value .children - .get(0) + .first() .ok_or_else(|| error!("List must a single child"))? .try_into()?, )), @@ -241,7 +241,7 @@ impl TryFrom<&GenericField> for Field { GenericDataType::Map => { let element_field: Field = value .children - .get(0) + .first() .ok_or_else(|| error!("Map must a two children"))? .try_into()?; DataType::Map(Box::new(element_field), false) @@ -256,10 +256,9 @@ impl TryFrom<&GenericField> for Field { UnionMode::Dense, ), GenericDataType::Dictionary => { - let key_field = value - .children - .get(0) - .ok_or_else(|| error!("Dictionary must a two children"))?; + let Some(key_field) = value.children.first() else { + fail!("Dictionary must a two children"); + }; let val_field: Field = value .children .get(1) diff --git a/serde_arrow/src/arrow_impl/deserialization.rs b/serde_arrow/src/arrow_impl/deserialization.rs index a932aa72..a02989a9 100644 --- a/serde_arrow/src/arrow_impl/deserialization.rs +++ b/serde_arrow/src/arrow_impl/deserialization.rs @@ -69,10 +69,12 @@ impl BufferExtract for dyn Array { macro_rules! convert_list { ($offset_type:ty, $variant:ident, $push_func:ident) => {{ - let typed = self + let Some(typed) = self .as_any() .downcast_ref::>() - .ok_or_else(|| error!("cannot convert array into GenericListArray"))?; + else { + fail!("cannot convert array into GenericListArray"); + }; let offsets = typed.value_offsets(); let validity = get_validity(self); @@ -82,10 +84,9 @@ impl BufferExtract for dyn Array { let offsets = buffers.$push_func(offsets)?; let validity = validity.map(|v| buffers.push_u1(v)); - let item_field = field - .children - .get(0) - .ok_or_else(|| error!("cannot get first child of list array"))?; + let Some(item_field) = field.children.first() else { + fail!("cannot get first child of list array"); + }; let item = typed.values().extract_buffers(item_field, buffers)?; Ok(M::$variant { @@ -177,23 +178,18 @@ impl BufferExtract for dyn Array { }) } T::Map => { - let entries_field = field - .children - .get(0) - .ok_or_else(|| error!("cannot get children of map"))?; - let keys_field = entries_field - .children - .get(0) - .ok_or_else(|| error!("cannot get keys field"))?; - let values_field = entries_field - .children - .get(1) - .ok_or_else(|| error!("cannot get values field"))?; - - let typed = self - .as_any() - .downcast_ref::() - .ok_or_else(|| error!("cannot convert array into map array"))?; + let Some(entries_field) = field.children.first() else { + fail!("cannot get children of map"); + }; + let Some(keys_field) = entries_field.children.first() else { + fail!("cannot get keys field"); + }; + let Some(values_field) = entries_field.children.get(1) else { + fail!("cannot get values field"); + }; + let Some(typed) = self.as_any().downcast_ref::() else { + fail!("cannot convert array into map array"); + }; let offsets = typed.value_offsets(); let validity = get_validity(typed); @@ -220,14 +216,12 @@ impl BufferExtract for dyn Array { }) } T::Dictionary => { - let keys_field = field - .children - .get(0) - .ok_or_else(|| error!("cannot get key field of dictionary"))?; - let values_field = field - .children - .get(1) - .ok_or_else(|| error!("cannot get values field"))?; + let Some(keys_field) = field.children.first() else { + fail!("cannot get key field of dictionary"); + }; + let Some(values_field) = field.children.get(1) else { + fail!("cannot get values field"); + }; macro_rules! convert_dictionary { ($key_type:ty, $variant:ident) => {{ diff --git a/serde_arrow/src/arrow_impl/schema.rs b/serde_arrow/src/arrow_impl/schema.rs index 03c0f6e5..19666e70 100644 --- a/serde_arrow/src/arrow_impl/schema.rs +++ b/serde_arrow/src/arrow_impl/schema.rs @@ -215,7 +215,7 @@ impl TryFrom<&GenericField> for Field { Box::::new( value .children - .get(0) + .first() .ok_or_else(|| error!("List must a single child"))? .try_into()?, ) @@ -225,7 +225,7 @@ impl TryFrom<&GenericField> for Field { Box::::new( value .children - .get(0) + .first() .ok_or_else(|| error!("List must a single child"))? .try_into()?, ) @@ -241,7 +241,7 @@ impl TryFrom<&GenericField> for Field { GenericDataType::Map => { let element_field: Field = value .children - .get(0) + .first() .ok_or_else(|| error!("Map must a single child"))? .try_into()?; DataType::Map(Box::new(element_field).into(), false) @@ -268,10 +268,9 @@ impl TryFrom<&GenericField> for Field { UnionMode::Dense, ), GenericDataType::Dictionary => { - let key_field = value - .children - .get(0) - .ok_or_else(|| error!("Dictionary must a two children"))?; + let Some(key_field) = value.children.first() else { + fail!("Dictionary must a two children"); + }; let val_field: Field = value .children .get(1) diff --git a/serde_arrow/src/internal/common/mod.rs b/serde_arrow/src/internal/common/mod.rs index 213f4b46..8464b1e6 100644 --- a/serde_arrow/src/internal/common/mod.rs +++ b/serde_arrow/src/internal/common/mod.rs @@ -6,7 +6,6 @@ mod checks; pub use array_mapping::{ArrayMapping, DictionaryIndex, DictionaryValue}; pub use buffers::{ BitBuffer, BufferExtract, Buffers, MutableBitBuffer, MutableCountBuffer, MutableOffsetBuffer, - Offset, }; pub use checks::check_supported_list_layout; diff --git a/serde_arrow/src/internal/deserialization/mod.rs b/serde_arrow/src/internal/deserialization/mod.rs index 2f9e6762..502b855b 100644 --- a/serde_arrow/src/internal/deserialization/mod.rs +++ b/serde_arrow/src/internal/deserialization/mod.rs @@ -330,7 +330,7 @@ impl<'a> Compiler<'a> { else { fail!("cannot extract entries arrays mapping") }; - let Some(key_field) = entries_fields.get(0) else { + let Some(key_field) = entries_fields.first() else { fail!("cannot extract key field") }; let Some(values_field) = entries_fields.get(1) else { @@ -1716,8 +1716,8 @@ impl Instruction for EmitDictionaryStr { positions[self.position] += 1; let index: usize = match self.index { - I::U8(buffer) => buffers.get_u8(buffer)[pos].try_into()?, - I::U16(buffer) => buffers.get_u16(buffer)[pos].try_into()?, + I::U8(buffer) => buffers.get_u8(buffer)[pos].into(), + I::U16(buffer) => buffers.get_u16(buffer)[pos].into(), I::U32(buffer) => buffers.get_u32(buffer)[pos].try_into()?, I::U64(buffer) => buffers.get_u64(buffer)[pos].try_into()?, I::I8(buffer) => buffers.get_i8(buffer)[pos].try_into()?, diff --git a/serde_arrow/src/internal/schema.rs b/serde_arrow/src/internal/schema.rs index ed6c7c7b..dffffb8e 100644 --- a/serde_arrow/src/internal/schema.rs +++ b/serde_arrow/src/internal/schema.rs @@ -34,7 +34,7 @@ pub trait Sealed {} /// - [`SerdeArrowSchema`] #[cfg_attr( has_arrow, - doc = "- `Vec<`[`arrow::datatypes::Field`][crate::_impl::arrow::datatypes::Field]`>" + doc = "- `Vec<`[`arrow::datatypes::Field`][crate::_impl::arrow::datatypes::Field]`>`" )] #[cfg_attr( has_arrow2, diff --git a/serde_arrow/src/internal/serialization/compiler.rs b/serde_arrow/src/internal/serialization/compiler.rs index 6ad85644..e4d63325 100644 --- a/serde_arrow/src/internal/serialization/compiler.rs +++ b/serde_arrow/src/internal/serialization/compiler.rs @@ -313,10 +313,9 @@ impl Program { fail!("inconsistent arguments"); } - let item = field - .children - .get(0) - .ok_or_else(|| error!("invalid list: no child"))?; + let Some(item) = field.children.first() else { + fail!("invalid list: no child"); + }; let list_idx = self.structure.lists.len(); let offsets = self.buffers.num_u32_offsets.next_value(); @@ -358,10 +357,9 @@ impl Program { fail!("inconsistent arguments"); } - let item = field - .children - .get(0) - .ok_or_else(|| error!("invalid list: no child"))?; + let Some(item) = field.children.first() else { + fail!("invalid list: no child"); + }; let list_idx = self.structure.large_lists.len(); let offsets = self.buffers.num_u64_offsets.next_value(); @@ -711,10 +709,10 @@ impl Program { } field.validate_map()?; - let Some(entries) = field.children.get(0) else { + let Some(entries) = field.children.first() else { fail!("invalid list: no child"); }; - let Some(keys) = entries.children.get(0) else { + let Some(keys) = entries.children.first() else { fail!("entries without key field"); }; let Some(values) = entries.children.get(1) else { From 5f9bee9f65f34111c631134b1ca5970b143c6f8f Mon Sep 17 00:00:00 2001 From: Christopher Prohm Date: Sat, 6 Jan 2024 09:30:04 +0100 Subject: [PATCH 3/3] Update changelog --- Changes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/Changes.md b/Changes.md index d171c97d..cde7b00a 100644 --- a/Changes.md +++ b/Changes.md @@ -21,6 +21,7 @@ Improvements: - Add support for `arrow=47` and `arrow=48` - Fix bug in `arrow2=0.16` support - Fix unused warnings without selected arrow versions +- Add `arrow=49` support Deprecations (see the documentation of deprecated items for how to migratie):