diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 7ac0dfa78215..546887640652 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -193,23 +193,43 @@ jobs: name: cargo test datafusion-cli (amd64) needs: linux-build-lib runs-on: ubuntu-latest - container: - image: amd64/rust + env: + AWS_ENDPOINT: http://127.0.0.1:9000 + AWS_ACCESS_KEY_ID: TEST-DataFusionLogin + AWS_SECRET_ACCESS_KEY: TEST-DataFusionPassword + TEST_STORAGE_INTEGRATION: "1"  # opts in to the storage integration tests + AWS_ALLOW_HTTP: "true" + steps: - uses: actions/checkout@v4 with: submodules: true fetch-depth: 1 - name: Setup Rust toolchain - uses: ./.github/actions/setup-builder - with: - rust-version: stable - - name: Run tests (excluding doctests) run: | - cd datafusion-cli - cargo test --profile ci --lib --tests --bins --all-features + rustup toolchain install stable + rustup default stable + - name: Setup Minio - S3-compatible storage + working-directory: datafusion-cli + run: | + docker run -d --name minio-container \ + -p 9000:9000 \ + -e MINIO_ROOT_USER=TEST-DataFusionLogin -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \ + -v "$(pwd)/../datafusion/core/tests/data:/source" quay.io/minio/minio \ + server /data + docker exec minio-container /bin/sh -c "\ + mc ready local && \ + mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \ + mc mb localminio/data && \ + mc cp -r /source/* localminio/data" + - name: Run tests (excluding doctests, but with integration tests) + working-directory: datafusion-cli + run: cargo test --profile ci --lib --tests --bins --all-features - name: Verify Working Directory Clean run: git diff --exit-code + - name: Minio Output + if: ${{ !cancelled() }} + run: docker logs minio-container linux-test-example: name: cargo examples (amd64) diff --git a/datafusion-cli/CONTRIBUTING.md b/datafusion-cli/CONTRIBUTING.md new file mode 100644 index 000000000000..819e66650d4c --- /dev/null +++ b/datafusion-cli/CONTRIBUTING.md @@ -0,0 +1,84 @@ + + 
+# Development instructions + +## Running Tests + +Tests can be run using `cargo`: + +```shell +cargo test +``` + +## Snapshot testing + +CLI output is checked with [Insta](https://github.com/mitsuhiko/insta) snapshot tests. Snapshots are generated +and compared on each test run. If the output changes, tests will fail. +To review the changes, you can use the Insta CLI: + +```shell +cargo install cargo-insta +cargo insta review +``` + +## Running Storage Integration Tests + +By default, storage integration tests are not run. To run them, set `TEST_STORAGE_INTEGRATION=1` and +then provide the necessary configuration for the object store under test. + +### AWS + +To test the S3 integration against [Minio](https://github.com/minio/minio), follow the steps below. + +First, start a Minio container and load the test files. Run this from the `datafusion-cli` directory, because the volume path is relative to it. + +```shell +docker run -d \ + --name datafusion-test-minio \ + -p 9000:9000 \ + -e MINIO_ROOT_USER=TEST-DataFusionLogin \ + -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \ + -v $(pwd)/../datafusion/core/tests/data:/source \ + quay.io/minio/minio server /data + +docker exec datafusion-test-minio /bin/sh -c "\ + mc ready local + mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \ + mc mb localminio/data && \ + mc cp -r /source/* localminio/data" +``` + +Set up the environment: + +```shell +export TEST_STORAGE_INTEGRATION=1 +export AWS_ACCESS_KEY_ID=TEST-DataFusionLogin +export AWS_SECRET_ACCESS_KEY=TEST-DataFusionPassword +export AWS_ENDPOINT=http://127.0.0.1:9000 +export AWS_ALLOW_HTTP=true +``` + +Note that `AWS_ENDPOINT` is set without a trailing slash. 
+ +Run tests + +```shell +cargo test +``` diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 23b4700ea8f6..5863ee5b58d9 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1078,6 +1078,18 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "console" +version = "0.15.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "windows-sys 0.59.0", +] + [[package]] name = "const-random" version = "0.1.18" @@ -1315,6 +1327,8 @@ dependencies = [ "env_logger", "futures", "home", + "insta", + "insta-cmd", "mimalloc", "object_store", "parking_lot", @@ -1726,6 +1740,12 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "endian-type" version = "0.1.2" @@ -1974,6 +1994,19 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +[[package]] +name = "globset" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "h2" version = "0.3.26" @@ -2412,6 +2445,33 @@ dependencies = [ "hashbrown 0.15.2", ] +[[package]] +name = "insta" +version = "1.41.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e9ffc4d4892617c50a928c52b2961cb5174b6fc6ebf252b2fac9d21955c48b8" +dependencies = [ + 
"console", + "globset", + "lazy_static", + "linked-hash-map", + "regex", + "serde", + "similar", + "walkdir", +] + +[[package]] +name = "insta-cmd" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffeeefa927925cced49ccb01bf3e57c9d4cd132df21e576eb9415baeab2d3de6" +dependencies = [ + "insta", + "serde", + "serde_json", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -2599,6 +2659,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.15" @@ -3719,6 +3785,12 @@ dependencies = [ "libc", ] +[[package]] +name = "similar" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" + [[package]] name = "siphasher" version = "1.0.1" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 8b5bb901b713..8ac978ed050b 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -71,6 +71,8 @@ url = "2.5.4" [dev-dependencies] assert_cmd = "2.0" ctor = "0.2.9" +insta = { version = "1.41.1", features = ["glob", "filters"] } +insta-cmd = "0.6.0" predicates = "3.0" rstest = "0.22" diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 27cabf15afec..309502b54d70 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -17,10 +17,24 @@ use std::process::Command; -use assert_cmd::prelude::{CommandCargoExt, OutputAssertExt}; -use predicates::prelude::predicate; use rstest::rstest; +use insta::{glob, Settings}; +use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; +use std::{env, fs}; + +fn cli() -> Command { + Command::new(get_cargo_bin("datafusion-cli")) +} + +fn 
make_settings() -> Settings { + let mut settings = Settings::clone_current(); + settings.set_prepend_module_to_snapshot(false); + settings.add_filter(r"Elapsed .* seconds\.", "[ELAPSED]"); + settings.add_filter(r"DataFusion CLI v.*", "[CLI_VERSION]"); + settings +} + #[cfg(test)] #[ctor::ctor] fn init() { @@ -28,31 +42,104 @@ fn init() { let _ = env_logger::try_init(); } -// Disabled due to https://github.com/apache/datafusion/issues/10793 -#[cfg(not(target_family = "windows"))] +/// Runs `datafusion-cli` with each case's `args` and compares the captured result +/// against the insta snapshot whose suffix is `snapshot_name`. #[rstest] -#[case::exec_from_commands( - ["--command", "select 1", "--format", "json", "-q"], - "[{\"Int64(1)\":1}]\n" -)] #[case::exec_multiple_statements( - ["--command", "select 1; select 2;", "--format", "json", "-q"], - "[{\"Int64(1)\":1}]\n[{\"Int64(2)\":2}]\n" + "statements", + ["--command", "select 1; select 2;", "-q"], )] #[case::exec_from_files( - ["--file", "tests/data/sql.txt", "--format", "json", "-q"], - "[{\"Int64(1)\":1}]\n" + "files", + ["--file", "tests/sql/select.sql", "-q"], )] #[case::set_batch_size( - ["--command", "show datafusion.execution.batch_size", "--format", "json", "-q", "-b", "1"], - "[{\"name\":\"datafusion.execution.batch_size\",\"value\":\"1\"}]\n" + "batch_size", + ["--command", "show datafusion.execution.batch_size", "-q", "-b", "1"], )] #[test] fn cli_quick_test<'a>( + #[case] snapshot_name: &'a str, #[case] args: impl IntoIterator<Item = &'a str>, - #[case] expected: &str, ) { - let mut cmd = Command::cargo_bin("datafusion-cli").unwrap(); + let mut settings = make_settings(); + settings.set_snapshot_suffix(snapshot_name); + let _bound = settings.bind_to_scope(); + + let mut cmd = cli(); cmd.args(args); - cmd.assert().stdout(predicate::eq(expected)); + + assert_cmd_snapshot!(cmd); +} + +#[rstest] +#[case("csv")] +#[case("tsv")] +#[case("table")] +#[case("json")] +#[case("nd-json")] +#[case("automatic")] +#[test] +fn test_cli_format<'a>(#[case] format: &'a str) { + let mut settings = make_settings(); + settings.set_snapshot_suffix(format); + let _bound = 
settings.bind_to_scope(); + + let mut cmd = cli(); + cmd.args(["--command", "select 1", "-q", "--format", format]); + + assert_cmd_snapshot!(cmd); +} + +#[tokio::test] +async fn test_cli() { + if env::var("TEST_STORAGE_INTEGRATION").is_err() { + eprintln!("Skipping external storages integration tests"); + return; + } + + let settings = make_settings(); + let _bound = settings.bind_to_scope(); + + glob!("sql/*.sql", |path| { + let input = fs::read_to_string(path).unwrap(); + assert_cmd_snapshot!(cli().pass_stdin(input)) + }); +} + +#[tokio::test] +async fn test_aws_options() { + // Separate test is needed to pass aws as options in sql and not via env + + if env::var("TEST_STORAGE_INTEGRATION").is_err() { + eprintln!("Skipping external storages integration tests"); + return; + } + + let settings = make_settings(); + let _bound = settings.bind_to_scope(); + + let access_key_id = + env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID is not set"); + let secret_access_key = + env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY is not set"); + let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set"); + + let input = format!( + r#"CREATE EXTERNAL TABLE CARS +STORED AS CSV +LOCATION 's3://data/cars.csv' +OPTIONS( + 'aws.access_key_id' '{}', + 'aws.secret_access_key' '{}', + 'aws.endpoint' '{}', + 'aws.allow_http' 'true' +); + +SELECT * FROM CARS limit 1; +"#, + access_key_id, secret_access_key, endpoint_url + ); + + assert_cmd_snapshot!(cli().env_clear().pass_stdin(input)); } diff --git a/datafusion-cli/tests/snapshots/aws_options.snap b/datafusion-cli/tests/snapshots/aws_options.snap new file mode 100644 index 000000000000..283cf57bc662 --- /dev/null +++ b/datafusion-cli/tests/snapshots/aws_options.snap @@ -0,0 +1,25 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: [] + stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://data/cars.csv'\nOPTIONS(\n 'aws.access_key_id' 
'TEST-DataFusionLogin',\n 'aws.secret_access_key' 'TEST-DataFusionPassword',\n 'aws.endpoint' 'http://127.0.0.1:9000',\n 'aws.allow_http' 'true'\n);\n\nSELECT * FROM CARS limit 1;\n" +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] +0 row(s) fetched. +[ELAPSED] + ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. +[ELAPSED] + +\q + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli@load_local_csv.sql.snap b/datafusion-cli/tests/snapshots/cli@load_local_csv.sql.snap new file mode 100644 index 000000000000..029d5f8d5b9f --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli@load_local_csv.sql.snap @@ -0,0 +1,26 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: [] + stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION '../datafusion/core/tests/data/cars.csv'\nOPTIONS ('has_header' 'TRUE');\n\nSELECT * FROM CARS limit 1;" +input_file: tests/sql/load_local_csv.sql +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] +0 row(s) fetched. +[ELAPSED] + ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. 
+[ELAPSED] + +\q + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap b/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap new file mode 100644 index 000000000000..858989621a1f --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap @@ -0,0 +1,26 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: [] + stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://data/cars.csv';\n\nSELECT * FROM CARS limit 1;" +input_file: tests/sql/load_s3_csv.sql +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] +0 row(s) fetched. +[ELAPSED] + ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. +[ELAPSED] + +\q + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli@select.sql.snap b/datafusion-cli/tests/snapshots/cli@select.sql.snap new file mode 100644 index 000000000000..c137d9fe2b13 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli@select.sql.snap @@ -0,0 +1,23 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: [] + stdin: select 1; +input_file: tests/sql/select.sql +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] ++----------+ +| Int64(1) | ++----------+ +| 1 | ++----------+ +1 row(s) fetched. 
+[ELAPSED] + +\q + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_format@automatic.snap b/datafusion-cli/tests/snapshots/cli_format@automatic.snap new file mode 100644 index 000000000000..2591f493e90a --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format@automatic.snap @@ -0,0 +1,21 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - automatic +--- +success: true +exit_code: 0 +----- stdout ----- ++----------+ +| Int64(1) | ++----------+ +| 1 | ++----------+ + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_format@csv.snap b/datafusion-cli/tests/snapshots/cli_format@csv.snap new file mode 100644 index 000000000000..c41b042298eb --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format@csv.snap @@ -0,0 +1,18 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - csv +--- +success: true +exit_code: 0 +----- stdout ----- +Int64(1) +1 + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_format@json.snap b/datafusion-cli/tests/snapshots/cli_format@json.snap new file mode 100644 index 000000000000..8f804a337cce --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format@json.snap @@ -0,0 +1,17 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - json +--- +success: true +exit_code: 0 +----- stdout ----- +[{"Int64(1)":1}] + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_format@nd-json.snap b/datafusion-cli/tests/snapshots/cli_format@nd-json.snap new file mode 100644 index 000000000000..7b4ce1e2530c --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format@nd-json.snap @@ -0,0 +1,17 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - 
nd-json +--- +success: true +exit_code: 0 +----- stdout ----- +{"Int64(1)":1} + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_format@table.snap b/datafusion-cli/tests/snapshots/cli_format@table.snap new file mode 100644 index 000000000000..99914182462a --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format@table.snap @@ -0,0 +1,21 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - table +--- +success: true +exit_code: 0 +----- stdout ----- ++----------+ +| Int64(1) | ++----------+ +| 1 | ++----------+ + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_format@tsv.snap b/datafusion-cli/tests/snapshots/cli_format@tsv.snap new file mode 100644 index 000000000000..968268c31dd5 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format@tsv.snap @@ -0,0 +1,18 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - tsv +--- +success: true +exit_code: 0 +----- stdout ----- +Int64(1) +1 + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap new file mode 100644 index 000000000000..c27d527df0b6 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap @@ -0,0 +1,21 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - show datafusion.execution.batch_size + - "-q" + - "-b" + - "1" +--- +success: true +exit_code: 0 +----- stdout ----- ++---------------------------------+-------+ +| name | value | ++---------------------------------+-------+ +| datafusion.execution.batch_size | 1 | ++---------------------------------+-------+ + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@files.snap b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap new file 
mode 100644 index 000000000000..7c44e41729a1 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap @@ -0,0 +1,19 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--file" + - tests/sql/select.sql + - "-q" +--- +success: true +exit_code: 0 +----- stdout ----- ++----------+ +| Int64(1) | ++----------+ +| 1 | ++----------+ + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap new file mode 100644 index 000000000000..3b975bb6a927 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap @@ -0,0 +1,24 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1; select 2; + - "-q" +--- +success: true +exit_code: 0 +----- stdout ----- ++----------+ +| Int64(1) | ++----------+ +| 1 | ++----------+ ++----------+ +| Int64(2) | ++----------+ +| 2 | ++----------+ + +----- stderr ----- diff --git a/datafusion-cli/tests/sql/load_local_csv.sql b/datafusion-cli/tests/sql/load_local_csv.sql new file mode 100644 index 000000000000..8920c48c5f5f --- /dev/null +++ b/datafusion-cli/tests/sql/load_local_csv.sql @@ -0,0 +1,6 @@ +CREATE EXTERNAL TABLE CARS +STORED AS CSV +LOCATION '../datafusion/core/tests/data/cars.csv' +OPTIONS ('has_header' 'TRUE'); + +SELECT * FROM CARS limit 1; \ No newline at end of file diff --git a/datafusion-cli/tests/sql/load_s3_csv.sql b/datafusion-cli/tests/sql/load_s3_csv.sql new file mode 100644 index 000000000000..10c2e38b9764 --- /dev/null +++ b/datafusion-cli/tests/sql/load_s3_csv.sql @@ -0,0 +1,5 @@ +CREATE EXTERNAL TABLE CARS +STORED AS CSV +LOCATION 's3://data/cars.csv'; + +SELECT * FROM CARS limit 1; \ No newline at end of file diff --git a/datafusion-cli/tests/data/sql.txt b/datafusion-cli/tests/sql/select.sql similarity index 100% rename from datafusion-cli/tests/data/sql.txt rename to 
datafusion-cli/tests/sql/select.sql