diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 02db39031..4513d3ab0 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -88,6 +88,29 @@ jobs: name: vegafusion-wasm-packages path: vegafusion-wasm/pkg/vegafusion-wasm-*.tgz + check-vegafusion-python: + runs-on: ubuntu-20.04 + steps: + - name: Check out repository code + uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # pin@v3.5.2 + - uses: prefix-dev/setup-pixi@v0.8.1 + with: + pixi-version: v0.30.0 + - name: Cache + uses: actions/cache@v3 + with: + key: ${{ runner.os }}-${{ hashFiles('pixi.lock', 'Cargo.lock') }}-build-vegafusion-wasm + path: | + ~/.cargo + target + .pixi + - name: Check format and lint + run: | + pixi run lint-check-py + - name: Type check + run: | + pixi run type-check-py + # Use maturin action to build linux wheels within proper manylinux compatible containers # (This is why we don't use the pixi "build-py" action) build-vegafusion-python-linux-64: @@ -325,7 +348,7 @@ jobs: - name: Setup Python uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # pin@4.7 with: - python-version: '3.8' + python-version: '3.9' - name: Install Chrome uses: browser-actions/setup-chrome@f0ff752add8c926994566c80b3ceadfd03f24d12 # pin@latest with: @@ -345,7 +368,7 @@ jobs: python -m pip install $vegafusion # Optional dependencies - python -m pip install pyarrow==10.0 altair==5.1.2 polars[timezone] "duckdb>=1.0" vl-convert-python + python -m pip install pyarrow==10.0 altair==5.1.2 "numpy<2" polars[timezone] "duckdb>=1.0" vl-convert-python # Test dependencies python -m pip install pytest altair vega-datasets scikit-image diff --git a/pixi.lock b/pixi.lock index 044628919..814fc46e4 100644 --- a/pixi.lock +++ b/pixi.lock @@ -84,6 +84,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.1-h0b41bf4_3.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/glog-0.6.0-h6f12383_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h58526e2_1001.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/linux-64/grpcio-1.56.2-py310h1b8f574_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/h11-0.14.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.2.1-h3d44ed6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda @@ -164,6 +165,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.18-h36c2ea0_1.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.0-hde9e2c9_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.1.0-hc0a3c3a_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h7e041cc_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.19.0-hb90f79a_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h29866fb_1.conda @@ -184,6 +186,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/minio-7.1.17-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/minio-server-2023.09.23.03.47.50-hbcca054_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/mistune-3.0.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/mypy-1.11.2-py310ha75aee5_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.0.0-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-1.8.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbclient-0.7.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-7.10.0-pyhd8ed1ab_0.conda @@ -254,6 +258,7 @@ 
environments: - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3339-validator-0.1.4-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3986-validator-0.1.1-pyh9f0ad1d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/rpds-py-0.10.6-py310hcb5633a_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ruff-0.6.9-py310h624018c_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/rust-1.80.1-h0a17960_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/rust-std-x86_64-unknown-linux-gnu-1.80.1-h2c6d0dc_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/s2n-1.3.54-h06160fa_0.conda @@ -292,7 +297,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/uri-template-1.3.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.7-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/vega_datasets-0.9.0-pyhd3deb0d_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/linux-64/vl-convert-python-1.6.0-py310h5b4e0ec_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/vl-convert-python-1.7.0-py310ha75aee5_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/voila-0.5.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/watchfiles-0.21.0-py310hcb5633a_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/wcwidth-0.2.9-pyhd8ed1ab_0.conda @@ -396,6 +401,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-64/gflags-2.2.2-hb1e8313_1004.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-64/giflib-5.2.1-hb7f2c08_3.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/glog-0.6.0-h8ac2a54_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-64/grpcio-1.56.2-py310h0d4bf3c_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/h11-0.14.0-pyhd8ed1ab_0.tar.bz2 - conda: 
https://conda.anaconda.org/conda-forge/osx-64/icu-73.2-hf5e326d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2 @@ -443,7 +449,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-64/libcblas-3.9.0-19_osx64_openblas.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libcrc32c-1.1.2-he49afe7_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-64/libcurl-8.4.0-h726d00d_0.conda - - conda: https://conda.anaconda.org/conda-forge/osx-64/libcxx-16.0.6-hd57cbcb_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/libcxx-19.1.1-hf95d169_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libdeflate-1.19-ha4e1b8e_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/libedit-3.1.20191231-h0678c8f_2.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-64/libev-4.33-haf1e3a3_1.tar.bz2 @@ -481,6 +487,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/minio-7.1.17-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/minio-server-2023.09.23.03.47.50-h8857fd0_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/mistune-3.0.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/mypy-1.11.2-py310h837254d_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.0.0-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-1.8.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbclient-0.7.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-7.10.0-pyhd8ed1ab_0.conda @@ -550,6 +558,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3339-validator-0.1.4-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3986-validator-0.1.1-pyh9f0ad1d_0.tar.bz2 - conda: 
https://conda.anaconda.org/conda-forge/osx-64/rpds-py-0.10.6-py310h0e083fb_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/ruff-0.6.9-py310h4f26fa7_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/rust-1.80.1-h6c54e5d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/rust-std-x86_64-apple-darwin-1.80.1-h38e4360_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/scikit-image-0.21.0-py310h9e9d8ca_0.conda @@ -585,7 +594,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/uri-template-1.3.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.7-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/vega_datasets-0.9.0-pyhd3deb0d_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/osx-64/vl-convert-python-1.6.0-py310h936d840_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/vl-convert-python-1.7.0-py310h837254d_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/voila-0.5.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/watchfiles-0.21.0-py310h0e083fb_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/wcwidth-0.2.9-pyhd8ed1ab_0.conda @@ -692,6 +701,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/glib-tools-2.76.4-ha614eb4_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/glog-0.6.0-h6da1cb0_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/graphite2-1.3.13-h9f76cd9_1001.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/grpcio-1.56.2-py310h95b248a_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/h11-0.14.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/harfbuzz-7.3.0-h46e5fef_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/icu-72.1-he12128b_0.conda @@ -741,7 +751,7 @@ environments: - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/libcblas-3.9.0-17_osxarm64_openblas.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcrc32c-1.1.2-hbdafb3b_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcurl-8.2.1-hc52a3a8_0.conda - - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-16.0.6-h4653b0c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-19.1.1-ha82da77_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libdeflate-1.17-h1a8c8d9_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libedit-3.1.20191231-hc8eb9b7_2.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libev-4.33-h642e427_1.tar.bz2 @@ -783,6 +793,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/minio-7.1.17-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/minio-server-2023.09.23.03.47.50-hf0a4a13_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/mistune-3.0.1-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/mypy-1.11.2-py310h493c2e1_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.0.0-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-1.8.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbclient-0.7.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-7.7.4-pyhd8ed1ab_0.conda @@ -856,6 +868,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3339-validator-0.1.4-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3986-validator-0.1.1-pyh9f0ad1d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/rpds-py-0.9.2-py310had9acf8_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ruff-0.6.9-py310he174661_0.conda - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/rust-1.80.1-h4ff7c5d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/rust-std-aarch64-apple-darwin-1.80.1-hf6ec828_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/scikit-image-0.21.0-py310h1253130_0.conda @@ -889,7 +902,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/uri-template-1.3.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/vega_datasets-0.9.0-pyhd3deb0d_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/osx-arm64/vl-convert-python-1.6.0-py310ha6dd24b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/vl-convert-python-1.7.0-py310h493c2e1_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/voila-0.5.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/watchfiles-0.21.0-py310hd442715_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/wcwidth-0.2.6-pyhd8ed1ab_0.conda @@ -971,6 +984,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/freetype-2.12.1-hdaf720e_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/future-0.18.3-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/giflib-5.2.1-h64bf75a_3.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/grpcio-1.56.2-py310hb84602e_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/h11-0.14.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/win-64/imagecodecs-2023.9.18-py310h0dcf169_2.conda @@ -1057,6 +1071,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/mistune-3.0.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/mkl-2023.2.0-h6a75c08_50496.conda - conda: 
https://conda.anaconda.org/conda-forge/win-64/msys2-conda-epoch-20160418-1.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/win-64/mypy-1.11.2-py310ha8f682b_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.0.0-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-1.8.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbclient-0.7.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/nbconvert-7.10.0-pyhd8ed1ab_0.conda @@ -1123,6 +1139,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3339-validator-0.1.4-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/rfc3986-validator-0.1.1-pyh9f0ad1d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/win-64/rpds-py-0.10.6-py310h87d50f1_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/ruff-0.6.9-py310h11b6ba5_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/rust-1.80.1-hf8d6059_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/rust-std-x86_64-pc-windows-msvc-1.80.1-h17fc481_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/scikit-image-0.21.0-py310h00ffb61_0.conda @@ -1163,7 +1180,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/vc-14.3-h64f974e_17.conda - conda: https://conda.anaconda.org/conda-forge/win-64/vc14_runtime-14.40.33810-ha82c5b3_20.conda - conda: https://conda.anaconda.org/conda-forge/noarch/vega_datasets-0.9.0-pyhd3deb0d_0.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/win-64/vl-convert-python-1.6.0-py310hb47754f_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/vl-convert-python-1.7.0-py310hdfd1e6a_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/voila-0.5.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/vs2015_runtime-14.40.33810-h3bf8584_20.conda - conda: 
https://conda.anaconda.org/conda-forge/win-64/watchfiles-0.21.0-py310h87d50f1_0.conda @@ -4924,6 +4941,87 @@ packages: license: LGPLv2 size: 83198 timestamp: 1604365687923 +- kind: conda + name: grpcio + version: 1.56.2 + build: py310h0d4bf3c_1 + build_number: 1 + subdir: osx-64 + url: https://conda.anaconda.org/conda-forge/osx-64/grpcio-1.56.2-py310h0d4bf3c_1.conda + sha256: ab9c89203aaa5e32cbfbc955ab8fdb6e36a59827f8658f072e148205f9daf082 + md5: 07b3f0664ef56646ee40424ee7657e78 + depends: + - __osx >=10.13 + - libcxx >=15.0.7 + - libgrpc 1.56.2 he6801ca_1 + - libzlib >=1.2.13,<2.0.0a0 + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + license: Apache-2.0 + license_family: APACHE + size: 710917 + timestamp: 1692025409041 +- kind: conda + name: grpcio + version: 1.56.2 + build: py310h1b8f574_1 + build_number: 1 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/grpcio-1.56.2-py310h1b8f574_1.conda + sha256: c2eda195f4de3210c1798439a37c075337d331ab2fe0cc9e4a6acb115a15a1ab + md5: 08d2538a6907851ea70d8f7cddc2f0d3 + depends: + - libgcc-ng >=12 + - libgrpc 1.56.2 h3905398_1 + - libstdcxx-ng >=12 + - libzlib >=1.2.13,<2.0.0a0 + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + license: Apache-2.0 + license_family: APACHE + size: 764791 + timestamp: 1692023633876 +- kind: conda + name: grpcio + version: 1.56.2 + build: py310h95b248a_1 + build_number: 1 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/grpcio-1.56.2-py310h95b248a_1.conda + sha256: f1b22c8492fcbb41021e909c75786a6c864eb29115098076a8af0a5efe45288e + md5: ed8ce58613462ac09ebe53a3606a74d5 + depends: + - libcxx >=15.0.7 + - libgrpc 1.56.2 h9075ed4_1 + - libzlib >=1.2.13,<2.0.0a0 + - python >=3.10,<3.11.0a0 + - python >=3.10,<3.11.0a0 *_cpython + - python_abi 3.10.* *_cp310 + license: Apache-2.0 + license_family: APACHE + size: 695887 + timestamp: 1692025592968 +- kind: conda + name: grpcio + version: 1.56.2 + build: py310hb84602e_1 + build_number: 1 
+ subdir: win-64 + url: https://conda.anaconda.org/conda-forge/win-64/grpcio-1.56.2-py310hb84602e_1.conda + sha256: bc0bcd5a1aa53f87ae6d2ca35f2a1b5f38145fc337c18e4e0f77bc275aa0a669 + md5: 9a0b9fafd6453839c986d8d876ce202a + depends: + - libgrpc 1.56.2 hea2d5f7_1 + - libzlib >=1.2.13,<2.0.0a0 + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + license: Apache-2.0 + license_family: APACHE + size: 607527 + timestamp: 1692025811162 - kind: conda name: h11 version: 0.14.0 @@ -7749,32 +7847,32 @@ packages: timestamp: 1697009866852 - kind: conda name: libcxx - version: 16.0.6 - build: h4653b0c_0 + version: 19.1.1 + build: ha82da77_0 subdir: osx-arm64 - url: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-16.0.6-h4653b0c_0.conda - sha256: 11d3fb51c14832d9e4f6d84080a375dec21ea8a3a381a1910e67ff9cedc20355 - md5: 9d7d724faf0413bf1dbc5a85935700c8 - arch: aarch64 - platform: osx + url: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-19.1.1-ha82da77_0.conda + sha256: bc2f7cca206fa8a1dfe801c90362a1b6ec2967a75ef60d26e7c7114884c120c0 + md5: 4ed0a90fd6a5bdda4ecf98912329993f + depends: + - __osx >=11.0 license: Apache-2.0 WITH LLVM-exception license_family: Apache - size: 1160232 - timestamp: 1686896993785 + size: 522850 + timestamp: 1727862893739 - kind: conda name: libcxx - version: 16.0.6 - build: hd57cbcb_0 + version: 19.1.1 + build: hf95d169_0 subdir: osx-64 - url: https://conda.anaconda.org/conda-forge/osx-64/libcxx-16.0.6-hd57cbcb_0.conda - sha256: 9063271847cf05f3a6cc6cae3e7f0ced032ab5f3a3c9d3f943f876f39c5c2549 - md5: 7d6972792161077908b62971802f289a - arch: x86_64 - platform: osx + url: https://conda.anaconda.org/conda-forge/osx-64/libcxx-19.1.1-hf95d169_0.conda + sha256: 390ee50a14fe5b6ac87b64eeb0130c7a79853641ae9a8926687556c76a645889 + md5: 2b09d0f92cae6df4b1670adcaca9c38c + depends: + - __osx >=10.13 license: Apache-2.0 WITH LLVM-exception license_family: Apache - size: 1142172 
- timestamp: 1686896907750 + size: 528308 + timestamp: 1727863581528 - kind: conda name: libdeflate version: '1.17' @@ -9253,6 +9351,21 @@ packages: license_family: BSD size: 259556 timestamp: 1685837820566 +- kind: conda + name: libstdcxx + version: 14.1.0 + build: hc0a3c3a_1 + build_number: 1 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.1.0-hc0a3c3a_1.conda + sha256: 44decb3d23abacf1c6dd59f3c152a7101b7ca565b4ef8872804ceaedcc53a9cd + md5: 9dbb9699ea467983ba8a4ba89b08b066 + depends: + - libgcc 14.1.0 h77fa898_1 + license: GPL-3.0-only WITH GCC-exception-3.1 + license_family: GPL + size: 3892781 + timestamp: 1724801863728 - kind: conda name: libstdcxx-ng version: 13.2.0 @@ -10479,6 +10592,105 @@ packages: platform: win size: 3227 timestamp: 1608166968312 +- kind: conda + name: mypy + version: 1.11.2 + build: py310h493c2e1_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/mypy-1.11.2-py310h493c2e1_0.conda + sha256: b7e8939b7d6744d20904e13684b33b0a9547f5a34b2399190a3b4ffd8fb683ae + md5: c1c91361a25c70ebee323b47d2cb6fe0 + depends: + - __osx >=11.0 + - mypy_extensions >=1.0.0 + - psutil >=4.0 + - python >=3.10,<3.11.0a0 + - python >=3.10,<3.11.0a0 *_cpython + - python_abi 3.10.* *_cp310 + - tomli >=1.1.0 + - typing_extensions >=4.1.0 + license: MIT + license_family: MIT + size: 9243747 + timestamp: 1724602122831 +- kind: conda + name: mypy + version: 1.11.2 + build: py310h837254d_0 + subdir: osx-64 + url: https://conda.anaconda.org/conda-forge/osx-64/mypy-1.11.2-py310h837254d_0.conda + sha256: 56cb03b5123514b2bb0dcab09311061cec1969feb444a0d07a7968f95a9b6851 + md5: 8908c49155ac6d84d76c956f8291806c + depends: + - __osx >=10.13 + - mypy_extensions >=1.0.0 + - psutil >=4.0 + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + - tomli >=1.1.0 + - typing_extensions >=4.1.0 + license: MIT + license_family: MIT + size: 11773874 + timestamp: 1724601916692 +- kind: conda + name: mypy + version: 1.11.2 + 
build: py310ha75aee5_0 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/mypy-1.11.2-py310ha75aee5_0.conda + sha256: a6d23ab439793aa31822060251d5839ff472120928cecc75028d8999dcb70b89 + md5: 5abca238950289c9fb9c4e942218092e + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc-ng >=13 + - mypy_extensions >=1.0.0 + - psutil >=4.0 + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + - tomli >=1.1.0 + - typing_extensions >=4.1.0 + license: MIT + license_family: MIT + size: 17827218 + timestamp: 1724602150638 +- kind: conda + name: mypy + version: 1.11.2 + build: py310ha8f682b_0 + subdir: win-64 + url: https://conda.anaconda.org/conda-forge/win-64/mypy-1.11.2-py310ha8f682b_0.conda + sha256: d61e3a8305198eedf22226721bd2e1a8bbcabb786a85ffca9bc042e77564e1f3 + md5: 4d0795f72fd05090c272d40c20f3ac61 + depends: + - mypy_extensions >=1.0.0 + - psutil >=4.0 + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + - tomli >=1.1.0 + - typing_extensions >=4.1.0 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + license: MIT + license_family: MIT + size: 9641286 + timestamp: 1724601937116 +- kind: conda + name: mypy_extensions + version: 1.0.0 + build: pyha770c72_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.0.0-pyha770c72_0.conda + sha256: f240217476e148e825420c6bc3a0c0efb08c0718b7042fae960400c02af858a3 + md5: 4eccaeba205f0aed9ac3a9ea58568ca3 + depends: + - python >=3.5 + license: MIT + license_family: MIT + size: 10492 + timestamp: 1675543414256 - kind: conda name: narwhals version: 1.8.4 @@ -13890,6 +14102,81 @@ packages: - pkg:pypi/rpds-py size: 992400 timestamp: 1697072452949 +- kind: conda + name: ruff + version: 0.6.9 + build: py310h11b6ba5_0 + subdir: win-64 + url: https://conda.anaconda.org/conda-forge/win-64/ruff-0.6.9-py310h11b6ba5_0.conda + sha256: 1b1255aaf7b07e7cf55778a82756a432c9f54afb0850d888cc5f27b072c5e41c + md5: 245c3025ee5063b8a95bab2035be42eb + 
depends: + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + - ucrt >=10.0.20348.0 + - vc >=14.2,<15 + - vc14_runtime >=14.29.30139 + license: MIT + license_family: MIT + size: 6867904 + timestamp: 1728068781085 +- kind: conda + name: ruff + version: 0.6.9 + build: py310h4f26fa7_0 + subdir: osx-64 + url: https://conda.anaconda.org/conda-forge/osx-64/ruff-0.6.9-py310h4f26fa7_0.conda + sha256: 7ba20d6fefbdb0bfc7aa20df205bb37ba5e6cf5ea6c919ddd52e121c761bc61a + md5: 3bd4e0fed3227341d2e101966f0e686a + depends: + - __osx >=10.13 + - libcxx >=17 + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + constrains: + - __osx >=10.12 + license: MIT + license_family: MIT + size: 6709143 + timestamp: 1728067364164 +- kind: conda + name: ruff + version: 0.6.9 + build: py310h624018c_0 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/ruff-0.6.9-py310h624018c_0.conda + sha256: 5fefd28dd467bc6d84cacc122c7d551db1ca29aebdb7d418447ef0670c66da25 + md5: 16502945d34d2dae72dd9b1c290b80e2 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=13 + - libstdcxx >=13 + - python >=3.10,<3.11.0a0 + - python_abi 3.10.* *_cp310 + license: MIT + license_family: MIT + size: 6970904 + timestamp: 1728067124093 +- kind: conda + name: ruff + version: 0.6.9 + build: py310he174661_0 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/ruff-0.6.9-py310he174661_0.conda + sha256: 8852e16b9819e8ff51aa7dc8363ed0fb2c9fd5a1074e87e0e719c9c2dc489da5 + md5: 7282b9b4370bf099b98cbc7c58bcc2c1 + depends: + - __osx >=11.0 + - libcxx >=17 + - python >=3.10,<3.11.0a0 + - python >=3.10,<3.11.0a0 *_cpython + - python_abi 3.10.* *_cp310 + constrains: + - __osx >=11.0 + license: MIT + license_family: MIT + size: 6341227 + timestamp: 1728067442525 - kind: conda name: rust version: 1.80.1 @@ -15582,31 +15869,33 @@ packages: timestamp: 1606414171959 - kind: conda name: vl-convert-python - version: 1.6.0 - build: py310h5b4e0ec_0 - subdir: linux-64 - url: 
https://conda.anaconda.org/conda-forge/linux-64/vl-convert-python-1.6.0-py310h5b4e0ec_0.conda - sha256: 8e788a7c3e708748521a9e0509a978d484665bd6e5c57ac32faa86dc71549bf9 - md5: eac355b92fe08a372c6d3cae74f15aac + version: 1.7.0 + build: py310h493c2e1_1 + build_number: 1 + subdir: osx-arm64 + url: https://conda.anaconda.org/conda-forge/osx-arm64/vl-convert-python-1.7.0-py310h493c2e1_1.conda + sha256: ab58104d2748cf58e4932bcd9e0c848bf32eae7c845a71ef94273045ec9cb123 + md5: d5fa3689362e4da635af4b350d73c6ff depends: - - __glibc >=2.17,<3.0.a0 - - libgcc-ng >=12 + - __osx >=11.0 - python >=3.10,<3.11.0a0 + - python >=3.10,<3.11.0a0 *_cpython - python_abi 3.10.* *_cp310 constrains: - - __glibc >=2.17 + - __osx >=10.13 license: BSD-3-Clause license_family: BSD - size: 22615432 - timestamp: 1722901880182 + size: 20936659 + timestamp: 1728157975980 - kind: conda name: vl-convert-python - version: 1.6.0 - build: py310h936d840_0 + version: 1.7.0 + build: py310h837254d_1 + build_number: 1 subdir: osx-64 - url: https://conda.anaconda.org/conda-forge/osx-64/vl-convert-python-1.6.0-py310h936d840_0.conda - sha256: 301dbd28a46fe52f6c1d7c2c713672d8e3ce652fd3df6e97c12e161d1f748086 - md5: 474c329abe2ac606f4cc04773cca8e55 + url: https://conda.anaconda.org/conda-forge/osx-64/vl-convert-python-1.7.0-py310h837254d_1.conda + sha256: 15f90c3399e5cc82f260892c286781b8352c3501a6ac2b6f58af528a14dee1a3 + md5: af4b5450186271598fdd027049d19db8 depends: - __osx >=10.13 - python >=3.10,<3.11.0a0 @@ -15615,38 +15904,38 @@ packages: - __osx >=10.13 license: BSD-3-Clause license_family: BSD - size: 21172242 - timestamp: 1722902056741 + size: 21704033 + timestamp: 1728158827629 - kind: conda name: vl-convert-python - version: 1.6.0 - build: py310ha6dd24b_0 - subdir: osx-arm64 - url: https://conda.anaconda.org/conda-forge/osx-arm64/vl-convert-python-1.6.0-py310ha6dd24b_0.conda - sha256: 441d5fe773854e34c3018ce982bc25a55a951e9b50b18a25689afac07e0ca54b - md5: 5446a027ac5a4ea7f3e95cc42717b9d4 + version: 1.7.0 
+ build: py310ha75aee5_1 + build_number: 1 + subdir: linux-64 + url: https://conda.anaconda.org/conda-forge/linux-64/vl-convert-python-1.7.0-py310ha75aee5_1.conda + sha256: f12e36c2787ab902f40268a17b35846bf842105329083de78ec78bb19d8c7621 + md5: 120c480ae62b2f896b87195197f47d48 depends: - - __osx >=11.0 + - __glibc >=2.17,<3.0.a0 + - libgcc >=13 - python >=3.10,<3.11.0a0 - - python >=3.10,<3.11.0a0 *_cpython - python_abi 3.10.* *_cp310 constrains: - - __osx >=10.13 + - __glibc >=2.17 license: BSD-3-Clause license_family: BSD - size: 20412409 - timestamp: 1722901289142 + size: 23182077 + timestamp: 1728158798404 - kind: conda name: vl-convert-python - version: 1.6.0 - build: py310hb47754f_0 + version: 1.7.0 + build: py310hdfd1e6a_1 + build_number: 1 subdir: win-64 - url: https://conda.anaconda.org/conda-forge/win-64/vl-convert-python-1.6.0-py310hb47754f_0.conda - sha256: fb55fad4b37b1aaa0294574c3ce0b724a3770c9d61ee9916ef79db2ef0d862d2 - md5: 7890e67278c0dacbd5c65d5e32f7cfe4 + url: https://conda.anaconda.org/conda-forge/win-64/vl-convert-python-1.7.0-py310hdfd1e6a_1.conda + sha256: 14b8983c1b0d69a441ce1546c9d17674e7ef9471adcd4cc143f468fc6eba7802 + md5: e38de8dc3950940b624ba1b3117801ab depends: - - m2w64-gcc-libs - - m2w64-gcc-libs-core - python >=3.10,<3.11.0a0 - python_abi 3.10.* *_cp310 - ucrt >=10.0.20348.0 @@ -15654,8 +15943,8 @@ packages: - vc14_runtime >=14.40.33810 license: BSD-3-Clause license_family: BSD - size: 21875382 - timestamp: 1722903742368 + size: 22858430 + timestamp: 1728160711471 - kind: conda name: voila version: 0.5.0 diff --git a/pixi.toml b/pixi.toml index 8f2c13844..219ba2387 100644 --- a/pixi.toml +++ b/pixi.toml @@ -29,6 +29,11 @@ dev-py = { cmd = [ # Build Python packages build-py = { cmd = "maturin build -m vegafusion-python/Cargo.toml --release --strip --sdist $0" } +fmt-py = { cmd = "ruff format", cwd="vegafusion-python" } +lint-fix-py = { cmd = "ruff format && ruff check --fix", cwd="vegafusion-python" } +lint-check-py = { cmd = "ruff 
format --check && ruff check", cwd="vegafusion-python" } +type-check-py = { cmd = "mypy", cwd="vegafusion-python" } + # test python install-chromedriver-auto = """ pip cache remove chromedriver-binary-auto && @@ -121,6 +126,10 @@ openjdk = "20.0.0.*" build = "0.7.0.*" minio-server = "2023.9.23.3.47.50.*" minio = "7.1.17.*" +rust = "1.80.*" +taplo = ">=0.9.3,<0.10" +ruff = ">=0.6.9,<0.7" +mypy = ">=1.11.2,<2" # Dependencies are those required at runtime by the Python packages [dependencies] @@ -130,11 +139,10 @@ pandas = "2.0.3.*" altair = "5.4.*" protobuf = "4.23.3.*" ipywidgets = "8.1.0.*" -rust = "1.80.*" -vl-convert-python = "1.6.*" +vl-convert-python = "1.7.*" anywidget = ">=0.9.6,<0.10" polars = "1.8.*" -taplo = ">=0.9.3,<0.10" +grpcio = ">=1.56.2,<2" [target.osx-arm64.build-dependencies] # These dependencies are for building node canvas from source on apple silicon diff --git a/vegafusion-python/.mypy.ini b/vegafusion-python/.mypy.ini new file mode 100644 index 000000000..93457ac65 --- /dev/null +++ b/vegafusion-python/.mypy.ini @@ -0,0 +1,7 @@ +[mypy] +python_version = 3.9 +cache_dir = .mypy_cache +strict = true +files = vegafusion/ + +ignore_missing_imports = true \ No newline at end of file diff --git a/vegafusion-python/pyproject.toml b/vegafusion-python/pyproject.toml index c679e9f43..a8dbfe51e 100644 --- a/vegafusion-python/pyproject.toml +++ b/vegafusion-python/pyproject.toml @@ -3,14 +3,14 @@ name = "vegafusion" version = "1.6.9" description = "Core tools for using VegaFusion from Python" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.9" keywords = ["vega", "altair", "vegafusion", "arrow"] classifiers = [ - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "License :: OSI Approved :: BSD License", "Topic :: 
Scientific/Engineering :: Visualization", ] @@ -32,7 +32,3 @@ Documentation = "https://vegafusion.io" [tool.maturin] module-name = "vegafusion._vegafusion" - -[tool.black] -line-length = 88 -target_version = ["py38"] diff --git a/vegafusion-python/ruff.toml b/vegafusion-python/ruff.toml new file mode 100644 index 000000000..e5e302c5e --- /dev/null +++ b/vegafusion-python/ruff.toml @@ -0,0 +1,65 @@ +line-length = 88 +indent-width = 4 +include = ["vegafusion/**/*.py", "tests/**/*.py"] +exclude = ["vegafusion/datasource/_dfi_types.py", "tests/altair_mocks/**/*.py"] +target-version = "py39" + +[lint] +select = [ + "E", # pycodestyle Error + "F", # Pyflakes + "W", # pycodestyle Warning + "I", # isort + "N", # PEP8 Naming + "UP", # pyupgrade + "YTT", # flake8-2020 + "ANN", # flake8-annotations + "S", # flake8-bandit + "BLE", # flake8-blind-except + "B", # flake8-bugbear + "A", # flake8-builtins + "C4", # flake8-comprehensions + "EM", # flake8-errmsg + "FA", # flake8-future-annotations +] + +ignore = [ + # Allow assert statements + "S101", + + # Rules that conflict with other rules + "D211", # No blank lines allowed before class docstring (no-blank-line-before-class-docstring) + "D213", # Multi-line docstring summary should start at the second line (multi-line-docstring-summary-start) + "EM101", # Exception must not use a string literal + "EM102", # Exception must not use an f-string literal + + # Not compatible with Python 3.9. 
Re-enable when upgrading to Python 3.10+ + "UP007", # Using old-style union syntax (e.g., Union[int, str] instead of int | str) + "UP018", # Unnecessary call to a literal type (e.g., str("foo") instead of "foo") (native-literals) + + # Deprecated and not useful + "ANN101", # Missing type annotation for self in method (missing-type-annotation-for-self) + "ANN102", # Missing type annotation for cls in classmethod (missing-type-cls) + + "S608", # SQL injection + + # Conflicting lint rules with Ruff formatter + "W191", # Tab indentation + "E111", # Indentation with invalid multiple + "E114", # Indentation with invalid multiple comment + "E117", # Over-indented + "D206", # Indent with spaces + "D300", # Triple single quotes + "Q000", # Bad quotes inline string + "Q001", # Bad quotes multiline string + "Q002", # Bad quotes docstring + "Q003", # Avoidable escaped quote + "COM812", # Missing trailing comma + "COM819", # Prohibited trailing comma + "ISC001", # Single-line implicit string concatenation + "ISC002", # Multi-line implicit string concatenation +] + +[lint.per-file-ignores] +# Ignore some lint checks in test directory +"tests/*" = ["D", "ANN", "S", "I", "E501"] diff --git a/vegafusion-python/tests/altair_mocks/area/cumulative_count/mock.py b/vegafusion-python/tests/altair_mocks/area/cumulative_count/mock.py index 15ab3a428..74cd7df64 100644 --- a/vegafusion-python/tests/altair_mocks/area/cumulative_count/mock.py +++ b/vegafusion-python/tests/altair_mocks/area/cumulative_count/mock.py @@ -8,7 +8,4 @@ alt.Chart(source).transform_window( cumulative_count="count()", sort=[{"field": "IMDB_Rating"}], -).mark_area().encode( - x="IMDB_Rating:Q", - y="cumulative_count:Q" -) +).mark_area().encode(x="IMDB_Rating:Q", y="cumulative_count:Q") diff --git a/vegafusion-python/tests/altair_mocks/area/density_facet/mock.py b/vegafusion-python/tests/altair_mocks/area/density_facet/mock.py index 30bf3d986..6c46d607f 100644 --- a/vegafusion-python/tests/altair_mocks/area/density_facet/mock.py +++
b/vegafusion-python/tests/altair_mocks/area/density_facet/mock.py @@ -5,18 +5,10 @@ source = data.iris() alt.Chart(source).transform_fold( - ['petalWidth', - 'petalLength', - 'sepalWidth', - 'sepalLength'], - as_ = ['Measurement_type', 'value'] + ["petalWidth", "petalLength", "sepalWidth", "sepalLength"], + as_=["Measurement_type", "value"], ).transform_density( - density='value', - bandwidth=0.3, - groupby=['Measurement_type'], - extent= [0, 8] + density="value", bandwidth=0.3, groupby=["Measurement_type"], extent=[0, 8] ).mark_area().encode( - alt.X('value:Q'), - alt.Y('density:Q'), - alt.Row('Measurement_type:N') -).properties(width=300, height=50) \ No newline at end of file + alt.X("value:Q"), alt.Y("density:Q"), alt.Row("Measurement_type:N") +).properties(width=300, height=50) diff --git a/vegafusion-python/tests/altair_mocks/area/density_stack/mock.py b/vegafusion-python/tests/altair_mocks/area/density_stack/mock.py index ad0d0273e..40e87546a 100644 --- a/vegafusion-python/tests/altair_mocks/area/density_stack/mock.py +++ b/vegafusion-python/tests/altair_mocks/area/density_stack/mock.py @@ -6,20 +6,15 @@ source = data.iris() alt.Chart(source).transform_fold( - ['petalWidth', - 'petalLength', - 'sepalWidth', - 'sepalLength'], - as_ = ['Measurement_type', 'value'] + ["petalWidth", "petalLength", "sepalWidth", "sepalLength"], + as_=["Measurement_type", "value"], ).transform_density( - density='value', + density="value", bandwidth=0.3, - groupby=['Measurement_type'], - extent= [0, 8], - counts = True, - steps=200 + groupby=["Measurement_type"], + extent=[0, 8], + counts=True, + steps=200, ).mark_area().encode( - alt.X('value:Q'), - alt.Y('density:Q', stack='zero'), - alt.Color('Measurement_type:N') + alt.X("value:Q"), alt.Y("density:Q", stack="zero"), alt.Color("Measurement_type:N") ).properties(width=400, height=100) diff --git a/vegafusion-python/tests/altair_mocks/area/gradient/mock.py b/vegafusion-python/tests/altair_mocks/area/gradient/mock.py index 
d2b63bc5e..e73f5d5dc 100644 --- a/vegafusion-python/tests/altair_mocks/area/gradient/mock.py +++ b/vegafusion-python/tests/altair_mocks/area/gradient/mock.py @@ -5,20 +5,17 @@ source = data.stocks() -alt.Chart(source).transform_filter( - 'datum.symbol==="GOOG"' -).mark_area( - line={'color':'darkgreen'}, +alt.Chart(source).transform_filter('datum.symbol==="GOOG"').mark_area( + line={"color": "darkgreen"}, color=alt.Gradient( - gradient='linear', - stops=[alt.GradientStop(color='white', offset=0), - alt.GradientStop(color='darkgreen', offset=1)], + gradient="linear", + stops=[ + alt.GradientStop(color="white", offset=0), + alt.GradientStop(color="darkgreen", offset=1), + ], x1=1, x2=1, y1=1, - y2=0 - ) -).encode( - alt.X('date:T'), - alt.Y('price:Q') -) + y2=0, + ), +).encode(alt.X("date:T"), alt.Y("price:Q")) diff --git a/vegafusion-python/tests/altair_mocks/area/horizon_graph/mock.py b/vegafusion-python/tests/altair_mocks/area/horizon_graph/mock.py index 0a628a76a..447866ad4 100644 --- a/vegafusion-python/tests/altair_mocks/area/horizon_graph/mock.py +++ b/vegafusion-python/tests/altair_mocks/area/horizon_graph/mock.py @@ -3,35 +3,44 @@ import altair as alt import pandas as pd -source = pd.DataFrame([ - {"x": 1, "y": 28}, {"x": 2, "y": 55}, - {"x": 3, "y": 43}, {"x": 4, "y": 91}, - {"x": 5, "y": 81}, {"x": 6, "y": 53}, - {"x": 7, "y": 19}, {"x": 8, "y": 87}, - {"x": 9, "y": 52}, {"x": 10, "y": 48}, - {"x": 11, "y": 24}, {"x": 12, "y": 49}, - {"x": 13, "y": 87}, {"x": 14, "y": 66}, - {"x": 15, "y": 17}, {"x": 16, "y": 27}, - {"x": 17, "y": 68}, {"x": 18, "y": 16}, - {"x": 19, "y": 49}, {"x": 20, "y": 15} -]) +source = pd.DataFrame( + [ + {"x": 1, "y": 28}, + {"x": 2, "y": 55}, + {"x": 3, "y": 43}, + {"x": 4, "y": 91}, + {"x": 5, "y": 81}, + {"x": 6, "y": 53}, + {"x": 7, "y": 19}, + {"x": 8, "y": 87}, + {"x": 9, "y": 52}, + {"x": 10, "y": 48}, + {"x": 11, "y": 24}, + {"x": 12, "y": 49}, + {"x": 13, "y": 87}, + {"x": 14, "y": 66}, + {"x": 15, "y": 17}, + {"x": 16, 
"y": 27}, + {"x": 17, "y": 68}, + {"x": 18, "y": 16}, + {"x": 19, "y": 49}, + {"x": 20, "y": 15}, + ] +) -area1 = alt.Chart(source).mark_area( - clip=True, - interpolate='monotone' -).encode( - alt.X('x', scale=alt.Scale(zero=False, nice=False)), - alt.Y('y', scale=alt.Scale(domain=[0, 50]), title='y'), - opacity=alt.value(0.6) -).properties( - width=500, - height=75 +area1 = ( + alt.Chart(source) + .mark_area(clip=True, interpolate="monotone") + .encode( + alt.X("x", scale=alt.Scale(zero=False, nice=False)), + alt.Y("y", scale=alt.Scale(domain=[0, 50]), title="y"), + opacity=alt.value(0.6), + ) + .properties(width=500, height=75) ) area2 = area1.encode( - alt.Y('ny:Q', scale=alt.Scale(domain=[0, 50])) -).transform_calculate( - "ny", alt.datum.y - 50 -) + alt.Y("ny:Q", scale=alt.Scale(domain=[0, 50])) +).transform_calculate("ny", alt.datum.y - 50) area1 + area2 diff --git a/vegafusion-python/tests/altair_mocks/area/layered/mock.py b/vegafusion-python/tests/altair_mocks/area/layered/mock.py index 3fa018c35..34c26d991 100644 --- a/vegafusion-python/tests/altair_mocks/area/layered/mock.py +++ b/vegafusion-python/tests/altair_mocks/area/layered/mock.py @@ -6,7 +6,5 @@ source = data.iowa_electricity() alt.Chart(source).mark_area(opacity=0.3).encode( - x="year:T", - y=alt.Y("net_generation:Q", stack=None), - color="source:N" + x="year:T", y=alt.Y("net_generation:Q", stack=None), color="source:N" ) diff --git a/vegafusion-python/tests/altair_mocks/area/normalized_stacked/mock.py b/vegafusion-python/tests/altair_mocks/area/normalized_stacked/mock.py index a0e5b4cf7..f4410e72b 100644 --- a/vegafusion-python/tests/altair_mocks/area/normalized_stacked/mock.py +++ b/vegafusion-python/tests/altair_mocks/area/normalized_stacked/mock.py @@ -6,7 +6,5 @@ source = data.iowa_electricity() alt.Chart(source).mark_area().encode( - x="year:T", - y=alt.Y("net_generation:Q", stack="normalize"), - color="source:N" + x="year:T", y=alt.Y("net_generation:Q", stack="normalize"), 
color="source:N" ) diff --git a/vegafusion-python/tests/altair_mocks/area/streamgraph/mock.py b/vegafusion-python/tests/altair_mocks/area/streamgraph/mock.py index 56624680b..fdbebc65a 100644 --- a/vegafusion-python/tests/altair_mocks/area/streamgraph/mock.py +++ b/vegafusion-python/tests/altair_mocks/area/streamgraph/mock.py @@ -6,11 +6,7 @@ source = data.unemployment_across_industries.url alt.Chart(source).mark_area().encode( - alt.X('yearmonth(date):T', - axis=alt.Axis(format='%Y', domain=False, tickSize=0) - ), - alt.Y('sum(count):Q', stack='center', axis=None), - alt.Color('series:N', - scale=alt.Scale(scheme='category20b') - ) + alt.X("yearmonth(date):T", axis=alt.Axis(format="%Y", domain=False, tickSize=0)), + alt.Y("sum(count):Q", stack="center", axis=None), + alt.Color("series:N", scale=alt.Scale(scheme="category20b")), ).interactive() diff --git a/vegafusion-python/tests/altair_mocks/area/trellis/mock.py b/vegafusion-python/tests/altair_mocks/area/trellis/mock.py index 6c121a984..bb4855a19 100644 --- a/vegafusion-python/tests/altair_mocks/area/trellis/mock.py +++ b/vegafusion-python/tests/altair_mocks/area/trellis/mock.py @@ -6,10 +6,5 @@ source = data.iowa_electricity() alt.Chart(source).mark_area().encode( - x="year:T", - y="net_generation:Q", - color="source:N", - row="source:N" -).properties( - height=100 -) + x="year:T", y="net_generation:Q", color="source:N", row="source:N" +).properties(height=100) diff --git a/vegafusion-python/tests/altair_mocks/area/trellis_sort_array/mock.py b/vegafusion-python/tests/altair_mocks/area/trellis_sort_array/mock.py index 25e3f4c25..4cc384fda 100644 --- a/vegafusion-python/tests/altair_mocks/area/trellis_sort_array/mock.py +++ b/vegafusion-python/tests/altair_mocks/area/trellis_sort_array/mock.py @@ -5,11 +5,9 @@ source = data.stocks() -alt.Chart(source).transform_filter( - alt.datum.symbol != 'GOOG' -).mark_area().encode( - x='date:T', - y='price:Q', - color='symbol:N', - row=alt.Row('symbol:N', sort=['MSFT', 
'AAPL', 'IBM', 'AMZN']) +alt.Chart(source).transform_filter(alt.datum.symbol != "GOOG").mark_area().encode( + x="date:T", + y="price:Q", + color="symbol:N", + row=alt.Row("symbol:N", sort=["MSFT", "AAPL", "IBM", "AMZN"]), ).properties(height=50, width=400) diff --git a/vegafusion-python/tests/altair_mocks/bar/and_tick_chart/mock.py b/vegafusion-python/tests/altair_mocks/bar/and_tick_chart/mock.py index b2669dd38..7ad42dd90 100644 --- a/vegafusion-python/tests/altair_mocks/bar/and_tick_chart/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/and_tick_chart/mock.py @@ -3,26 +3,31 @@ import altair as alt import pandas as pd -source = pd.DataFrame({ - 'project': ['a', 'b', 'c', 'd', 'e', 'f', 'g'], - 'score': [25, 57, 23, 19, 8, 47, 8], - 'goal': [25, 47, 30, 27, 38, 19, 4] -}) +source = pd.DataFrame( + { + "project": ["a", "b", "c", "d", "e", "f", "g"], + "score": [25, 57, 23, 19, 8, 47, 8], + "goal": [25, 47, 30, 27, 38, 19, 4], + } +) -bar = alt.Chart(source).mark_bar().encode( - x='project', - y='score' -).properties( - width=alt.Step(40) # controls width of bar. +bar = ( + alt.Chart(source) + .mark_bar() + .encode(x="project", y="score") + .properties( + width=alt.Step(40) # controls width of bar. + ) ) -tick = alt.Chart(source).mark_tick( - color='red', - thickness=2, - size=40 * 0.9, # controls width of tick. -).encode( - x='project', - y='goal' +tick = ( + alt.Chart(source) + .mark_tick( + color="red", + thickness=2, + size=40 * 0.9, # controls width of tick. 
+ ) + .encode(x="project", y="goal") ) -bar + tick \ No newline at end of file +bar + tick diff --git a/vegafusion-python/tests/altair_mocks/bar/diverging_stacked/mock.py b/vegafusion-python/tests/altair_mocks/bar/diverging_stacked/mock.py index 7e1bd976f..a61d1b37d 100644 --- a/vegafusion-python/tests/altair_mocks/bar/diverging_stacked/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/diverging_stacked/mock.py @@ -3,335 +3,330 @@ import altair as alt import pandas as pd -source = pd.DataFrame([ - { - "question": "Question 1", - "type": "Strongly disagree", - "value": 24, - "percentage": 0.7, - "percentage_start": -19.1, - "percentage_end": -18.4 - }, - { - "question": "Question 1", - "type": "Disagree", - "value": 294, - "percentage": 9.1, - "percentage_start": -18.4, - "percentage_end": -9.2 - }, - { - "question": "Question 1", - "type": "Neither agree nor disagree", - "value": 594, - "percentage": 18.5, - "percentage_start": -9.2, - "percentage_end": 9.2 - }, - { - "question": "Question 1", - "type": "Agree", - "value": 1927, - "percentage": 59.9, - "percentage_start": 9.2, - "percentage_end": 69.2 - }, - { - "question": "Question 1", - "type": "Strongly agree", - "value": 376, - "percentage": 11.7, - "percentage_start": 69.2, - "percentage_end": 80.9 - }, - - { - "question": "Question 2", - "type": "Strongly disagree", - "value": 2, - "percentage": 18.2, - "percentage_start": -36.4, - "percentage_end": -18.2 - }, - { - "question": "Question 2", - "type": "Disagree", - "value": 2, - "percentage": 18.2, - "percentage_start": -18.2, - "percentage_end": 0 - }, - { - "question": "Question 2", - "type": "Neither agree nor disagree", - "value": 0, - "percentage": 0, - "percentage_start": 0, - "percentage_end": 0 - }, - { - "question": "Question 2", - "type": "Agree", - "value": 7, - "percentage": 63.6, - "percentage_start": 0, - "percentage_end": 63.6 - }, - { - "question": "Question 2", - "type": "Strongly agree", - "value": 11, - "percentage": 0, - 
"percentage_start": 63.6, - "percentage_end": 63.6 - }, - - { - "question": "Question 3", - "type": "Strongly disagree", - "value": 2, - "percentage": 20, - "percentage_start": -30, - "percentage_end": -10 - }, - { - "question": "Question 3", - "type": "Disagree", - "value": 0, - "percentage": 0, - "percentage_start": -10, - "percentage_end": -10 - }, - { - "question": "Question 3", - "type": "Neither agree nor disagree", - "value": 2, - "percentage": 20, - "percentage_start": -10, - "percentage_end": 10 - }, - { - "question": "Question 3", - "type": "Agree", - "value": 4, - "percentage": 40, - "percentage_start": 10, - "percentage_end": 50 - }, - { - "question": "Question 3", - "type": "Strongly agree", - "value": 2, - "percentage": 20, - "percentage_start": 50, - "percentage_end": 70 - }, - - { - "question": "Question 4", - "type": "Strongly disagree", - "value": 0, - "percentage": 0, - "percentage_start": -15.6, - "percentage_end": -15.6 - }, - { - "question": "Question 4", - "type": "Disagree", - "value": 2, - "percentage": 12.5, - "percentage_start": -15.6, - "percentage_end": -3.1 - }, - { - "question": "Question 4", - "type": "Neither agree nor disagree", - "value": 1, - "percentage": 6.3, - "percentage_start": -3.1, - "percentage_end": 3.1 - }, - { - "question": "Question 4", - "type": "Agree", - "value": 7, - "percentage": 43.8, - "percentage_start": 3.1, - "percentage_end": 46.9 - }, - { - "question": "Question 4", - "type": "Strongly agree", - "value": 6, - "percentage": 37.5, - "percentage_start": 46.9, - "percentage_end": 84.4 - }, - - { - "question": "Question 5", - "type": "Strongly disagree", - "value": 0, - "percentage": 0, - "percentage_start": -10.4, - "percentage_end": -10.4 - }, - { - "question": "Question 5", - "type": "Disagree", - "value": 1, - "percentage": 4.2, - "percentage_start": -10.4, - "percentage_end": -6.3 - }, - { - "question": "Question 5", - "type": "Neither agree nor disagree", - "value": 3, - "percentage": 12.5, - 
"percentage_start": -6.3, - "percentage_end": 6.3 - }, - { - "question": "Question 5", - "type": "Agree", - "value": 16, - "percentage": 66.7, - "percentage_start": 6.3, - "percentage_end": 72.9 - }, - { - "question": "Question 5", - "type": "Strongly agree", - "value": 4, - "percentage": 16.7, - "percentage_start": 72.9, - "percentage_end": 89.6 - }, - - { - "question": "Question 6", - "type": "Strongly disagree", - "value": 1, - "percentage": 6.3, - "percentage_start": -18.8, - "percentage_end": -12.5 - }, - { - "question": "Question 6", - "type": "Disagree", - "value": 1, - "percentage": 6.3, - "percentage_start": -12.5, - "percentage_end": -6.3 - }, - { - "question": "Question 6", - "type": "Neither agree nor disagree", - "value": 2, - "percentage": 12.5, - "percentage_start": -6.3, - "percentage_end": 6.3 - }, - { - "question": "Question 6", - "type": "Agree", - "value": 9, - "percentage": 56.3, - "percentage_start": 6.3, - "percentage_end": 62.5 - }, - { - "question": "Question 6", - "type": "Strongly agree", - "value": 3, - "percentage": 18.8, - "percentage_start": 62.5, - "percentage_end": 81.3 - }, - - { - "question": "Question 7", - "type": "Strongly disagree", - "value": 0, - "percentage": 0, - "percentage_start": -10, - "percentage_end": -10 - }, - { - "question": "Question 7", - "type": "Disagree", - "value": 0, - "percentage": 0, - "percentage_start": -10, - "percentage_end": -10 - }, - { - "question": "Question 7", - "type": "Neither agree nor disagree", - "value": 1, - "percentage": 20, - "percentage_start": -10, - "percentage_end": 10 - }, - { - "question": "Question 7", - "type": "Agree", - "value": 4, - "percentage": 80, - "percentage_start": 10, - "percentage_end": 90 - }, - { - "question": "Question 7", - "type": "Strongly agree", - "value": 0, - "percentage": 0, - "percentage_start": 90, - "percentage_end": 90 - }, - - { - "question": "Question 8", - "type": "Strongly disagree", - "value": 0, - "percentage": 0, - "percentage_start": 0, - 
"percentage_end": 0 - }, - { - "question": "Question 8", - "type": "Disagree", - "value": 0, - "percentage": 0, - "percentage_start": 0, - "percentage_end": 0 - }, - { - "question": "Question 8", - "type": "Neither agree nor disagree", - "value": 0, - "percentage": 0, - "percentage_start": 0, - "percentage_end": 0 - }, - { - "question": "Question 8", - "type": "Agree", - "value": 0, - "percentage": 0, - "percentage_start": 0, - "percentage_end": 0 - }, - { - "question": "Question 8", - "type": "Strongly agree", - "value": 2, - "percentage": 100, - "percentage_start": 0, - "percentage_end": 100 - } -]) +source = pd.DataFrame( + [ + { + "question": "Question 1", + "type": "Strongly disagree", + "value": 24, + "percentage": 0.7, + "percentage_start": -19.1, + "percentage_end": -18.4, + }, + { + "question": "Question 1", + "type": "Disagree", + "value": 294, + "percentage": 9.1, + "percentage_start": -18.4, + "percentage_end": -9.2, + }, + { + "question": "Question 1", + "type": "Neither agree nor disagree", + "value": 594, + "percentage": 18.5, + "percentage_start": -9.2, + "percentage_end": 9.2, + }, + { + "question": "Question 1", + "type": "Agree", + "value": 1927, + "percentage": 59.9, + "percentage_start": 9.2, + "percentage_end": 69.2, + }, + { + "question": "Question 1", + "type": "Strongly agree", + "value": 376, + "percentage": 11.7, + "percentage_start": 69.2, + "percentage_end": 80.9, + }, + { + "question": "Question 2", + "type": "Strongly disagree", + "value": 2, + "percentage": 18.2, + "percentage_start": -36.4, + "percentage_end": -18.2, + }, + { + "question": "Question 2", + "type": "Disagree", + "value": 2, + "percentage": 18.2, + "percentage_start": -18.2, + "percentage_end": 0, + }, + { + "question": "Question 2", + "type": "Neither agree nor disagree", + "value": 0, + "percentage": 0, + "percentage_start": 0, + "percentage_end": 0, + }, + { + "question": "Question 2", + "type": "Agree", + "value": 7, + "percentage": 63.6, + "percentage_start": 0, + 
"percentage_end": 63.6, + }, + { + "question": "Question 2", + "type": "Strongly agree", + "value": 11, + "percentage": 0, + "percentage_start": 63.6, + "percentage_end": 63.6, + }, + { + "question": "Question 3", + "type": "Strongly disagree", + "value": 2, + "percentage": 20, + "percentage_start": -30, + "percentage_end": -10, + }, + { + "question": "Question 3", + "type": "Disagree", + "value": 0, + "percentage": 0, + "percentage_start": -10, + "percentage_end": -10, + }, + { + "question": "Question 3", + "type": "Neither agree nor disagree", + "value": 2, + "percentage": 20, + "percentage_start": -10, + "percentage_end": 10, + }, + { + "question": "Question 3", + "type": "Agree", + "value": 4, + "percentage": 40, + "percentage_start": 10, + "percentage_end": 50, + }, + { + "question": "Question 3", + "type": "Strongly agree", + "value": 2, + "percentage": 20, + "percentage_start": 50, + "percentage_end": 70, + }, + { + "question": "Question 4", + "type": "Strongly disagree", + "value": 0, + "percentage": 0, + "percentage_start": -15.6, + "percentage_end": -15.6, + }, + { + "question": "Question 4", + "type": "Disagree", + "value": 2, + "percentage": 12.5, + "percentage_start": -15.6, + "percentage_end": -3.1, + }, + { + "question": "Question 4", + "type": "Neither agree nor disagree", + "value": 1, + "percentage": 6.3, + "percentage_start": -3.1, + "percentage_end": 3.1, + }, + { + "question": "Question 4", + "type": "Agree", + "value": 7, + "percentage": 43.8, + "percentage_start": 3.1, + "percentage_end": 46.9, + }, + { + "question": "Question 4", + "type": "Strongly agree", + "value": 6, + "percentage": 37.5, + "percentage_start": 46.9, + "percentage_end": 84.4, + }, + { + "question": "Question 5", + "type": "Strongly disagree", + "value": 0, + "percentage": 0, + "percentage_start": -10.4, + "percentage_end": -10.4, + }, + { + "question": "Question 5", + "type": "Disagree", + "value": 1, + "percentage": 4.2, + "percentage_start": -10.4, + "percentage_end": 
-6.3, + }, + { + "question": "Question 5", + "type": "Neither agree nor disagree", + "value": 3, + "percentage": 12.5, + "percentage_start": -6.3, + "percentage_end": 6.3, + }, + { + "question": "Question 5", + "type": "Agree", + "value": 16, + "percentage": 66.7, + "percentage_start": 6.3, + "percentage_end": 72.9, + }, + { + "question": "Question 5", + "type": "Strongly agree", + "value": 4, + "percentage": 16.7, + "percentage_start": 72.9, + "percentage_end": 89.6, + }, + { + "question": "Question 6", + "type": "Strongly disagree", + "value": 1, + "percentage": 6.3, + "percentage_start": -18.8, + "percentage_end": -12.5, + }, + { + "question": "Question 6", + "type": "Disagree", + "value": 1, + "percentage": 6.3, + "percentage_start": -12.5, + "percentage_end": -6.3, + }, + { + "question": "Question 6", + "type": "Neither agree nor disagree", + "value": 2, + "percentage": 12.5, + "percentage_start": -6.3, + "percentage_end": 6.3, + }, + { + "question": "Question 6", + "type": "Agree", + "value": 9, + "percentage": 56.3, + "percentage_start": 6.3, + "percentage_end": 62.5, + }, + { + "question": "Question 6", + "type": "Strongly agree", + "value": 3, + "percentage": 18.8, + "percentage_start": 62.5, + "percentage_end": 81.3, + }, + { + "question": "Question 7", + "type": "Strongly disagree", + "value": 0, + "percentage": 0, + "percentage_start": -10, + "percentage_end": -10, + }, + { + "question": "Question 7", + "type": "Disagree", + "value": 0, + "percentage": 0, + "percentage_start": -10, + "percentage_end": -10, + }, + { + "question": "Question 7", + "type": "Neither agree nor disagree", + "value": 1, + "percentage": 20, + "percentage_start": -10, + "percentage_end": 10, + }, + { + "question": "Question 7", + "type": "Agree", + "value": 4, + "percentage": 80, + "percentage_start": 10, + "percentage_end": 90, + }, + { + "question": "Question 7", + "type": "Strongly agree", + "value": 0, + "percentage": 0, + "percentage_start": 90, + "percentage_end": 90, + }, 
+ { + "question": "Question 8", + "type": "Strongly disagree", + "value": 0, + "percentage": 0, + "percentage_start": 0, + "percentage_end": 0, + }, + { + "question": "Question 8", + "type": "Disagree", + "value": 0, + "percentage": 0, + "percentage_start": 0, + "percentage_end": 0, + }, + { + "question": "Question 8", + "type": "Neither agree nor disagree", + "value": 0, + "percentage": 0, + "percentage_start": 0, + "percentage_end": 0, + }, + { + "question": "Question 8", + "type": "Agree", + "value": 0, + "percentage": 0, + "percentage_start": 0, + "percentage_end": 0, + }, + { + "question": "Question 8", + "type": "Strongly agree", + "value": 2, + "percentage": 100, + "percentage_start": 0, + "percentage_end": 100, + }, + ] +) color_scale = alt.Scale( domain=[ @@ -339,26 +334,20 @@ "Disagree", "Neither agree nor disagree", "Agree", - "Strongly agree" + "Strongly agree", ], - range=["#c30d24", "#f3a583", "#cccccc", "#94c6da", "#1770ab"] + range=["#c30d24", "#f3a583", "#cccccc", "#94c6da", "#1770ab"], ) -y_axis = alt.Axis( - title='Question', - offset=5, - ticks=False, - minExtent=60, - domain=False -) +y_axis = alt.Axis(title="Question", offset=5, ticks=False, minExtent=60, domain=False) alt.Chart(source).mark_bar().encode( - x='percentage_start:Q', - x2='percentage_end:Q', - y=alt.Y('question:N', axis=y_axis), + x="percentage_start:Q", + x2="percentage_end:Q", + y=alt.Y("question:N", axis=y_axis), color=alt.Color( - 'type:N', - legend=alt.Legend( title='Response'), + "type:N", + legend=alt.Legend(title="Response"), scale=color_scale, - ) -) \ No newline at end of file + ), +) diff --git a/vegafusion-python/tests/altair_mocks/bar/grouped/mock.py b/vegafusion-python/tests/altair_mocks/bar/grouped/mock.py index 140c9edfe..5701df3f8 100644 --- a/vegafusion-python/tests/altair_mocks/bar/grouped/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/grouped/mock.py @@ -6,8 +6,5 @@ source = data.barley() alt.Chart(source).mark_bar().encode( - x='year:O', - 
y='sum(yield):Q', - color='year:N', - column='site:N' + x="year:O", y="sum(yield):Q", color="year:N", column="site:N" ) diff --git a/vegafusion-python/tests/altair_mocks/bar/horizontal/mock.py b/vegafusion-python/tests/altair_mocks/bar/horizontal/mock.py index e56bee082..f3a7fd554 100644 --- a/vegafusion-python/tests/altair_mocks/bar/horizontal/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/horizontal/mock.py @@ -6,7 +6,4 @@ source = data.wheat() -alt.Chart(source).mark_bar().encode( - x='wheat:Q', - y="year:O" -).properties(height=500) +alt.Chart(source).mark_bar().encode(x="wheat:Q", y="year:O").properties(height=500) diff --git a/vegafusion-python/tests/altair_mocks/bar/horizontal_grouped/mock.py b/vegafusion-python/tests/altair_mocks/bar/horizontal_grouped/mock.py index 5ef536506..9e3008f6b 100644 --- a/vegafusion-python/tests/altair_mocks/bar/horizontal_grouped/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/horizontal_grouped/mock.py @@ -6,8 +6,5 @@ source = data.barley() alt.Chart(source).mark_bar().encode( - x='sum(yield):Q', - y='year:O', - color='year:N', - row='site:N' + x="sum(yield):Q", y="year:O", color="year:N", row="site:N" ) diff --git a/vegafusion-python/tests/altair_mocks/bar/horizontal_stacked/mock.py b/vegafusion-python/tests/altair_mocks/bar/horizontal_stacked/mock.py index d0b56c4fa..db73101fd 100644 --- a/vegafusion-python/tests/altair_mocks/bar/horizontal_stacked/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/horizontal_stacked/mock.py @@ -5,8 +5,4 @@ source = data.barley() -alt.Chart(source).mark_bar().encode( - x='sum(yield)', - y='variety', - color='site' -) +alt.Chart(source).mark_bar().encode(x="sum(yield)", y="variety", color="site") diff --git a/vegafusion-python/tests/altair_mocks/bar/layered/mock.py b/vegafusion-python/tests/altair_mocks/bar/layered/mock.py index f82caed12..85c3ec70c 100644 --- a/vegafusion-python/tests/altair_mocks/bar/layered/mock.py +++ 
b/vegafusion-python/tests/altair_mocks/bar/layered/mock.py @@ -6,7 +6,7 @@ source = data.iowa_electricity() alt.Chart(source).mark_bar(opacity=0.7).encode( - x='year:T', - y=alt.Y('net_generation:Q', stack=None), + x="year:T", + y=alt.Y("net_generation:Q", stack=None), color="source", ) diff --git a/vegafusion-python/tests/altair_mocks/bar/normalized_stacked/mock.py b/vegafusion-python/tests/altair_mocks/bar/normalized_stacked/mock.py index 2083cdce4..f3e54bd02 100644 --- a/vegafusion-python/tests/altair_mocks/bar/normalized_stacked/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/normalized_stacked/mock.py @@ -6,7 +6,5 @@ source = data.barley() alt.Chart(source).mark_bar().encode( - x=alt.X('sum(yield)', stack="normalize"), - y='variety', - color='site' + x=alt.X("sum(yield)", stack="normalize"), y="variety", color="site" ) diff --git a/vegafusion-python/tests/altair_mocks/bar/percentage_of_total/mock.py b/vegafusion-python/tests/altair_mocks/bar/percentage_of_total/mock.py index 9c232c665..65c3620fe 100644 --- a/vegafusion-python/tests/altair_mocks/bar/percentage_of_total/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/percentage_of_total/mock.py @@ -3,14 +3,15 @@ import altair as alt import pandas as pd -source = pd.DataFrame({'Activity': ['Sleeping', 'Eating', 'TV', 'Work', 'Exercise'], - 'Time': [8, 2, 4, 8, 2]}) +source = pd.DataFrame( + { + "Activity": ["Sleeping", "Eating", "TV", "Work", "Exercise"], + "Time": [8, 2, 4, 8, 2], + } +) alt.Chart(source).transform_joinaggregate( - TotalTime='sum(Time)', -).transform_calculate( - PercentOfTotal="datum.Time / datum.TotalTime" -).mark_bar().encode( - alt.X('PercentOfTotal:Q', axis=alt.Axis(format='.0%')), - y='Activity:N' + TotalTime="sum(Time)", +).transform_calculate(PercentOfTotal="datum.Time / datum.TotalTime").mark_bar().encode( + alt.X("PercentOfTotal:Q", axis=alt.Axis(format=".0%")), y="Activity:N" ) diff --git a/vegafusion-python/tests/altair_mocks/bar/sorted/mock.py 
b/vegafusion-python/tests/altair_mocks/bar/sorted/mock.py index abeb51a9f..164e6615d 100644 --- a/vegafusion-python/tests/altair_mocks/bar/sorted/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/sorted/mock.py @@ -5,7 +5,4 @@ source = data.barley() -alt.Chart(source).mark_bar().encode( - x='sum(yield):Q', - y=alt.Y('site:N', sort='-x') -) +alt.Chart(source).mark_bar().encode(x="sum(yield):Q", y=alt.Y("site:N", sort="-x")) diff --git a/vegafusion-python/tests/altair_mocks/bar/stacked/mock.py b/vegafusion-python/tests/altair_mocks/bar/stacked/mock.py index 98bbe52ad..10d37101f 100644 --- a/vegafusion-python/tests/altair_mocks/bar/stacked/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/stacked/mock.py @@ -5,8 +5,4 @@ source = data.barley() -alt.Chart(source).mark_bar().encode( - x='variety', - y='sum(yield)', - color='site' -) \ No newline at end of file +alt.Chart(source).mark_bar().encode(x="variety", y="sum(yield)", color="site") diff --git a/vegafusion-python/tests/altair_mocks/bar/stacked_with_sorted_segments/mock.py b/vegafusion-python/tests/altair_mocks/bar/stacked_with_sorted_segments/mock.py index ad31b8d6b..58553ef15 100644 --- a/vegafusion-python/tests/altair_mocks/bar/stacked_with_sorted_segments/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/stacked_with_sorted_segments/mock.py @@ -6,12 +6,12 @@ source = data.barley() alt.Chart(source).mark_bar().encode( - x='sum(yield)', - y='variety', - color='site', + x="sum(yield)", + y="variety", + color="site", order=alt.Order( # Sort the segments of the bars by this field - 'site', - sort='ascending' - ) + "site", + sort="ascending", + ), ) diff --git a/vegafusion-python/tests/altair_mocks/bar/stacked_with_text_overlay/mock.py b/vegafusion-python/tests/altair_mocks/bar/stacked_with_text_overlay/mock.py index aa44349cd..00c1e315d 100644 --- a/vegafusion-python/tests/altair_mocks/bar/stacked_with_text_overlay/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/stacked_with_text_overlay/mock.py 
@@ -3,19 +3,27 @@ import altair as alt from vega_datasets import data -source=data.barley() +source = data.barley() -bars = alt.Chart(source).mark_bar().encode( - x=alt.X('sum(yield):Q', stack='zero'), - y=alt.Y('variety:N'), - color=alt.Color('site') +bars = ( + alt.Chart(source) + .mark_bar() + .encode( + x=alt.X("sum(yield):Q", stack="zero"), + y=alt.Y("variety:N"), + color=alt.Color("site"), + ) ) -text = alt.Chart(source).mark_text(dx=-15, dy=3, color='white').encode( - x=alt.X('sum(yield):Q', stack='zero'), - y=alt.Y('variety:N'), - detail='site:N', - text=alt.Text('sum(yield):Q', format='.1f') +text = ( + alt.Chart(source) + .mark_text(dx=-15, dy=3, color="white") + .encode( + x=alt.X("sum(yield):Q", stack="zero"), + y=alt.Y("variety:N"), + detail="site:N", + text=alt.Text("sum(yield):Q", format=".1f"), + ) ) bars + text diff --git a/vegafusion-python/tests/altair_mocks/bar/trellis_compact/mock.py b/vegafusion-python/tests/altair_mocks/bar/trellis_compact/mock.py index d2fac9937..8c53c8249 100644 --- a/vegafusion-python/tests/altair_mocks/bar/trellis_compact/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/trellis_compact/mock.py @@ -43,4 +43,4 @@ ), row=alt.Row("a:N", title="Factor A", header=alt.Header(labelAngle=0)), column=alt.Column("b:N", title="Factor B"), -) \ No newline at end of file +) diff --git a/vegafusion-python/tests/altair_mocks/bar/trellis_stacked/mock.py b/vegafusion-python/tests/altair_mocks/bar/trellis_stacked/mock.py index 8204a6e81..0b777c3a6 100644 --- a/vegafusion-python/tests/altair_mocks/bar/trellis_stacked/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/trellis_stacked/mock.py @@ -6,8 +6,5 @@ source = data.barley() alt.Chart(source).mark_bar().encode( - column='year', - x='yield', - y='variety', - color='site' -).properties(width=220) \ No newline at end of file + column="year", x="yield", y="variety", color="site" +).properties(width=220) diff --git a/vegafusion-python/tests/altair_mocks/bar/with_error_bars/mock.py 
b/vegafusion-python/tests/altair_mocks/bar/with_error_bars/mock.py index a0f561a73..33c230815 100644 --- a/vegafusion-python/tests/altair_mocks/bar/with_error_bars/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/with_error_bars/mock.py @@ -5,17 +5,16 @@ source = data.barley() -bars = alt.Chart().mark_bar().encode( - x='year:O', - y=alt.Y('mean(yield):Q', title='Mean Yield'), - color='year:N', +bars = ( + alt.Chart() + .mark_bar() + .encode( + x="year:O", + y=alt.Y("mean(yield):Q", title="Mean Yield"), + color="year:N", + ) ) -error_bars = alt.Chart().mark_errorbar(extent='ci').encode( - x='year:O', - y='yield:Q' -) +error_bars = alt.Chart().mark_errorbar(extent="ci").encode(x="year:O", y="yield:Q") -alt.layer(bars, error_bars, data=source).facet( - column='site:N' -) \ No newline at end of file +alt.layer(bars, error_bars, data=source).facet(column="site:N") diff --git a/vegafusion-python/tests/altair_mocks/bar/with_highlighted_bar/mock.py b/vegafusion-python/tests/altair_mocks/bar/with_highlighted_bar/mock.py index a7a33e7b6..4a912c1dd 100644 --- a/vegafusion-python/tests/altair_mocks/bar/with_highlighted_bar/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/with_highlighted_bar/mock.py @@ -6,12 +6,12 @@ source = data.wheat() alt.Chart(source).mark_bar().encode( - x='year:O', + x="year:O", y="wheat:Q", # The highlight will be set on the result of a conditional statement color=alt.condition( alt.datum.year == 1810, # If the year is 1810 this test returns True, - alt.value('orange'), # which sets the bar orange. - alt.value('steelblue') # And if it's not true it sets the bar steelblue. - ) + alt.value("orange"), # which sets the bar orange. + alt.value("steelblue"), # And if it's not true it sets the bar steelblue. 
+ ), ).properties(width=600) diff --git a/vegafusion-python/tests/altair_mocks/bar/with_labels/mock.py b/vegafusion-python/tests/altair_mocks/bar/with_labels/mock.py index e7974eea0..f7dee7b36 100644 --- a/vegafusion-python/tests/altair_mocks/bar/with_labels/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/with_labels/mock.py @@ -6,17 +6,12 @@ source = data.wheat() -bars = alt.Chart(source).mark_bar().encode( - x='wheat:Q', - y="year:O" -) +bars = alt.Chart(source).mark_bar().encode(x="wheat:Q", y="year:O") text = bars.mark_text( - align='left', - baseline='middle', - dx=3 # Nudges text to right so it doesn't appear on top of the bar -).encode( - text='wheat:Q' -) + align="left", + baseline="middle", + dx=3, # Nudges text to right so it doesn't appear on top of the bar +).encode(text="wheat:Q") (bars + text).properties(height=500) diff --git a/vegafusion-python/tests/altair_mocks/bar/with_line_at_mean/mock.py b/vegafusion-python/tests/altair_mocks/bar/with_line_at_mean/mock.py index e42ec0838..742fc8998 100644 --- a/vegafusion-python/tests/altair_mocks/bar/with_line_at_mean/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/with_line_at_mean/mock.py @@ -5,13 +5,8 @@ source = data.wheat() -bar = alt.Chart(source).mark_bar().encode( - x='year:O', - y='wheat:Q' -) +bar = alt.Chart(source).mark_bar().encode(x="year:O", y="wheat:Q") -rule = alt.Chart(source).mark_rule(color='red').encode( - y='mean(wheat):Q' -) +rule = alt.Chart(source).mark_rule(color="red").encode(y="mean(wheat):Q") (bar + rule).properties(width=600) diff --git a/vegafusion-python/tests/altair_mocks/bar/with_line_on_dual_axis/mock.py b/vegafusion-python/tests/altair_mocks/bar/with_line_on_dual_axis/mock.py index a8b324eb5..4563be535 100644 --- a/vegafusion-python/tests/altair_mocks/bar/with_line_on_dual_axis/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/with_line_on_dual_axis/mock.py @@ -5,12 +5,10 @@ source = data.wheat() -base = alt.Chart(source).encode(x='year:O') +base = 
alt.Chart(source).encode(x="year:O") -bar = base.mark_bar().encode(y='wheat:Q') +bar = base.mark_bar().encode(y="wheat:Q") -line = base.mark_line(color='red').encode( - y='wages:Q' -) +line = base.mark_line(color="red").encode(y="wages:Q") (bar + line).properties(width=600) diff --git a/vegafusion-python/tests/altair_mocks/bar/with_negative_values/mock.py b/vegafusion-python/tests/altair_mocks/bar/with_negative_values/mock.py index b15c0ee7e..f91baa9be 100644 --- a/vegafusion-python/tests/altair_mocks/bar/with_negative_values/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/with_negative_values/mock.py @@ -11,6 +11,6 @@ color=alt.condition( alt.datum.nonfarm_change > 0, alt.value("steelblue"), # The positive color - alt.value("orange") # The negative color - ) -).properties(width=600) \ No newline at end of file + alt.value("orange"), # The negative color + ), +).properties(width=600) diff --git a/vegafusion-python/tests/altair_mocks/bar/with_rolling_mean/mock.py b/vegafusion-python/tests/altair_mocks/bar/with_rolling_mean/mock.py index a7a10b903..257e3f245 100644 --- a/vegafusion-python/tests/altair_mocks/bar/with_rolling_mean/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/with_rolling_mean/mock.py @@ -5,19 +5,18 @@ source = data.wheat() -bar = alt.Chart(source).mark_bar().encode( - x='year:O', - y='wheat:Q' -) +bar = alt.Chart(source).mark_bar().encode(x="year:O", y="wheat:Q") -line = alt.Chart(source).mark_line(color='red').transform_window( - # The field to average - rolling_mean='mean(wheat)', - # The number of values before and after the current value to include. - frame=[-9, 0] -).encode( - x='year:O', - y='rolling_mean:Q' +line = ( + alt.Chart(source) + .mark_line(color="red") + .transform_window( + # The field to average + rolling_mean="mean(wheat)", + # The number of values before and after the current value to include. 
+ frame=[-9, 0], + ) + .encode(x="year:O", y="rolling_mean:Q") ) (bar + line).properties(width=600) diff --git a/vegafusion-python/tests/altair_mocks/bar/with_rounded_edges/mock.py b/vegafusion-python/tests/altair_mocks/bar/with_rounded_edges/mock.py index fb996d2ba..29f51ab93 100644 --- a/vegafusion-python/tests/altair_mocks/bar/with_rounded_edges/mock.py +++ b/vegafusion-python/tests/altair_mocks/bar/with_rounded_edges/mock.py @@ -5,11 +5,6 @@ source = data.seattle_weather() -alt.Chart(source).mark_bar( - cornerRadiusTopLeft=3, - cornerRadiusTopRight=3 -).encode( - x='month(date):O', - y='count():Q', - color='weather:N' +alt.Chart(source).mark_bar(cornerRadiusTopLeft=3, cornerRadiusTopRight=3).encode( + x="month(date):O", y="count():Q", color="weather:N" ) diff --git a/vegafusion-python/tests/altair_mocks/casestudy/airports/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/airports/mock.py index d76fc5914..0024fe53a 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/airports/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/airports/mock.py @@ -4,25 +4,25 @@ from vega_datasets import data airports = data.airports() -states = alt.topo_feature(data.us_10m.url, feature='states') +states = alt.topo_feature(data.us_10m.url, feature="states") # US states background -background = alt.Chart(states).mark_geoshape( - fill='lightgray', - stroke='white' -).properties( - width=500, - height=300 -).project('albersUsa') +background = ( + alt.Chart(states) + .mark_geoshape(fill="lightgray", stroke="white") + .properties(width=500, height=300) + .project("albersUsa") +) # airport positions on background -points = alt.Chart(airports).mark_circle( - size=10, - color='steelblue' -).encode( - longitude='longitude:Q', - latitude='latitude:Q', - tooltip=['name', 'city', 'state'] +points = ( + alt.Chart(airports) + .mark_circle(size=10, color="steelblue") + .encode( + longitude="longitude:Q", + latitude="latitude:Q", + tooltip=["name", "city", "state"], + ) ) 
background + points diff --git a/vegafusion-python/tests/altair_mocks/casestudy/anscombe_plot/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/anscombe_plot/mock.py index b42bc861a..548e77e37 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/anscombe_plot/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/anscombe_plot/mock.py @@ -6,9 +6,9 @@ source = data.anscombe() alt.Chart(source).mark_circle().encode( - alt.X('X', scale=alt.Scale(zero=False)), - alt.Y('Y', scale=alt.Scale(zero=False)), - alt.Facet('Series', columns=2), + alt.X("X", scale=alt.Scale(zero=False)), + alt.Y("Y", scale=alt.Scale(zero=False)), + alt.Facet("Series", columns=2), ).properties( width=180, height=180, diff --git a/vegafusion-python/tests/altair_mocks/casestudy/beckers_barley_trellis_plot/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/beckers_barley_trellis_plot/mock.py index 3f55f49f1..1d9603e64 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/beckers_barley_trellis_plot/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/beckers_barley_trellis_plot/mock.py @@ -7,23 +7,18 @@ alt.Chart(source, title="The Morris Mistake").mark_point().encode( alt.X( - 'yield:Q', + "yield:Q", title="Barley Yield (bushels/acre)", scale=alt.Scale(zero=False), - axis=alt.Axis(grid=False) + axis=alt.Axis(grid=False), ), - alt.Y( - 'variety:N', - title="", - sort='-x', - axis=alt.Axis(grid=True) - ), - color=alt.Color('year:N', legend=alt.Legend(title="Year")), + alt.Y("variety:N", title="", sort="-x", axis=alt.Axis(grid=True)), + color=alt.Color("year:N", legend=alt.Legend(title="Year")), facet=alt.Facet( - 'site:N', + "site:N", columns=2, title="", - sort=alt.EncodingSortField(field='yield', op='sum', order='descending') + sort=alt.EncodingSortField(field="yield", op="sum", order="descending"), ), ).properties( width=200, diff --git a/vegafusion-python/tests/altair_mocks/casestudy/co2_concentration/mock.py 
b/vegafusion-python/tests/altair_mocks/casestudy/co2_concentration/mock.py index e301b852a..22f07198e 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/co2_concentration/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/co2_concentration/mock.py @@ -5,55 +5,37 @@ source = data.co2_concentration.url -base = alt.Chart( - source, - title="Carbon Dioxide in the Atmosphere" -).transform_calculate( - year="year(datum.Date)" -).transform_calculate( - decade="floor(datum.year / 10)" -).transform_calculate( - scaled_date="(datum.year % 10) + (month(datum.Date)/12)" -).transform_window( - first_date='first_value(scaled_date)', - last_date='last_value(scaled_date)', - sort=[{"field": "scaled_date", "order": "ascending"}], - groupby=['decade'], - frame=[None, None] -).transform_calculate( - end="datum.first_date === datum.scaled_date ? 'first' : datum.last_date === datum.scaled_date ? 'last' : null" -).encode( - x=alt.X( - "scaled_date:Q", - axis=alt.Axis(title="Year into Decade", tickCount=11) - ), - y=alt.Y( - "CO2:Q", - title="CO2 concentration in ppm", - scale=alt.Scale(zero=False) +base = ( + alt.Chart(source, title="Carbon Dioxide in the Atmosphere") + .transform_calculate(year="year(datum.Date)") + .transform_calculate(decade="floor(datum.year / 10)") + .transform_calculate(scaled_date="(datum.year % 10) + (month(datum.Date)/12)") + .transform_window( + first_date="first_value(scaled_date)", + last_date="last_value(scaled_date)", + sort=[{"field": "scaled_date", "order": "ascending"}], + groupby=["decade"], + frame=[None, None], + ) + .transform_calculate( + end="datum.first_date === datum.scaled_date ? 'first' : datum.last_date === datum.scaled_date ? 
'last' : null" + ) + .encode( + x=alt.X("scaled_date:Q", axis=alt.Axis(title="Year into Decade", tickCount=11)), + y=alt.Y("CO2:Q", title="CO2 concentration in ppm", scale=alt.Scale(zero=False)), ) ) line = base.mark_line().encode( - color=alt.Color( - "decade:O", - scale=alt.Scale(scheme="magma"), - legend=None - ) + color=alt.Color("decade:O", scale=alt.Scale(scheme="magma"), legend=None) ) text = base.encode(text="year:N") -start_year = text.transform_filter( - alt.datum.end == 'first' -).mark_text(baseline="top") +start_year = text.transform_filter(alt.datum.end == "first").mark_text(baseline="top") -end_year = text.transform_filter( - alt.datum.end == 'last' -).mark_text(baseline="bottom") +end_year = text.transform_filter(alt.datum.end == "last").mark_text(baseline="bottom") -(line + start_year + end_year).configure_text( - align="left", - dx=1, - dy=3 -).properties(width=600, height=375) +(line + start_year + end_year).configure_text(align="left", dx=1, dy=3).properties( + width=600, height=375 +) diff --git a/vegafusion-python/tests/altair_mocks/casestudy/falkensee/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/falkensee/mock.py index 1dc85856e..3f25bcfc4 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/falkensee/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/falkensee/mock.py @@ -41,39 +41,28 @@ {"year": "2011", "population": 40465}, {"year": "2012", "population": 40905}, {"year": "2013", "population": 41258}, - {"year": "2014", "population": 41777} + {"year": "2014", "population": 41777}, ] -source2 = [{ - "start": "1933", - "end": "1945", - "event": "Nazi Rule" -}, - { - "start": "1948", - "end": "1989", - "event": "GDR (East Germany)" - }] +source2 = [ + {"start": "1933", "end": "1945", "event": "Nazi Rule"}, + {"start": "1948", "end": "1989", "event": "GDR (East Germany)"}, +] source = pd.DataFrame(source) source2 = pd.DataFrame(source2) -line = alt.Chart(source).mark_line(color='#333').encode( - alt.X('year:T', 
axis=alt.Axis(format='%Y')), - y='population' -).properties( - width=500, - height=300 +line = ( + alt.Chart(source) + .mark_line(color="#333") + .encode(alt.X("year:T", axis=alt.Axis(format="%Y")), y="population") + .properties(width=500, height=300) ) -point = line.mark_point(color='#333') +point = line.mark_point(color="#333") -rect = alt.Chart(source2).mark_rect().encode( - x='start:T', - x2='end:T', - color='event:N' -) +rect = alt.Chart(source2).mark_rect().encode(x="start:T", x2="end:T", color="event:N") rect + line + point diff --git a/vegafusion-python/tests/altair_mocks/casestudy/gapminder_bubble_plot/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/gapminder_bubble_plot/mock.py index 9c91b7b0f..4a9052ec0 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/gapminder_bubble_plot/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/gapminder_bubble_plot/mock.py @@ -1,12 +1,11 @@ # https://altair-viz.github.io/gallery/gapminder_bubble_plot.html import altair as alt -from vega_datasets import data source = "https://raw.githubusercontent.com/vega/vega-datasets/main/data/gapminder-health-income.csv" alt.Chart(source).mark_circle().encode( - alt.X('income:Q', scale=alt.Scale(type='log')), - alt.Y('health:Q', scale=alt.Scale(zero=False)), - size='population:Q' + alt.X("income:Q", scale=alt.Scale(type="log")), + alt.Y("health:Q", scale=alt.Scale(zero=False)), + size="population:Q", ) diff --git a/vegafusion-python/tests/altair_mocks/casestudy/iowa_electricity/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/iowa_electricity/mock.py index dbe12f57a..d8137dbc9 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/iowa_electricity/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/iowa_electricity/mock.py @@ -6,10 +6,7 @@ source = data.iowa_electricity() alt.Chart(source, title="Iowa's renewable energy boom").mark_area().encode( - x=alt.X( - "year:T", - title="Year" - ), + x=alt.X("year:T", title="Year"), y=alt.Y( 
"net_generation:Q", stack="normalize", @@ -19,5 +16,5 @@ color=alt.Color( "source:N", legend=alt.Legend(title="Electricity source"), - ) -) \ No newline at end of file + ), +) diff --git a/vegafusion-python/tests/altair_mocks/casestudy/isotype/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/isotype/mock.py index a15a7f9da..592d9db4c 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/isotype/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/isotype/mock.py @@ -3,70 +3,71 @@ import altair as alt import pandas as pd -source = pd.DataFrame([ - {'country': 'Great Britain', 'animal': 'cattle'}, - {'country': 'Great Britain', 'animal': 'cattle'}, - {'country': 'Great Britain', 'animal': 'cattle'}, - {'country': 'Great Britain', 'animal': 'pigs'}, - {'country': 'Great Britain', 'animal': 'pigs'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'pigs'}, - {'country': 'United States', 'animal': 'pigs'}, - {'country': 'United States', 'animal': 'pigs'}, - {'country': 'United States', 'animal': 
'pigs'}, - {'country': 'United States', 'animal': 'pigs'}, - {'country': 'United States', 'animal': 'pigs'}, - {'country': 'United States', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'sheep'} -]) +source = pd.DataFrame( + [ + {"country": "Great Britain", "animal": "cattle"}, + {"country": "Great Britain", "animal": "cattle"}, + {"country": "Great Britain", "animal": "cattle"}, + {"country": "Great Britain", "animal": "pigs"}, + {"country": "Great Britain", "animal": "pigs"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "pigs"}, + {"country": "United States", "animal": "pigs"}, + {"country": "United States", "animal": "pigs"}, + {"country": "United States", "animal": "pigs"}, + {"country": "United States", "animal": "pigs"}, + {"country": 
"United States", "animal": "pigs"}, + {"country": "United States", "animal": "sheep"}, + {"country": "United States", "animal": "sheep"}, + {"country": "United States", "animal": "sheep"}, + {"country": "United States", "animal": "sheep"}, + {"country": "United States", "animal": "sheep"}, + {"country": "United States", "animal": "sheep"}, + {"country": "United States", "animal": "sheep"}, + ] +) -domains = ['person', 'cattle', 'pigs', 'sheep'] +domains = ["person", "cattle", "pigs", "sheep"] shape_scale = alt.Scale( domain=domains, range=[ - 'M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 -0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 -0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 -0.6 -0.4 -0.6z', - 'M4 -2c0 0 0.9 -0.7 1.1 -0.8c0.1 -0.1 -0.1 0.5 -0.3 0.7c-0.2 0.2 1.1 1.1 1.1 1.2c0 0.2 -0.2 0.8 -0.4 0.7c-0.1 0 -0.8 -0.3 -1.3 -0.2c-0.5 0.1 -1.3 1.6 -1.5 2c-0.3 0.4 -0.6 0.4 -0.6 0.4c0 0.1 0.3 1.7 0.4 1.8c0.1 0.1 -0.4 0.1 -0.5 0c0 0 -0.6 -1.9 -0.6 -1.9c-0.1 0 -0.3 -0.1 -0.3 -0.1c0 0.1 -0.5 1.4 -0.4 1.6c0.1 0.2 0.1 0.3 0.1 0.3c0 0 -0.4 0 -0.4 0c0 0 -0.2 -0.1 -0.1 -0.3c0 -0.2 0.3 -1.7 0.3 -1.7c0 0 -2.8 -0.9 -2.9 -0.8c-0.2 0.1 -0.4 0.6 -0.4 1c0 0.4 0.5 1.9 0.5 1.9l-0.5 0l-0.6 -2l0 -0.6c0 0 -1 0.8 -1 1c0 0.2 -0.2 1.3 -0.2 1.3c0 0 0.3 0.3 0.2 0.3c0 0 -0.5 0 -0.5 0c0 0 -0.2 -0.2 -0.1 -0.4c0 -0.1 0.2 -1.6 0.2 -1.6c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 0 -2.7 -0.2 -2.7c-0.1 0 -0.4 2 -0.4 2c0 0 0 0.2 -0.2 0.5c-0.1 0.4 -0.2 1.1 -0.2 1.1c0 0 -0.2 -0.1 -0.2 -0.2c0 -0.1 -0.1 -0.7 0 -0.7c0.1 -0.1 0.3 -0.8 0.4 -1.4c0 -0.6 0.2 -1.3 0.4 -1.5c0.1 -0.2 0.6 -0.4 0.6 -0.4z', - 'M1.2 -2c0 0 0.7 0 1.2 0.5c0.5 0.5 0.4 0.6 0.5 0.6c0.1 0 0.7 0 0.8 0.1c0.1 0 0.2 0.2 0.2 0.2c0 0 -0.6 0.2 -0.6 0.3c0 0.1 0.4 0.9 0.6 0.9c0.1 0 0.6 0 0.6 0.1c0 0.1 0 0.7 -0.1 0.7c-0.1 0 -1.2 0.4 -1.5 0.5c-0.3 0.1 -1.1 0.5 -1.1 0.7c-0.1 0.2 0.4 1.2 0.4 1.2l-0.4 0c0 0 
-0.4 -0.8 -0.4 -0.9c0 -0.1 -0.1 -0.3 -0.1 -0.3l-0.2 0l-0.5 1.3l-0.4 0c0 0 -0.1 -0.4 0 -0.6c0.1 -0.1 0.3 -0.6 0.3 -0.7c0 0 -0.8 0 -1.5 -0.1c-0.7 -0.1 -1.2 -0.3 -1.2 -0.2c0 0.1 -0.4 0.6 -0.5 0.6c0 0 0.3 0.9 0.3 0.9l-0.4 0c0 0 -0.4 -0.5 -0.4 -0.6c0 -0.1 -0.2 -0.6 -0.2 -0.5c0 0 -0.4 0.4 -0.6 0.4c-0.2 0.1 -0.4 0.1 -0.4 0.1c0 0 -0.1 0.6 -0.1 0.6l-0.5 0l0 -1c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 -0.7 -1.2 -0.6 -1.4c0.1 -0.1 0.1 -1.1 0.1 -1.1c0 0 -0.2 0.1 -0.2 0.1c0 0 0 0.9 0 1c0 0.1 -0.2 0.3 -0.3 0.3c-0.1 0 0 -0.5 0 -0.9c0 -0.4 0 -0.4 0.2 -0.6c0.2 -0.2 0.6 -0.3 0.8 -0.8c0.3 -0.5 1 -0.6 1 -0.6z', - 'M-4.1 -0.5c0.2 0 0.2 0.2 0.5 0.2c0.3 0 0.3 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.4 -0.2c0.1 0 0.2 0.2 0.4 0.1c0.2 0 0.2 -0.2 0.4 -0.3c0.1 0 0.1 -0.1 0.4 0c0.3 0 0.3 -0.4 0.6 -0.4c0.3 0 0.6 -0.3 0.7 -0.2c0.1 0.1 1.4 1 1.3 1.4c-0.1 0.4 -0.3 0.3 -0.4 0.3c-0.1 0 -0.5 -0.4 -0.7 -0.2c-0.3 0.2 -0.1 0.4 -0.2 0.6c-0.1 0.1 -0.2 0.2 -0.3 0.4c0 0.2 0.1 0.3 0 0.5c-0.1 0.2 -0.3 0.2 -0.3 0.5c0 0.3 -0.2 0.3 -0.3 0.6c-0.1 0.2 0 0.3 -0.1 0.5c-0.1 0.2 -0.1 0.2 -0.2 0.3c-0.1 0.1 0.3 1.1 0.3 1.1l-0.3 0c0 0 -0.3 -0.9 -0.3 -1c0 -0.1 -0.1 -0.2 -0.3 -0.2c-0.2 0 -0.3 0.1 -0.4 0.4c0 0.3 -0.2 0.8 -0.2 0.8l-0.3 0l0.3 -1c0 0 0.1 -0.6 -0.2 -0.5c-0.3 0.1 -0.2 -0.1 -0.4 -0.1c-0.2 -0.1 -0.3 0.1 -0.4 0c-0.2 -0.1 -0.3 0.1 -0.5 0c-0.2 -0.1 -0.1 0 -0.3 0.3c-0.2 0.3 -0.4 0.3 -0.4 0.3l0.2 1.1l-0.3 0l-0.2 -1.1c0 0 -0.4 -0.6 -0.5 -0.4c-0.1 0.3 -0.1 0.4 -0.3 0.4c-0.1 -0.1 -0.2 1.1 -0.2 1.1l-0.3 0l0.2 -1.1c0 0 -0.3 -0.1 -0.3 -0.5c0 -0.3 0.1 -0.5 0.1 -0.7c0.1 -0.2 -0.1 -1 -0.2 -1.1c-0.1 -0.2 -0.2 -0.8 -0.2 -0.8c0 0 -0.1 -0.5 0.4 -0.8z' - ] + "M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 -0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 -0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 -0.6 -0.4 -0.6z", + "M4 -2c0 0 
0.9 -0.7 1.1 -0.8c0.1 -0.1 -0.1 0.5 -0.3 0.7c-0.2 0.2 1.1 1.1 1.1 1.2c0 0.2 -0.2 0.8 -0.4 0.7c-0.1 0 -0.8 -0.3 -1.3 -0.2c-0.5 0.1 -1.3 1.6 -1.5 2c-0.3 0.4 -0.6 0.4 -0.6 0.4c0 0.1 0.3 1.7 0.4 1.8c0.1 0.1 -0.4 0.1 -0.5 0c0 0 -0.6 -1.9 -0.6 -1.9c-0.1 0 -0.3 -0.1 -0.3 -0.1c0 0.1 -0.5 1.4 -0.4 1.6c0.1 0.2 0.1 0.3 0.1 0.3c0 0 -0.4 0 -0.4 0c0 0 -0.2 -0.1 -0.1 -0.3c0 -0.2 0.3 -1.7 0.3 -1.7c0 0 -2.8 -0.9 -2.9 -0.8c-0.2 0.1 -0.4 0.6 -0.4 1c0 0.4 0.5 1.9 0.5 1.9l-0.5 0l-0.6 -2l0 -0.6c0 0 -1 0.8 -1 1c0 0.2 -0.2 1.3 -0.2 1.3c0 0 0.3 0.3 0.2 0.3c0 0 -0.5 0 -0.5 0c0 0 -0.2 -0.2 -0.1 -0.4c0 -0.1 0.2 -1.6 0.2 -1.6c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 0 -2.7 -0.2 -2.7c-0.1 0 -0.4 2 -0.4 2c0 0 0 0.2 -0.2 0.5c-0.1 0.4 -0.2 1.1 -0.2 1.1c0 0 -0.2 -0.1 -0.2 -0.2c0 -0.1 -0.1 -0.7 0 -0.7c0.1 -0.1 0.3 -0.8 0.4 -1.4c0 -0.6 0.2 -1.3 0.4 -1.5c0.1 -0.2 0.6 -0.4 0.6 -0.4z", + "M1.2 -2c0 0 0.7 0 1.2 0.5c0.5 0.5 0.4 0.6 0.5 0.6c0.1 0 0.7 0 0.8 0.1c0.1 0 0.2 0.2 0.2 0.2c0 0 -0.6 0.2 -0.6 0.3c0 0.1 0.4 0.9 0.6 0.9c0.1 0 0.6 0 0.6 0.1c0 0.1 0 0.7 -0.1 0.7c-0.1 0 -1.2 0.4 -1.5 0.5c-0.3 0.1 -1.1 0.5 -1.1 0.7c-0.1 0.2 0.4 1.2 0.4 1.2l-0.4 0c0 0 -0.4 -0.8 -0.4 -0.9c0 -0.1 -0.1 -0.3 -0.1 -0.3l-0.2 0l-0.5 1.3l-0.4 0c0 0 -0.1 -0.4 0 -0.6c0.1 -0.1 0.3 -0.6 0.3 -0.7c0 0 -0.8 0 -1.5 -0.1c-0.7 -0.1 -1.2 -0.3 -1.2 -0.2c0 0.1 -0.4 0.6 -0.5 0.6c0 0 0.3 0.9 0.3 0.9l-0.4 0c0 0 -0.4 -0.5 -0.4 -0.6c0 -0.1 -0.2 -0.6 -0.2 -0.5c0 0 -0.4 0.4 -0.6 0.4c-0.2 0.1 -0.4 0.1 -0.4 0.1c0 0 -0.1 0.6 -0.1 0.6l-0.5 0l0 -1c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 -0.7 -1.2 -0.6 -1.4c0.1 -0.1 0.1 -1.1 0.1 -1.1c0 0 -0.2 0.1 -0.2 0.1c0 0 0 0.9 0 1c0 0.1 -0.2 0.3 -0.3 0.3c-0.1 0 0 -0.5 0 -0.9c0 -0.4 0 -0.4 0.2 -0.6c0.2 -0.2 0.6 -0.3 0.8 -0.8c0.3 -0.5 1 -0.6 1 -0.6z", + "M-4.1 -0.5c0.2 0 0.2 0.2 0.5 0.2c0.3 0 0.3 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.4 -0.2c0.1 0 0.2 0.2 0.4 0.1c0.2 0 0.2 -0.2 0.4 -0.3c0.1 0 0.1 -0.1 0.4 0c0.3 0 0.3 -0.4 0.6 -0.4c0.3 0 0.6 -0.3 0.7 -0.2c0.1 0.1 1.4 1 1.3 1.4c-0.1 
0.4 -0.3 0.3 -0.4 0.3c-0.1 0 -0.5 -0.4 -0.7 -0.2c-0.3 0.2 -0.1 0.4 -0.2 0.6c-0.1 0.1 -0.2 0.2 -0.3 0.4c0 0.2 0.1 0.3 0 0.5c-0.1 0.2 -0.3 0.2 -0.3 0.5c0 0.3 -0.2 0.3 -0.3 0.6c-0.1 0.2 0 0.3 -0.1 0.5c-0.1 0.2 -0.1 0.2 -0.2 0.3c-0.1 0.1 0.3 1.1 0.3 1.1l-0.3 0c0 0 -0.3 -0.9 -0.3 -1c0 -0.1 -0.1 -0.2 -0.3 -0.2c-0.2 0 -0.3 0.1 -0.4 0.4c0 0.3 -0.2 0.8 -0.2 0.8l-0.3 0l0.3 -1c0 0 0.1 -0.6 -0.2 -0.5c-0.3 0.1 -0.2 -0.1 -0.4 -0.1c-0.2 -0.1 -0.3 0.1 -0.4 0c-0.2 -0.1 -0.3 0.1 -0.5 0c-0.2 -0.1 -0.1 0 -0.3 0.3c-0.2 0.3 -0.4 0.3 -0.4 0.3l0.2 1.1l-0.3 0l-0.2 -1.1c0 0 -0.4 -0.6 -0.5 -0.4c-0.1 0.3 -0.1 0.4 -0.3 0.4c-0.1 -0.1 -0.2 1.1 -0.2 1.1l-0.3 0l0.2 -1.1c0 0 -0.3 -0.1 -0.3 -0.5c0 -0.3 0.1 -0.5 0.1 -0.7c0.1 -0.2 -0.1 -1 -0.2 -1.1c-0.1 -0.2 -0.2 -0.8 -0.2 -0.8c0 0 -0.1 -0.5 0.4 -0.8z", + ], ) color_scale = alt.Scale( domain=domains, - range=['rgb(162,160,152)', 'rgb(194,81,64)', 'rgb(93,93,93)', 'rgb(91,131,149)'] + range=["rgb(162,160,152)", "rgb(194,81,64)", "rgb(93,93,93)", "rgb(91,131,149)"], ) alt.Chart(source).mark_point(filled=True, opacity=1, size=100).encode( - alt.X('x:O', axis=None), - alt.Y('animal:O', axis=None), - alt.Row('country:N', header=alt.Header(title='')), - alt.Shape('animal:N', legend=None, scale=shape_scale), - alt.Color('animal:N', legend=None, scale=color_scale), -).transform_window( - x='rank()', - groupby=['country', 'animal'] -).properties(width=550, height=140) \ No newline at end of file + alt.X("x:O", axis=None), + alt.Y("animal:O", axis=None), + alt.Row("country:N", header=alt.Header(title="")), + alt.Shape("animal:N", legend=None, scale=shape_scale), + alt.Color("animal:N", legend=None, scale=color_scale), +).transform_window(x="rank()", groupby=["country", "animal"]).properties( + width=550, height=140 +) diff --git a/vegafusion-python/tests/altair_mocks/casestudy/isotype_emoji/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/isotype_emoji/mock.py index 927885702..04890cf6c 100644 --- 
a/vegafusion-python/tests/altair_mocks/casestudy/isotype_emoji/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/isotype_emoji/mock.py @@ -3,55 +3,56 @@ import altair as alt import pandas as pd -source = pd.DataFrame([ - {'country': 'Great Britain', 'animal': 'cattle'}, - {'country': 'Great Britain', 'animal': 'cattle'}, - {'country': 'Great Britain', 'animal': 'cattle'}, - {'country': 'Great Britain', 'animal': 'pigs'}, - {'country': 'Great Britain', 'animal': 'pigs'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'Great Britain', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'cattle'}, - {'country': 'United States', 'animal': 'pigs'}, - {'country': 'United States', 'animal': 'pigs'}, - {'country': 'United States', 'animal': 'pigs'}, - {'country': 'United States', 'animal': 'pigs'}, - {'country': 'United States', 'animal': 'pigs'}, - {'country': 'United States', 'animal': 'pigs'}, - {'country': 'United States', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'sheep'}, - {'country': 'United States', 
'animal': 'sheep'}, - {'country': 'United States', 'animal': 'sheep'}, - {'country': 'United States', 'animal': 'sheep'} -]) +source = pd.DataFrame( + [ + {"country": "Great Britain", "animal": "cattle"}, + {"country": "Great Britain", "animal": "cattle"}, + {"country": "Great Britain", "animal": "cattle"}, + {"country": "Great Britain", "animal": "pigs"}, + {"country": "Great Britain", "animal": "pigs"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "Great Britain", "animal": "sheep"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "cattle"}, + {"country": "United States", "animal": "pigs"}, + {"country": "United States", "animal": "pigs"}, + {"country": "United States", "animal": "pigs"}, + {"country": "United States", "animal": "pigs"}, + {"country": "United States", "animal": "pigs"}, + {"country": "United States", "animal": "pigs"}, + {"country": "United States", "animal": "sheep"}, + {"country": "United States", "animal": "sheep"}, + {"country": "United States", "animal": "sheep"}, + {"country": "United States", "animal": "sheep"}, + {"country": "United States", "animal": "sheep"}, + {"country": "United States", "animal": "sheep"}, + 
{"country": "United States", "animal": "sheep"}, + ] +) -alt.Chart(source).mark_text(size=45, baseline='middle').encode( - alt.X('x:O', axis=None), - alt.Y('animal:O', axis=None), - alt.Row('country:N', header=alt.Header(title='')), - alt.Text('emoji:N') +alt.Chart(source).mark_text(size=45, baseline="middle").encode( + alt.X("x:O", axis=None), + alt.Y("animal:O", axis=None), + alt.Row("country:N", header=alt.Header(title="")), + alt.Text("emoji:N"), ).transform_calculate( emoji="{'cattle': '🐄', 'pigs': '🐖', 'sheep': '🐏'}[datum.animal]" -).transform_window( - x='rank()', - groupby=['country', 'animal'] -).properties(width=550, height=140) +).transform_window(x="rank()", groupby=["country", "animal"]).properties( + width=550, height=140 +) diff --git a/vegafusion-python/tests/altair_mocks/casestudy/london_tube/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/london_tube/mock.py index a98441b20..2603b6e56 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/london_tube/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/london_tube/mock.py @@ -3,50 +3,75 @@ import altair as alt from vega_datasets import data -boroughs = alt.topo_feature(data.londonBoroughs.url, 'boroughs') -tubelines = alt.topo_feature(data.londonTubeLines.url, 'line') +boroughs = alt.topo_feature(data.londonBoroughs.url, "boroughs") +tubelines = alt.topo_feature(data.londonTubeLines.url, "line") centroids = data.londonCentroids.url -background = alt.Chart(boroughs).mark_geoshape( - stroke='white', - strokeWidth=2 -).encode( - color=alt.value('#eee'), -).properties( - width=600, - height=400 +background = ( + alt.Chart(boroughs) + .mark_geoshape(stroke="white", strokeWidth=2) + .encode( + color=alt.value("#eee"), + ) + .properties(width=600, height=400) ) -labels = alt.Chart(centroids).mark_text().encode( - longitude='cx:Q', - latitude='cy:Q', - text='bLabel:N', - size=alt.value(8), - opacity=alt.value(0.6) -).transform_calculate( - "bLabel", "indexof (datum.name,' ') > 0 ? 
substring(datum.name,0,indexof(datum.name, ' ')) : datum.name" +labels = ( + alt.Chart(centroids) + .mark_text() + .encode( + longitude="cx:Q", + latitude="cy:Q", + text="bLabel:N", + size=alt.value(8), + opacity=alt.value(0.6), + ) + .transform_calculate( + "bLabel", + "indexof (datum.name,' ') > 0 ? substring(datum.name,0,indexof(datum.name, ' ')) : datum.name", + ) ) -line_scale = alt.Scale(domain=["Bakerloo", "Central", "Circle", "District", "DLR", - "Hammersmith & City", "Jubilee", "Metropolitan", "Northern", - "Piccadilly", "Victoria", "Waterloo & City"], - range=["rgb(137,78,36)", "rgb(220,36,30)", "rgb(255,206,0)", - "rgb(1,114,41)", "rgb(0,175,173)", "rgb(215,153,175)", - "rgb(106,114,120)", "rgb(114,17,84)", "rgb(0,0,0)", - "rgb(0,24,168)", "rgb(0,160,226)", "rgb(106,187,170)"]) +line_scale = alt.Scale( + domain=[ + "Bakerloo", + "Central", + "Circle", + "District", + "DLR", + "Hammersmith & City", + "Jubilee", + "Metropolitan", + "Northern", + "Piccadilly", + "Victoria", + "Waterloo & City", + ], + range=[ + "rgb(137,78,36)", + "rgb(220,36,30)", + "rgb(255,206,0)", + "rgb(1,114,41)", + "rgb(0,175,173)", + "rgb(215,153,175)", + "rgb(106,114,120)", + "rgb(114,17,84)", + "rgb(0,0,0)", + "rgb(0,24,168)", + "rgb(0,160,226)", + "rgb(106,187,170)", + ], +) -lines = alt.Chart(tubelines).mark_geoshape( - filled=False, - strokeWidth=2 -).encode( - alt.Color( - 'id:N', - legend=alt.Legend( - title=None, - orient='bottom-right', - offset=0 - ), - scale=line_scale +lines = ( + alt.Chart(tubelines) + .mark_geoshape(filled=False, strokeWidth=2) + .encode( + alt.Color( + "id:N", + legend=alt.Legend(title=None, orient="bottom-right", offset=0), + scale=line_scale, + ) ) ) diff --git a/vegafusion-python/tests/altair_mocks/casestudy/natural_disasters/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/natural_disasters/mock.py index 4afaea5fc..e8bb8bc19 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/natural_disasters/mock.py +++ 
b/vegafusion-python/tests/altair_mocks/casestudy/natural_disasters/mock.py @@ -1,26 +1,18 @@ # https://altair-viz.github.io/gallery/natural_disasters.html import altair as alt -from vega_datasets import data source = "https://raw.githubusercontent.com/vega/vega-datasets/main/data/disasters.csv" -alt.Chart(source).mark_circle( - opacity=0.8, - stroke='black', - strokeWidth=1 -).encode( - alt.X('Year:O', axis=alt.Axis(labelAngle=0)), - alt.Y('Entity:N'), - alt.Size('Deaths:Q', - scale=alt.Scale(range=[0, 4000]), - legend=alt.Legend(title='Annual Global Deaths') - ), - alt.Color('Entity:N', legend=None) -).properties( - width=300, - height=300 -).transform_filter( - alt.datum.Entity != 'All natural disasters' +alt.Chart(source).mark_circle(opacity=0.8, stroke="black", strokeWidth=1).encode( + alt.X("Year:O", axis=alt.Axis(labelAngle=0)), + alt.Y("Entity:N"), + alt.Size( + "Deaths:Q", + scale=alt.Scale(range=[0, 4000]), + legend=alt.Legend(title="Annual Global Deaths"), + ), + alt.Color("Entity:N", legend=None), +).properties(width=300, height=300).transform_filter( + alt.datum.Entity != "All natural disasters" ) - diff --git a/vegafusion-python/tests/altair_mocks/casestudy/one_dot_per_zipcode/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/one_dot_per_zipcode/mock.py index 75d1a70a2..1b7991d62 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/one_dot_per_zipcode/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/one_dot_per_zipcode/mock.py @@ -9,13 +9,8 @@ alt.Chart(source).transform_calculate( "leading digit", alt.expr.substring(alt.datum.zip_code, 0, 1) ).mark_circle(size=3).encode( - longitude='longitude:Q', - latitude='latitude:Q', - color='leading digit:N', - tooltip='zip_code:N' -).project( - type='albersUsa' -).properties( - width=600, - height=400 -) + longitude="longitude:Q", + latitude="latitude:Q", + color="leading digit:N", + tooltip="zip_code:N", +).project(type="albersUsa").properties(width=600, height=400) diff --git 
a/vegafusion-python/tests/altair_mocks/casestudy/top_k_items/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/top_k_items/mock.py index 54c877a9b..1048e17db 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/top_k_items/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/top_k_items/mock.py @@ -9,16 +9,13 @@ alt.Chart( source, ).mark_bar().encode( - x=alt.X('Title:N', sort='-y'), - y=alt.Y('IMDB_Rating:Q'), - color=alt.Color('IMDB_Rating:Q') - + x=alt.X("Title:N", sort="-y"), + y=alt.Y("IMDB_Rating:Q"), + color=alt.Color("IMDB_Rating:Q"), ).transform_window( - rank='rank(IMDB_Rating)', + rank="rank(IMDB_Rating)", sort=[ - alt.SortField('IMDB_Rating', order='descending'), - alt.SortField('Title', order='ascending'), - ] -).transform_filter( - (alt.datum.rank < 10) -) + alt.SortField("IMDB_Rating", order="descending"), + alt.SortField("Title", order="ascending"), + ], +).transform_filter(alt.datum.rank < 10) diff --git a/vegafusion-python/tests/altair_mocks/casestudy/top_k_letters/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/top_k_letters/mock.py index d734ca3cd..490cdc3d5 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/top_k_letters/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/top_k_letters/mock.py @@ -2,8 +2,8 @@ # Added sort of initial data frame for consistent order of tie breakers import altair as alt -import pandas as pd import numpy as np +import pandas as pd # Excerpt from A Tale of Two Cities; public domain text text = """ @@ -17,21 +17,16 @@ or for evil, in the superlative degree of comparison only. 
""" -source = pd.DataFrame( - {'letters': np.array([c for c in text if c.isalpha()])} -) +source = pd.DataFrame({"letters": np.array([c for c in text if c.isalpha()])}) alt.Chart(source).transform_aggregate( - count='count()', - groupby=['letters'] + count="count()", groupby=["letters"] ).transform_window( - rank='rank(count)', + rank="rank(count)", sort=[ - alt.SortField('count', order='descending'), - ] -).transform_filter( - alt.datum.rank < 10 -).mark_bar().encode( - y=alt.Y('letters:N', sort='-x'), - x='count:Q', + alt.SortField("count", order="descending"), + ], +).transform_filter(alt.datum.rank < 10).mark_bar().encode( + y=alt.Y("letters:N", sort="-x"), + x="count:Q", ) diff --git a/vegafusion-python/tests/altair_mocks/casestudy/top_k_with_others/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/top_k_with_others/mock.py index 6d5607e8f..179e3e33e 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/top_k_with_others/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/top_k_with_others/mock.py @@ -13,10 +13,10 @@ title=None, ), ).transform_aggregate( - aggregate_gross='mean(Worldwide_Gross)', + aggregate_gross="mean(Worldwide_Gross)", groupby=["Director"], ).transform_window( - rank='row_number()', + rank="row_number()", sort=[alt.SortField("aggregate_gross", order="descending")], ).transform_calculate( ranked_director="datum.rank < 10 ? 
datum.Director : 'All Others'" diff --git a/vegafusion-python/tests/altair_mocks/casestudy/us_employment/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/us_employment/mock.py index 69b6ab454..2c0e06402 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/us_employment/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/us_employment/mock.py @@ -5,50 +5,38 @@ from vega_datasets import data source = data.us_employment() -presidents = pd.DataFrame([ - { - "start": "2006-01-01", - "end": "2009-01-19", - "president": "Bush" - }, - { - "start": "2009-01-20", - "end": "2015-12-31", - "president": "Obama" - } -]) +presidents = pd.DataFrame( + [ + {"start": "2006-01-01", "end": "2009-01-19", "president": "Bush"}, + {"start": "2009-01-20", "end": "2015-12-31", "president": "Obama"}, + ] +) -bars = alt.Chart( - source, - title="The U.S. employment crash during the Great Recession" -).mark_bar().encode( - x=alt.X("month:T", title=""), - y=alt.Y("nonfarm_change:Q", title="Change in non-farm employment (in thousands)"), - color=alt.condition( - alt.datum.nonfarm_change > 0, - alt.value("steelblue"), - alt.value("orange") +bars = ( + alt.Chart(source, title="The U.S. 
employment crash during the Great Recession") + .mark_bar() + .encode( + x=alt.X("month:T", title=""), + y=alt.Y( + "nonfarm_change:Q", title="Change in non-farm employment (in thousands)" + ), + color=alt.condition( + alt.datum.nonfarm_change > 0, alt.value("steelblue"), alt.value("orange") + ), ) ) -rule = alt.Chart(presidents).mark_rule( - color="black", - strokeWidth=2 -).encode( - x='end:T' -).transform_filter(alt.datum.president == "Bush") +rule = ( + alt.Chart(presidents) + .mark_rule(color="black", strokeWidth=2) + .encode(x="end:T") + .transform_filter(alt.datum.president == "Bush") +) -text = alt.Chart(presidents).mark_text( - align='left', - baseline='middle', - dx=7, - dy=-135, - size=11 -).encode( - x='start:T', - x2='end:T', - text='president', - color=alt.value('#000000') +text = ( + alt.Chart(presidents) + .mark_text(align="left", baseline="middle", dx=7, dy=-135, size=11) + .encode(x="start:T", x2="end:T", text="president", color=alt.value("#000000")) ) (bars + rule + text).properties(width=600) diff --git a/vegafusion-python/tests/altair_mocks/casestudy/us_population_over_time_facet/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/us_population_over_time_facet/mock.py index 79c4fafe4..f9c433ace 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/us_population_over_time_facet/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/us_population_over_time_facet/mock.py @@ -6,15 +6,7 @@ source = data.population.url alt.Chart(source).mark_area().encode( - x='age:O', - y=alt.Y( - 'sum(people):Q', - title='Population', - axis=alt.Axis(format='~s') - ), - facet=alt.Facet('year:O', columns=5), -).properties( - title='US Age Distribution By Year', - width=80, - height=80 -) + x="age:O", + y=alt.Y("sum(people):Q", title="Population", axis=alt.Axis(format="~s")), + facet=alt.Facet("year:O", columns=5), +).properties(title="US Age Distribution By Year", width=80, height=80) diff --git 
a/vegafusion-python/tests/altair_mocks/casestudy/us_state_capitals/mock.py b/vegafusion-python/tests/altair_mocks/casestudy/us_state_capitals/mock.py index 6090d3f44..da6e81f44 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/us_state_capitals/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/us_state_capitals/mock.py @@ -3,36 +3,39 @@ import altair as alt from vega_datasets import data -states = alt.topo_feature(data.us_10m.url, 'states') +states = alt.topo_feature(data.us_10m.url, "states") capitals = data.us_state_capitals.url # US states background -background = alt.Chart(states).mark_geoshape( - fill='lightgray', - stroke='white' -).properties( - title='US State Capitols', - width=650, - height=400 -).project('albersUsa') +background = ( + alt.Chart(states) + .mark_geoshape(fill="lightgray", stroke="white") + .properties(title="US State Capitols", width=650, height=400) + .project("albersUsa") +) # Points and text -hover = alt.selection(type='single', on='mouseover', nearest=True, - fields=['lat', 'lon']) +hover = alt.selection( + type="single", on="mouseover", nearest=True, fields=["lat", "lon"] +) base = alt.Chart(capitals).encode( - longitude='lon:Q', - latitude='lat:Q', + longitude="lon:Q", + latitude="lat:Q", ) -text = base.mark_text(dy=-5, align='right').encode( - alt.Text('city', type='nominal'), - opacity=alt.condition(~hover, alt.value(0), alt.value(1)) +text = base.mark_text(dy=-5, align="right").encode( + alt.Text("city", type="nominal"), + opacity=alt.condition(~hover, alt.value(0), alt.value(1)), ) -points = base.mark_point().encode( - color=alt.value('black'), - size=alt.condition(~hover, alt.value(30), alt.value(100)) -).add_selection(hover) +points = ( + base.mark_point() + .encode( + color=alt.value("black"), + size=alt.condition(~hover, alt.value(30), alt.value(100)), + ) + .add_selection(hover) +) background + points + text diff --git a/vegafusion-python/tests/altair_mocks/casestudy/wheat_wages/mock.py 
b/vegafusion-python/tests/altair_mocks/casestudy/wheat_wages/mock.py index 27a6d91e2..62e22ef25 100644 --- a/vegafusion-python/tests/altair_mocks/casestudy/wheat_wages/mock.py +++ b/vegafusion-python/tests/altair_mocks/casestudy/wheat_wages/mock.py @@ -2,25 +2,23 @@ import altair as alt from vega_datasets import data -base_wheat = alt.Chart(data.wheat.url).transform_calculate( - year_end="+datum.year + 5") +base_wheat = alt.Chart(data.wheat.url).transform_calculate(year_end="+datum.year + 5") base_monarchs = alt.Chart(data.monarchs.url).transform_calculate( offset="((!datum.commonwealth && datum.index % 2) ? -1: 1) * 2 + 95", off2="((!datum.commonwealth && datum.index % 2) ? -1: 1) + 95", y="95", - x="+datum.start + (+datum.end - +datum.start)/2" + x="+datum.start + (+datum.end - +datum.start)/2", ) bars = base_wheat.mark_bar(**{"fill": "#aaa", "stroke": "#999"}).encode( - x=alt.X("year:Q", axis=alt.Axis(format='d', tickCount=5)), + x=alt.X("year:Q", axis=alt.Axis(format="d", tickCount=5)), y=alt.Y("wheat:Q", axis=alt.Axis(zindex=1)), - x2=alt.X2("year_end") + x2=alt.X2("year_end"), ) area = base_wheat.mark_area(**{"color": "#a4cedb", "opacity": 0.7}).encode( - x=alt.X("year:Q"), - y=alt.Y("wages:Q") + x=alt.X("year:Q"), y=alt.Y("wages:Q") ) area_line_1 = area.mark_line(**{"color": "#000", "opacity": 0.7}) @@ -31,19 +29,17 @@ x2=alt.X2("end"), y=alt.Y("y:Q"), y2=alt.Y2("offset"), - fill=alt.Fill("commonwealth:N", legend=None, scale=alt.Scale(range=["black", "white"])) + fill=alt.Fill( + "commonwealth:N", legend=None, scale=alt.Scale(range=["black", "white"]) + ), ) -top_text = base_monarchs.mark_text(**{"yOffset": 14, "fontSize": 9, "fontStyle": "italic"}).encode( - x=alt.X("x:Q"), - y=alt.Y("off2:Q"), - text=alt.Text("name:N") -) +top_text = base_monarchs.mark_text( + **{"yOffset": 14, "fontSize": 9, "fontStyle": "italic"} +).encode(x=alt.X("x:Q"), y=alt.Y("off2:Q"), text=alt.Text("name:N")) (bars + area + area_line_1 + area_line_2 + top_bars + 
top_text).properties( width=600, height=400 ).configure_axis( title=None, gridColor="white", gridOpacity=0.25, domain=False -).configure_view( - stroke="transparent" -) +).configure_view(stroke="transparent") diff --git a/vegafusion-python/tests/altair_mocks/circular/donut/mock.py b/vegafusion-python/tests/altair_mocks/circular/donut/mock.py index 10f8cf44e..cf905aecd 100644 --- a/vegafusion-python/tests/altair_mocks/circular/donut/mock.py +++ b/vegafusion-python/tests/altair_mocks/circular/donut/mock.py @@ -1,7 +1,7 @@ # https://altair-viz.github.io/gallery/donut_chart.html -import pandas as pd import altair as alt +import pandas as pd source = pd.DataFrame({"category": [1, 2, 3, 4, 5, 6], "value": [4, 6, 10, 3, 7, 8]}) diff --git a/vegafusion-python/tests/altair_mocks/circular/pacman/mock.py b/vegafusion-python/tests/altair_mocks/circular/pacman/mock.py index 649dc6492..a3d5ab041 100644 --- a/vegafusion-python/tests/altair_mocks/circular/pacman/mock.py +++ b/vegafusion-python/tests/altair_mocks/circular/pacman/mock.py @@ -1,7 +1,7 @@ # https://altair-viz.github.io/gallery/pacman_chart.html -import numpy as np import altair as alt +import numpy as np alt.Chart().mark_arc(color="gold").encode( theta=alt.datum((5 / 8) * np.pi, scale=None), diff --git a/vegafusion-python/tests/altair_mocks/circular/pie/mock.py b/vegafusion-python/tests/altair_mocks/circular/pie/mock.py index d7d268326..f82a35905 100644 --- a/vegafusion-python/tests/altair_mocks/circular/pie/mock.py +++ b/vegafusion-python/tests/altair_mocks/circular/pie/mock.py @@ -1,7 +1,7 @@ # https://altair-viz.github.io/gallery/pie_chart.html -import pandas as pd import altair as alt +import pandas as pd source = pd.DataFrame({"category": [1, 2, 3, 4, 5, 6], "value": [4, 6, 10, 3, 7, 8]}) diff --git a/vegafusion-python/tests/altair_mocks/circular/pie_with_labels/mock.py b/vegafusion-python/tests/altair_mocks/circular/pie_with_labels/mock.py index 39a3b5d42..02d460bd8 100644 --- 
a/vegafusion-python/tests/altair_mocks/circular/pie_with_labels/mock.py +++ b/vegafusion-python/tests/altair_mocks/circular/pie_with_labels/mock.py @@ -1,7 +1,7 @@ # https://altair-viz.github.io/gallery/pie_chart.html -import pandas as pd import altair as alt +import pandas as pd source = pd.DataFrame( {"category": ["a", "b", "c", "d", "e", "f"], "value": [4, 6, 10, 3, 7, 8]} diff --git a/vegafusion-python/tests/altair_mocks/circular/radial/mock.py b/vegafusion-python/tests/altair_mocks/circular/radial/mock.py index 7a72dd3e3..ec0a31796 100644 --- a/vegafusion-python/tests/altair_mocks/circular/radial/mock.py +++ b/vegafusion-python/tests/altair_mocks/circular/radial/mock.py @@ -1,7 +1,7 @@ # https://altair-viz.github.io/gallery/radial_chart.html -import pandas as pd import altair as alt +import pandas as pd source = pd.DataFrame({"values": [12, 23, 47, 6, 52, 19]}) diff --git a/vegafusion-python/tests/altair_mocks/histogram/layered/mock.py b/vegafusion-python/tests/altair_mocks/histogram/layered/mock.py index b31d58964..a738cc67c 100644 --- a/vegafusion-python/tests/altair_mocks/histogram/layered/mock.py +++ b/vegafusion-python/tests/altair_mocks/histogram/layered/mock.py @@ -1,25 +1,24 @@ # https://altair-viz.github.io/gallery/layered_histogram.html -import pandas as pd import altair as alt import numpy as np +import pandas as pd + np.random.seed(42) # Generating Data -source = pd.DataFrame({ - 'Trial A': np.random.normal(0, 0.8, 1000), - 'Trial B': np.random.normal(-2, 1, 1000), - 'Trial C': np.random.normal(3, 2, 1000) -}) +source = pd.DataFrame( + { + "Trial A": np.random.normal(0, 0.8, 1000), + "Trial B": np.random.normal(-2, 1, 1000), + "Trial C": np.random.normal(3, 2, 1000), + } +) alt.Chart(source).transform_fold( - ['Trial A', 'Trial B', 'Trial C'], - as_=['Experiment', 'Measurement'] -).mark_bar( - opacity=0.3, - binSpacing=0 -).encode( - alt.X('Measurement:Q', bin=alt.Bin(maxbins=100)), - alt.Y('count()', stack=None), - alt.Color('Experiment:N') + 
["Trial A", "Trial B", "Trial C"], as_=["Experiment", "Measurement"] +).mark_bar(opacity=0.3, binSpacing=0).encode( + alt.X("Measurement:Q", bin=alt.Bin(maxbins=100)), + alt.Y("count()", stack=None), + alt.Color("Experiment:N"), ) diff --git a/vegafusion-python/tests/altair_mocks/histogram/trellis/mock.py b/vegafusion-python/tests/altair_mocks/histogram/trellis/mock.py index 1ba9be665..d07f1c0a7 100644 --- a/vegafusion-python/tests/altair_mocks/histogram/trellis/mock.py +++ b/vegafusion-python/tests/altair_mocks/histogram/trellis/mock.py @@ -7,7 +7,5 @@ source = data.cars() alt.Chart(source).mark_bar().encode( - alt.X("Horsepower:Q", bin=True), - y='count()', - row='Origin' + alt.X("Horsepower:Q", bin=True), y="count()", row="Origin" ).properties(height=100) diff --git a/vegafusion-python/tests/altair_mocks/histogram/with_a_global_mean_overlay/mock.py b/vegafusion-python/tests/altair_mocks/histogram/with_a_global_mean_overlay/mock.py index 2f0dfecd0..76bb0c199 100644 --- a/vegafusion-python/tests/altair_mocks/histogram/with_a_global_mean_overlay/mock.py +++ b/vegafusion-python/tests/altair_mocks/histogram/with_a_global_mean_overlay/mock.py @@ -7,14 +7,8 @@ base = alt.Chart(source) -bar = base.mark_bar().encode( - x=alt.X('IMDB_Rating:Q', bin=True, axis=None), - y='count()' -) +bar = base.mark_bar().encode(x=alt.X("IMDB_Rating:Q", bin=True, axis=None), y="count()") -rule = base.mark_rule(color='red').encode( - x='mean(IMDB_Rating):Q', - size=alt.value(5) -) +rule = base.mark_rule(color="red").encode(x="mean(IMDB_Rating):Q", size=alt.value(5)) -bar + rule \ No newline at end of file +bar + rule diff --git a/vegafusion-python/tests/altair_mocks/interactive/area-interval_selection/mock.py b/vegafusion-python/tests/altair_mocks/interactive/area-interval_selection/mock.py index 52e2160a5..48bd697ac 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/area-interval_selection/mock.py +++ 
b/vegafusion-python/tests/altair_mocks/interactive/area-interval_selection/mock.py @@ -5,22 +5,17 @@ source = data.sp500.url -brush = alt.selection(type='interval', encodings=['x']) +brush = alt.selection(type="interval", encodings=["x"]) -base = alt.Chart(source).mark_area().encode( - x = 'date:T', - y = 'price:Q' -).properties( - width=600, - height=200 +base = ( + alt.Chart(source) + .mark_area() + .encode(x="date:T", y="price:Q") + .properties(width=600, height=200) ) -upper = base.encode( - alt.X('date:T', scale=alt.Scale(domain=brush)) -) +upper = base.encode(alt.X("date:T", scale=alt.Scale(domain=brush))) -lower = base.properties( - height=60 -).add_selection(brush) +lower = base.properties(height=60).add_selection(brush) upper & lower diff --git a/vegafusion-python/tests/altair_mocks/interactive/brush/mock.py b/vegafusion-python/tests/altair_mocks/interactive/brush/mock.py index dacbde49c..3f0e52de6 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/brush/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/brush/mock.py @@ -4,10 +4,10 @@ from vega_datasets import data source = data.cars() -brush = alt.selection(type='interval') +brush = alt.selection(type="interval") alt.Chart(source).mark_point().encode( - x='Horsepower:Q', - y='Miles_per_Gallon:Q', - color=alt.condition(brush, 'Cylinders:O', alt.value('grey')), + x="Horsepower:Q", + y="Miles_per_Gallon:Q", + color=alt.condition(brush, "Cylinders:O", alt.value("grey")), ).add_selection(brush) diff --git a/vegafusion-python/tests/altair_mocks/interactive/casestudy-airport_connections/mock.py b/vegafusion-python/tests/altair_mocks/interactive/casestudy-airport_connections/mock.py index 66782f786..61d3540d8 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/casestudy-airport_connections/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/casestudy-airport_connections/mock.py @@ -19,46 +19,43 @@ airports, key="iata", fields=["state", "latitude", "longitude"] ) 
-background = alt.Chart(states).mark_geoshape( - fill="lightgray", - stroke="white" -).properties( - width=600, - height=400 -).project("albersUsa") +background = ( + alt.Chart(states) + .mark_geoshape(fill="lightgray", stroke="white") + .properties(width=600, height=400) + .project("albersUsa") +) -connections = alt.Chart(flights_airport).mark_rule(opacity=0.35).encode( - latitude="latitude:Q", - longitude="longitude:Q", - latitude2="lat2:Q", - longitude2="lon2:Q" -).transform_lookup( - lookup="origin", - from_=lookup_data -).transform_lookup( - lookup="destination", - from_=lookup_data, - as_=["state", "lat2", "lon2"] -).transform_filter( - select_city +connections = ( + alt.Chart(flights_airport) + .mark_rule(opacity=0.35) + .encode( + latitude="latitude:Q", + longitude="longitude:Q", + latitude2="lat2:Q", + longitude2="lon2:Q", + ) + .transform_lookup(lookup="origin", from_=lookup_data) + .transform_lookup( + lookup="destination", from_=lookup_data, as_=["state", "lat2", "lon2"] + ) + .transform_filter(select_city) ) -points = alt.Chart(flights_airport).mark_circle().encode( - latitude="latitude:Q", - longitude="longitude:Q", - size=alt.Size("routes:Q", scale=alt.Scale(range=[0, 1000]), legend=None), - order=alt.Order("routes:Q", sort="descending"), - tooltip=["origin:N", "routes:Q"] -).transform_aggregate( - routes="count()", - groupby=["origin"] -).transform_lookup( - lookup="origin", - from_=lookup_data -).transform_filter( - (alt.datum.state != "PR") & (alt.datum.state != "VI") -).add_selection( - select_city +points = ( + alt.Chart(flights_airport) + .mark_circle() + .encode( + latitude="latitude:Q", + longitude="longitude:Q", + size=alt.Size("routes:Q", scale=alt.Scale(range=[0, 1000]), legend=None), + order=alt.Order("routes:Q", sort="descending"), + tooltip=["origin:N", "routes:Q"], + ) + .transform_aggregate(routes="count()", groupby=["origin"]) + .transform_lookup(lookup="origin", from_=lookup_data) + .transform_filter((alt.datum.state != "PR") & 
(alt.datum.state != "VI")) + .add_selection(select_city) ) (background + connections + points).configure_view(stroke=None) diff --git a/vegafusion-python/tests/altair_mocks/interactive/casestudy-seattle_weather_interactive/mock.py b/vegafusion-python/tests/altair_mocks/interactive/casestudy-seattle_weather_interactive/mock.py index 7d5573db5..690a22e47 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/casestudy-seattle_weather_interactive/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/casestudy-seattle_weather_interactive/mock.py @@ -3,50 +3,51 @@ source = data.seattle_weather() -scale = alt.Scale(domain=['sun', 'fog', 'drizzle', 'rain', 'snow'], - range=['#e7ba52', '#a7a7a7', '#aec7e8', '#1f77b4', '#9467bd']) -color = alt.Color('weather:N', scale=scale) +scale = alt.Scale( + domain=["sun", "fog", "drizzle", "rain", "snow"], + range=["#e7ba52", "#a7a7a7", "#aec7e8", "#1f77b4", "#9467bd"], +) +color = alt.Color("weather:N", scale=scale) # We create two selections: # - a brush that is active on the top panel # - a multi-click that is active on the bottom panel -brush = alt.selection_interval(encodings=['x']) -click = alt.selection_point(encodings=['color']) +brush = alt.selection_interval(encodings=["x"]) +click = alt.selection_point(encodings=["color"]) # Top panel is scatter plot of temperature vs time -points = alt.Chart().mark_point().encode( - alt.X('monthdate(date):T', title='Date'), - alt.Y('temp_max:Q', - title='Maximum Daily Temperature (C)', - scale=alt.Scale(domain=[-5, 40]) - ), - color=alt.condition(brush, color, alt.value('lightgray')), - size=alt.Size('precipitation:Q', scale=alt.Scale(range=[5, 200])) -).properties( - width=550, - height=300 -).add_params( - brush -).transform_filter( - click +points = ( + alt.Chart() + .mark_point() + .encode( + alt.X("monthdate(date):T", title="Date"), + alt.Y( + "temp_max:Q", + title="Maximum Daily Temperature (C)", + scale=alt.Scale(domain=[-5, 40]), + ), + color=alt.condition(brush, 
color, alt.value("lightgray")), + size=alt.Size("precipitation:Q", scale=alt.Scale(range=[5, 200])), + ) + .properties(width=550, height=300) + .add_params(brush) + .transform_filter(click) ) # Bottom panel is a bar chart of weather type -bars = alt.Chart().mark_bar().encode( - x='count()', - y='weather:N', - color=alt.condition(click, color, alt.value('lightgray')), -).transform_filter( - brush -).properties( - width=550, -).add_params( - click +bars = ( + alt.Chart() + .mark_bar() + .encode( + x="count()", + y="weather:N", + color=alt.condition(click, color, alt.value("lightgray")), + ) + .transform_filter(brush) + .properties( + width=550, + ) + .add_params(click) ) -alt.vconcat( - points, - bars, - data=source, - title="Seattle Weather: 2012-2015" -) \ No newline at end of file +alt.vconcat(points, bars, data=source, title="Seattle Weather: 2012-2015") diff --git a/vegafusion-python/tests/altair_mocks/interactive/casestudy-us_population_over_time/mock.py b/vegafusion-python/tests/altair_mocks/interactive/casestudy-us_population_over_time/mock.py index 40f45c43c..8a405ca34 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/casestudy-us_population_over_time/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/casestudy-us_population_over_time/mock.py @@ -19,15 +19,8 @@ title="Sex", ), column=alt.Column("age:O", title="Age"), -).properties( - width=20, - title="U.S. Population by Age and Sex" -).add_params( +).properties(width=20, title="U.S. 
Population by Age and Sex").add_params( select_year ).transform_calculate( "sex", alt.expr.if_(alt.datum.sex == 1, "Male", "Female") -).transform_filter( - select_year -).configure_facet( - spacing=8 -) \ No newline at end of file +).transform_filter(select_year).configure_facet(spacing=8) diff --git a/vegafusion-python/tests/altair_mocks/interactive/casestudy-us_population_pyramid_over_time/mock.py b/vegafusion-python/tests/altair_mocks/interactive/casestudy-us_population_pyramid_over_time/mock.py index d3423fa0f..48b72cf68 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/casestudy-us_population_pyramid_over_time/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/casestudy-us_population_pyramid_over_time/mock.py @@ -5,44 +5,50 @@ source = data.population.url slider = alt.binding_range(min=1850, max=2000, step=10) -select_year = alt.selection_point(name='year', fields=['year'], - bind=slider, value={'year': 2000}) - -base = alt.Chart(source).add_params( - select_year -).transform_filter( - select_year -).transform_calculate( - gender=alt.expr.if_(alt.datum.sex == 1, 'Male', 'Female') -).properties( - width=250 +select_year = alt.selection_point( + name="year", fields=["year"], bind=slider, value={"year": 2000} ) +base = ( + alt.Chart(source) + .add_params(select_year) + .transform_filter(select_year) + .transform_calculate(gender=alt.expr.if_(alt.datum.sex == 1, "Male", "Female")) + .properties(width=250) +) + + +color_scale = alt.Scale(domain=["Male", "Female"], range=["#1f77b4", "#e377c2"]) + +left = ( + base.transform_filter(alt.datum.gender == "Female") + .encode( + y=alt.Y("age:O", axis=None), + x=alt.X("sum(people):Q", title="population", sort=alt.SortOrder("descending")), + color=alt.Color("gender:N", scale=color_scale, legend=None), + ) + .mark_bar() + .properties(title="Female") +) + +middle = ( + base.encode( + y=alt.Y("age:O", axis=None), + text=alt.Text("age:Q"), + ) + .mark_text() + .properties(width=20) +) + +right = ( + 
base.transform_filter(alt.datum.gender == "Male") + .encode( + y=alt.Y("age:O", axis=None), + x=alt.X("sum(people):Q", title="population"), + color=alt.Color("gender:N", scale=color_scale, legend=None), + ) + .mark_bar() + .properties(title="Male") +) -color_scale = alt.Scale(domain=['Male', 'Female'], - range=['#1f77b4', '#e377c2']) - -left = base.transform_filter( - alt.datum.gender == 'Female' -).encode( - y=alt.Y('age:O', axis=None), - x=alt.X('sum(people):Q', - title='population', - sort=alt.SortOrder('descending')), - color=alt.Color('gender:N', scale=color_scale, legend=None) -).mark_bar().properties(title='Female') - -middle = base.encode( - y=alt.Y('age:O', axis=None), - text=alt.Text('age:Q'), -).mark_text().properties(width=20) - -right = base.transform_filter( - alt.datum.gender == 'Male' -).encode( - y=alt.Y('age:O', axis=None), - x=alt.X('sum(people):Q', title='population'), - color=alt.Color('gender:N', scale=color_scale, legend=None) -).mark_bar().properties(title='Male') - -alt.concat(left, middle, right, spacing=5) \ No newline at end of file +alt.concat(left, middle, right, spacing=5) diff --git a/vegafusion-python/tests/altair_mocks/interactive/casestudy-weather_heatmap/mock.py b/vegafusion-python/tests/altair_mocks/interactive/casestudy-weather_heatmap/mock.py index 75d14ef76..4f6be89a7 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/casestudy-weather_heatmap/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/casestudy-weather_heatmap/mock.py @@ -1,20 +1,18 @@ # https://altair-viz.github.io/gallery/weather_heatmap.html import altair as alt -from vega_datasets import data # Since the data is more than 5,000 rows we'll import it from a URL source = "https://raw.githubusercontent.com/vega/vega-datasets/v1.31.1/data/seattle-temps.csv" alt.Chart( - source, - title="2010 Daily High Temperature (F) in Seattle, WA" + source, title="2010 Daily High Temperature (F) in Seattle, WA" ).mark_rect().encode( - x='date(date):O', - 
y='month(date):O', - color=alt.Color('max(temp):Q', scale=alt.Scale(scheme="inferno")), + x="date(date):O", + y="month(date):O", + color=alt.Color("max(temp):Q", scale=alt.Scale(scheme="inferno")), tooltip=[ - alt.Tooltip('monthdate(date):T', title='Date'), - alt.Tooltip('max(temp):Q', title='Max Temp') - ] + alt.Tooltip("monthdate(date):T", title="Date"), + alt.Tooltip("max(temp):Q", title="Max Temp"), + ], ).properties(width=550) diff --git a/vegafusion-python/tests/altair_mocks/interactive/cross_highlight/mock.py b/vegafusion-python/tests/altair_mocks/interactive/cross_highlight/mock.py index 0939ae894..d907424c1 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/cross_highlight/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/cross_highlight/mock.py @@ -3,39 +3,41 @@ source = data.movies.url -pts = alt.selection_point(encodings=['x']) +pts = alt.selection_point(encodings=["x"]) -rect = alt.Chart(data.movies.url).mark_rect().encode( - alt.X('IMDB_Rating:Q', bin=True), - alt.Y('Rotten_Tomatoes_Rating:Q', bin=True), - alt.Color('count()', - scale=alt.Scale(scheme='greenblue'), - legend=alt.Legend(title='Total Records') - ) +rect = ( + alt.Chart(data.movies.url) + .mark_rect() + .encode( + alt.X("IMDB_Rating:Q", bin=True), + alt.Y("Rotten_Tomatoes_Rating:Q", bin=True), + alt.Color( + "count()", + scale=alt.Scale(scheme="greenblue"), + legend=alt.Legend(title="Total Records"), + ), + ) ) -circ = rect.mark_point().encode( - alt.ColorValue('grey'), - alt.Size('count()', - legend=alt.Legend(title='Records in Selection') - ) -).transform_filter( - pts +circ = ( + rect.mark_point() + .encode( + alt.ColorValue("grey"), + alt.Size("count()", legend=alt.Legend(title="Records in Selection")), + ) + .transform_filter(pts) ) -bar = alt.Chart(source).mark_bar().encode( - x='Major_Genre:N', - y='count()', - color=alt.condition(pts, alt.ColorValue("steelblue"), alt.ColorValue("grey")) -).properties( - width=550, - height=200 -).add_params(pts) +bar = ( + 
alt.Chart(source) + .mark_bar() + .encode( + x="Major_Genre:N", + y="count()", + color=alt.condition(pts, alt.ColorValue("steelblue"), alt.ColorValue("grey")), + ) + .properties(width=550, height=200) + .add_params(pts) +) -alt.vconcat( - rect + circ, - bar -).resolve_legend( - color="independent", - size="independent" -) \ No newline at end of file +alt.vconcat(rect + circ, bar).resolve_legend(color="independent", size="independent") diff --git a/vegafusion-python/tests/altair_mocks/interactive/histogram-responsive/mock.py b/vegafusion-python/tests/altair_mocks/interactive/histogram-responsive/mock.py index fffbb2aa2..5ade44b0d 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/histogram-responsive/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/histogram-responsive/mock.py @@ -5,25 +5,25 @@ source = data.flights_5k.url -brush = alt.selection_interval(encodings=['x']) +brush = alt.selection_interval(encodings=["x"]) -base = alt.Chart(source).transform_calculate( - time="hours(datum.date) + minutes(datum.date) / 60" -).mark_bar().encode( - y='count():Q' -).properties( - width=600, - height=100 +base = ( + alt.Chart(source) + .transform_calculate(time="hours(datum.date) + minutes(datum.date) / 60") + .mark_bar() + .encode(y="count():Q") + .properties(width=600, height=100) ) alt.vconcat( base.encode( - alt.X('time:Q', - bin=alt.Bin(maxbins=30, extent=brush), - scale=alt.Scale(domain=brush) - ) + alt.X( + "time:Q", + bin=alt.Bin(maxbins=30, extent=brush), + scale=alt.Scale(domain=brush), + ) ), base.encode( - alt.X('time:Q', bin=alt.Bin(maxbins=30)), - ).add_selection(brush) + alt.X("time:Q", bin=alt.Bin(maxbins=30)), + ).add_selection(brush), ) diff --git a/vegafusion-python/tests/altair_mocks/interactive/layered_crossfilter/mock.py b/vegafusion-python/tests/altair_mocks/interactive/layered_crossfilter/mock.py index f5084ff7a..beab7c7fc 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/layered_crossfilter/mock.py +++ 
b/vegafusion-python/tests/altair_mocks/interactive/layered_crossfilter/mock.py @@ -3,37 +3,29 @@ import altair as alt from vega_datasets import data -source = alt.UrlData( - data.flights_2k.url, - format={'parse': {'date': 'date'}} -) +source = alt.UrlData(data.flights_2k.url, format={"parse": {"date": "date"}}) -brush = alt.selection(type='interval', encodings=['x']) +brush = alt.selection(type="interval", encodings=["x"]) # Define the base chart, with the common parts of the # background and highlights -base = alt.Chart().mark_bar().encode( - x=alt.X(alt.repeat('column'), type='quantitative', bin=alt.Bin(maxbins=20)), - y='count()' -).properties( - width=130, - height=130 +base = ( + alt.Chart() + .mark_bar() + .encode( + x=alt.X(alt.repeat("column"), type="quantitative", bin=alt.Bin(maxbins=20)), + y="count()", + ) + .properties(width=130, height=130) ) # gray background with selection -background = base.encode( - color=alt.value('#ddd') -).add_selection(brush) +background = base.encode(color=alt.value("#ddd")).add_selection(brush) # blue highlights on the transformed data highlight = base.transform_filter(brush) # layer the two charts & repeat -alt.layer( - background, - highlight, - data=source -).transform_calculate( - "time", - "hours(datum.date)" -).repeat(column=["distance", "delay", "time"]) \ No newline at end of file +alt.layer(background, highlight, data=source).transform_calculate( + "time", "hours(datum.date)" +).repeat(column=["distance", "delay", "time"]) diff --git a/vegafusion-python/tests/altair_mocks/interactive/legend/mock.py b/vegafusion-python/tests/altair_mocks/interactive/legend/mock.py index 7a76d0125..e2625a10b 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/legend/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/legend/mock.py @@ -5,13 +5,11 @@ source = data.unemployment_across_industries.url -selection = alt.selection_multi(fields=['series'], bind='legend') +selection = alt.selection_multi(fields=["series"], 
bind="legend") alt.Chart(source).mark_area().encode( - alt.X('yearmonth(date):T', axis=alt.Axis(domain=False, format='%Y', tickSize=0)), - alt.Y('sum(count):Q', stack='center', axis=None), - alt.Color('series:N', scale=alt.Scale(scheme='category20b')), - opacity=alt.condition(selection, alt.value(1), alt.value(0.2)) -).add_selection( - selection -) + alt.X("yearmonth(date):T", axis=alt.Axis(domain=False, format="%Y", tickSize=0)), + alt.Y("sum(count):Q", stack="center", axis=None), + alt.Color("series:N", scale=alt.Scale(scheme="category20b")), + opacity=alt.condition(selection, alt.value(1), alt.value(0.2)), +).add_selection(selection) diff --git a/vegafusion-python/tests/altair_mocks/interactive/multiline_highlight/mock.py b/vegafusion-python/tests/altair_mocks/interactive/multiline_highlight/mock.py index 8b867f91e..cc5c48755 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/multiline_highlight/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/multiline_highlight/mock.py @@ -3,25 +3,19 @@ source = data.stocks() -highlight = alt.selection_point(on='mouseover', - fields=['symbol'], nearest=True) +highlight = alt.selection_point(on="mouseover", fields=["symbol"], nearest=True) -base = alt.Chart(source).encode( - x='date:T', - y='price:Q', - color='symbol:N' -) +base = alt.Chart(source).encode(x="date:T", y="price:Q", color="symbol:N") -points = base.mark_circle().encode( - opacity=alt.value(0) -).add_params( - highlight -).properties( - width=600 +points = ( + base.mark_circle() + .encode(opacity=alt.value(0)) + .add_params(highlight) + .properties(width=600) ) lines = base.mark_line().encode( size=alt.condition(~highlight, alt.value(1), alt.value(3)) ) -points + lines \ No newline at end of file +points + lines diff --git a/vegafusion-python/tests/altair_mocks/interactive/multiline_tooltip/mock.py b/vegafusion-python/tests/altair_mocks/interactive/multiline_tooltip/mock.py index 7fd59ff51..d8d99c22b 100644 --- 
a/vegafusion-python/tests/altair_mocks/interactive/multiline_tooltip/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/multiline_tooltip/mock.py @@ -1,30 +1,35 @@ import altair as alt -import pandas as pd import numpy as np +import pandas as pd np.random.seed(42) -source = pd.DataFrame(np.cumsum(np.random.randn(100, 3), 0).round(2), - columns=['A', 'B', 'C'], index=pd.RangeIndex(100, name='x')) -source = source.reset_index().melt('x', var_name='category', value_name='y') +source = pd.DataFrame( + np.cumsum(np.random.randn(100, 3), 0).round(2), + columns=["A", "B", "C"], + index=pd.RangeIndex(100, name="x"), +) +source = source.reset_index().melt("x", var_name="category", value_name="y") # Create a selection that chooses the nearest point & selects based on x-value -nearest = alt.selection_point(nearest=True, on='mouseover', - fields=['x'], empty=False) +nearest = alt.selection_point(nearest=True, on="mouseover", fields=["x"], empty=False) # The basic line -line = alt.Chart(source).mark_line(interpolate='basis').encode( - x='x:Q', - y='y:Q', - color='category:N' +line = ( + alt.Chart(source) + .mark_line(interpolate="basis") + .encode(x="x:Q", y="y:Q", color="category:N") ) # Transparent selectors across the chart. 
This is what tells us # the x-value of the cursor -selectors = alt.Chart(source).mark_point().encode( - x='x:Q', - opacity=alt.value(0), -).add_params( - nearest +selectors = ( + alt.Chart(source) + .mark_point() + .encode( + x="x:Q", + opacity=alt.value(0), + ) + .add_params(nearest) ) # Draw points on the line, and highlight based on selection @@ -33,20 +38,19 @@ ) # Draw text labels near the points, and highlight based on selection -text = line.mark_text(align='left', dx=5, dy=-5).encode( - text=alt.condition(nearest, 'y:Q', alt.value(' ')) +text = line.mark_text(align="left", dx=5, dy=-5).encode( + text=alt.condition(nearest, "y:Q", alt.value(" ")) ) # Draw a rule at the location of the selection -rules = alt.Chart(source).mark_rule(color='gray').encode( - x='x:Q', -).transform_filter( - nearest +rules = ( + alt.Chart(source) + .mark_rule(color="gray") + .encode( + x="x:Q", + ) + .transform_filter(nearest) ) # Put the five layers into a chart and bind the data -alt.layer( - line, selectors, points, rules, text -).properties( - width=600, height=300 -) \ No newline at end of file +alt.layer(line, selectors, points, rules, text).properties(width=600, height=300) diff --git a/vegafusion-python/tests/altair_mocks/interactive/other-image_tooltip/mock.py b/vegafusion-python/tests/altair_mocks/interactive/other-image_tooltip/mock.py index 8b0da07df..d4bbe1852 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/other-image_tooltip/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/other-image_tooltip/mock.py @@ -4,11 +4,21 @@ import pandas as pd source = pd.DataFrame.from_records( - [{'a': 1, 'b': 1, 'image': 'https://altair-viz.github.io/_static/altair-logo-light.png'}, - {'a': 2, 'b': 2, 'image': 'https://avatars.githubusercontent.com/u/11796929?s=200&v=4'}] + [ + { + "a": 1, + "b": 1, + "image": "https://altair-viz.github.io/_static/altair-logo-light.png", + }, + { + "a": 2, + "b": 2, + "image": 
"https://avatars.githubusercontent.com/u/11796929?s=200&v=4", + }, + ] ) alt.Chart(source).mark_circle(size=200).encode( - x='a', - y='b', - tooltip=['image'] # Must be a list for the image to render + x="a", + y="b", + tooltip=["image"], # Must be a list for the image to render ) diff --git a/vegafusion-python/tests/altair_mocks/interactive/scatter-href/mock.py b/vegafusion-python/tests/altair_mocks/interactive/scatter-href/mock.py index 9af23c4b5..32fca8549 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/scatter-href/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/scatter-href/mock.py @@ -6,11 +6,11 @@ source = data.cars() alt.Chart(source).transform_calculate( - url='https://www.google.com/search?q=' + alt.datum.Name + url="https://www.google.com/search?q=" + alt.datum.Name ).mark_point().encode( - x='Horsepower:Q', - y='Miles_per_Gallon:Q', - color='Origin:N', - href='url:N', - tooltip=['Name:N', 'url:N'] + x="Horsepower:Q", + y="Miles_per_Gallon:Q", + color="Origin:N", + href="url:N", + tooltip=["Name:N", "url:N"], ) diff --git a/vegafusion-python/tests/altair_mocks/interactive/scatter-with_linked_table/mock.py b/vegafusion-python/tests/altair_mocks/interactive/scatter-with_linked_table/mock.py index c936f42c2..a983cf264 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/scatter-with_linked_table/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/scatter-with_linked_table/mock.py @@ -7,35 +7,33 @@ brush = alt.selection_interval() # Scatter Plot -points = alt.Chart(source).mark_point().encode( - x='Horsepower:Q', - y='Miles_per_Gallon:Q', - color=alt.condition(brush, 'Cylinders:O', alt.value('grey')) -).add_params(brush) +points = ( + alt.Chart(source) + .mark_point() + .encode( + x="Horsepower:Q", + y="Miles_per_Gallon:Q", + color=alt.condition(brush, "Cylinders:O", alt.value("grey")), + ) + .add_params(brush) +) # Base chart for data tables -ranked_text = alt.Chart(source).mark_text().encode( - 
y=alt.Y('row_number:O',axis=None) -).transform_window( - row_number='row_number()' -).transform_filter( - brush -).transform_window( - rank='rank(row_number)' -).transform_filter( - alt.datum.rank<20 +ranked_text = ( + alt.Chart(source) + .mark_text() + .encode(y=alt.Y("row_number:O", axis=None)) + .transform_window(row_number="row_number()") + .transform_filter(brush) + .transform_window(rank="rank(row_number)") + .transform_filter(alt.datum.rank < 20) ) # Data Tables -horsepower = ranked_text.encode(text='Horsepower:N').properties(title='Horsepower') -mpg = ranked_text.encode(text='Miles_per_Gallon:N').properties(title='MPG') -origin = ranked_text.encode(text='Origin:N').properties(title='Origin') -text = alt.hconcat(horsepower, mpg, origin) # Combine data tables +horsepower = ranked_text.encode(text="Horsepower:N").properties(title="Horsepower") +mpg = ranked_text.encode(text="Miles_per_Gallon:N").properties(title="MPG") +origin = ranked_text.encode(text="Origin:N").properties(title="Origin") +text = alt.hconcat(horsepower, mpg, origin) # Combine data tables # Build chart -alt.hconcat( - points, - text -).resolve_legend( - color="independent" -) +alt.hconcat(points, text).resolve_legend(color="independent") diff --git a/vegafusion-python/tests/altair_mocks/interactive/scatter-with_minimap/mock.py b/vegafusion-python/tests/altair_mocks/interactive/scatter-with_minimap/mock.py index 56d61ae7c..e6400e56c 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/scatter-with_minimap/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/scatter-with_minimap/mock.py @@ -37,4 +37,4 @@ .properties(width=300, height=300, title="Seattle weather -- detail view") ) -detail | minimap \ No newline at end of file +detail | minimap diff --git a/vegafusion-python/tests/altair_mocks/interactive/scatter_linked_brush/mock.py b/vegafusion-python/tests/altair_mocks/interactive/scatter_linked_brush/mock.py index 04a887303..f3b95692f 100644 --- 
a/vegafusion-python/tests/altair_mocks/interactive/scatter_linked_brush/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/scatter_linked_brush/mock.py @@ -5,16 +5,17 @@ source = data.cars() -brush = alt.selection(type='interval', resolve='global') +brush = alt.selection(type="interval", resolve="global") -base = alt.Chart(source).mark_point().encode( - y='Miles_per_Gallon', - color=alt.condition(brush, 'Origin', alt.ColorValue('gray')), -).add_selection( - brush -).properties( - width=200, - height=250 +base = ( + alt.Chart(source) + .mark_point() + .encode( + y="Miles_per_Gallon", + color=alt.condition(brush, "Origin", alt.ColorValue("gray")), + ) + .add_selection(brush) + .properties(width=200, height=250) ) -base.encode(x='Horsepower') | base.encode(x='Acceleration') +base.encode(x="Horsepower") | base.encode(x="Acceleration") diff --git a/vegafusion-python/tests/altair_mocks/interactive/scatter_plot/mock.py b/vegafusion-python/tests/altair_mocks/interactive/scatter_plot/mock.py index 2ad86c31f..3f79a1ec0 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/scatter_plot/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/scatter_plot/mock.py @@ -6,7 +6,7 @@ source = data.cars() alt.Chart(source).mark_circle().encode( - x='Horsepower', - y='Miles_per_Gallon', - color='Origin', + x="Horsepower", + y="Miles_per_Gallon", + color="Origin", ).interactive() diff --git a/vegafusion-python/tests/altair_mocks/interactive/scatter_with_histogram/mock.py b/vegafusion-python/tests/altair_mocks/interactive/scatter_with_histogram/mock.py index eb036ad1e..bfccb0179 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/scatter_with_histogram/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/scatter_with_histogram/mock.py @@ -1,6 +1,6 @@ import altair as alt -import pandas as pd import numpy as np +import pandas as pd np.random.seed(42) @@ -9,39 +9,34 @@ m = np.random.normal(15, 1, size=100) -source = pd.DataFrame({"x": x, 
"y":y, "m":m}) +source = pd.DataFrame({"x": x, "y": y, "m": m}) # interval selection in the scatter plot pts = alt.selection_interval(encodings=["x"]) # left panel: scatter plot -points = alt.Chart().mark_point(filled=True, color="black").encode( - x='x', - y='y' -).transform_filter( - pts -).properties( - width=300, - height=300 +points = ( + alt.Chart() + .mark_point(filled=True, color="black") + .encode(x="x", y="y") + .transform_filter(pts) + .properties(width=300, height=300) ) # right panel: histogram -mag = alt.Chart().mark_bar().encode( - x='mbin:N', - y="count()", - color=alt.condition(pts, alt.value("black"), alt.value("lightgray")) -).properties( - width=300, - height=300 -).add_params(pts) +mag = ( + alt.Chart() + .mark_bar() + .encode( + x="mbin:N", + y="count()", + color=alt.condition(pts, alt.value("black"), alt.value("lightgray")), + ) + .properties(width=300, height=300) + .add_params(pts) +) # build the chart: -alt.hconcat( - points, - mag, - data=source -).transform_bin( - "mbin", - field="m", - bin=alt.Bin(maxbins=20) -) \ No newline at end of file +alt.hconcat(points, mag, data=source).transform_bin( + "mbin", field="m", bin=alt.Bin(maxbins=20) +) diff --git a/vegafusion-python/tests/altair_mocks/interactive/scatter_with_layered_histogram/mock.py b/vegafusion-python/tests/altair_mocks/interactive/scatter_with_layered_histogram/mock.py index b23c0d4ec..cdec4360b 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/scatter_with_layered_histogram/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/scatter_with_layered_histogram/mock.py @@ -1,53 +1,49 @@ import altair as alt -import pandas as pd import numpy as np +import pandas as pd np.random.seed(1) # generate fake data -source = pd.DataFrame({ - 'gender': ['M']*1000 + ['F']*1000, - 'height':np.concatenate(( - np.random.normal(69, 7, 1000), np.random.normal(64, 6, 1000) - )), - 'weight': np.concatenate(( - np.random.normal(195.8, 144, 1000), np.random.normal(167, 100, 1000) - 
)), - 'age': np.concatenate(( - np.random.normal(45, 8, 1000), np.random.normal(51, 6, 1000) - )) -}) - -selector = alt.selection_point(fields=['gender']) - -color_scale = alt.Scale(domain=['M', 'F'], - range=['#1FC3AA', '#8624F5']) - -base = alt.Chart(source).properties( - width=250, - height=250 -).add_params(selector) +source = pd.DataFrame( + { + "gender": ["M"] * 1000 + ["F"] * 1000, + "height": np.concatenate( + (np.random.normal(69, 7, 1000), np.random.normal(64, 6, 1000)) + ), + "weight": np.concatenate( + (np.random.normal(195.8, 144, 1000), np.random.normal(167, 100, 1000)) + ), + "age": np.concatenate( + (np.random.normal(45, 8, 1000), np.random.normal(51, 6, 1000)) + ), + } +) + +selector = alt.selection_point(fields=["gender"]) + +color_scale = alt.Scale(domain=["M", "F"], range=["#1FC3AA", "#8624F5"]) + +base = alt.Chart(source).properties(width=250, height=250).add_params(selector) points = base.mark_point(filled=True, size=200).encode( - x=alt.X('mean(height):Q').scale(domain=[0,84]), - y=alt.Y('mean(weight):Q').scale(domain=[0,250]), + x=alt.X("mean(height):Q").scale(domain=[0, 84]), + y=alt.Y("mean(weight):Q").scale(domain=[0, 250]), color=alt.condition( - selector, - 'gender:N', - alt.value('lightgray'), - scale=color_scale), + selector, "gender:N", alt.value("lightgray"), scale=color_scale + ), ) -hists = base.mark_bar(opacity=0.5, thickness=100).encode( - x=alt.X('age') - .bin(step=5) # step keeps bin size the same - .scale(domain=[0,100]), - y=alt.Y('count()') - .stack(None) - .scale(domain=[0,350]), - color=alt.Color('gender:N', scale=color_scale) -).transform_filter( - selector +hists = ( + base.mark_bar(opacity=0.5, thickness=100) + .encode( + x=alt.X("age") + .bin(step=5) # step keeps bin size the same + .scale(domain=[0, 100]), + y=alt.Y("count()").stack(None).scale(domain=[0, 350]), + color=alt.Color("gender:N", scale=color_scale), + ) + .transform_filter(selector) ) -points | hists \ No newline at end of file +points | hists diff --git 
a/vegafusion-python/tests/altair_mocks/interactive/select_detail/mock.py b/vegafusion-python/tests/altair_mocks/interactive/select_detail/mock.py index 055b4f094..56f076b66 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/select_detail/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/select_detail/mock.py @@ -1,8 +1,8 @@ # https://altair-viz.github.io/gallery/select_detail.html import altair as alt -import pandas as pd import numpy as np +import pandas as pd np.random.seed(0) @@ -10,45 +10,48 @@ n_times = 50 # Create one (x, y) pair of metadata per object -locations = pd.DataFrame({ - 'id': range(n_objects), - 'x': np.random.randn(n_objects), - 'y': np.random.randn(n_objects) -}) +locations = pd.DataFrame( + { + "id": range(n_objects), + "x": np.random.randn(n_objects), + "y": np.random.randn(n_objects), + } +) # Create a 50-element time-series for each object -timeseries = pd.DataFrame(np.random.randn(n_times, n_objects).cumsum(0), - columns=locations['id'], - index=pd.RangeIndex(0, n_times, name='time')) +timeseries = pd.DataFrame( + np.random.randn(n_times, n_objects).cumsum(0), + columns=locations["id"], + index=pd.RangeIndex(0, n_times, name="time"), +) # Melt the wide-form timeseries into a long-form view -timeseries = timeseries.reset_index().melt('time') +timeseries = timeseries.reset_index().melt("time") # Merge the (x, y) metadata into the long-form view -timeseries['id'] = timeseries['id'].astype(int) # make merge not complain -data = pd.merge(timeseries, locations, on='id') +timeseries["id"] = timeseries["id"].astype(int) # make merge not complain +data = pd.merge(timeseries, locations, on="id") # Data is prepared, now make a chart -selector = alt.selection_single(empty='all', fields=['id']) +selector = alt.selection_single(empty="all", fields=["id"]) -base = alt.Chart(data).properties( - width=200, - height=200 -).add_selection(selector) +base = alt.Chart(data).properties(width=200, height=200).add_selection(selector) points 
= base.mark_point(filled=True, size=200).encode( - x='mean(x)', - y='mean(y)', - color=alt.condition(selector, 'id:O', alt.value('lightgray'), legend=None), + x="mean(x)", + y="mean(y)", + color=alt.condition(selector, "id:O", alt.value("lightgray"), legend=None), ) -timeseries = base.mark_line().encode( - x='time', - y=alt.Y('value', scale=alt.Scale(domain=(-15, 15))), - color=alt.Color('id:O', legend=None) -).transform_filter( - selector +timeseries = ( + base.mark_line() + .encode( + x="time", + y=alt.Y("value", scale=alt.Scale(domain=(-15, 15))), + color=alt.Color("id:O", legend=None), + ) + .transform_filter(selector) ) -points | timeseries \ No newline at end of file +points | timeseries diff --git a/vegafusion-python/tests/altair_mocks/interactive/select_mark_area/mock.py b/vegafusion-python/tests/altair_mocks/interactive/select_mark_area/mock.py index 1e535edf9..154c1894e 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/select_mark_area/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/select_mark_area/mock.py @@ -3,16 +3,17 @@ source = data.unemployment_across_industries.url -base = alt.Chart(source).mark_area( - color='goldenrod', - opacity=0.3 -).encode( - x='yearmonth(date):T', - y='sum(count):Q', +base = ( + alt.Chart(source) + .mark_area(color="goldenrod", opacity=0.3) + .encode( + x="yearmonth(date):T", + y="sum(count):Q", + ) ) -brush = alt.selection_interval(encodings=['x']) +brush = alt.selection_interval(encodings=["x"]) background = base.add_params(brush) -selected = base.transform_filter(brush).mark_area(color='goldenrod') +selected = base.transform_filter(brush).mark_area(color="goldenrod") -background + selected \ No newline at end of file +background + selected diff --git a/vegafusion-python/tests/altair_mocks/interactive/selection_histogram/mock.py b/vegafusion-python/tests/altair_mocks/interactive/selection_histogram/mock.py index ffb9733f6..f6c7fe244 100644 --- 
a/vegafusion-python/tests/altair_mocks/interactive/selection_histogram/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/selection_histogram/mock.py @@ -5,20 +5,22 @@ brush = alt.selection_interval() -points = alt.Chart(source).mark_point().encode( - x='Horsepower:Q', - y='Miles_per_Gallon:Q', - color=alt.condition(brush, 'Origin:N', alt.value('lightgray')) -).add_params( - brush +points = ( + alt.Chart(source) + .mark_point() + .encode( + x="Horsepower:Q", + y="Miles_per_Gallon:Q", + color=alt.condition(brush, "Origin:N", alt.value("lightgray")), + ) + .add_params(brush) ) -bars = alt.Chart(source).mark_bar().encode( - y='Origin:N', - color='Origin:N', - x='count(Origin):Q' -).transform_filter( - brush +bars = ( + alt.Chart(source) + .mark_bar() + .encode(y="Origin:N", color="Origin:N", x="count(Origin):Q") + .transform_filter(brush) ) -points & bars \ No newline at end of file +points & bars diff --git a/vegafusion-python/tests/altair_mocks/interactive/selection_layer_bar_month/mock.py b/vegafusion-python/tests/altair_mocks/interactive/selection_layer_bar_month/mock.py index 9def26a55..7ed9acf5c 100644 --- a/vegafusion-python/tests/altair_mocks/interactive/selection_layer_bar_month/mock.py +++ b/vegafusion-python/tests/altair_mocks/interactive/selection_layer_bar_month/mock.py @@ -2,21 +2,24 @@ from vega_datasets import data source = data.seattle_weather() -brush = alt.selection_interval(encodings=['x']) +brush = alt.selection_interval(encodings=["x"]) -bars = alt.Chart().mark_bar().encode( - x='month(date):O', - y='mean(precipitation):Q', - opacity=alt.condition(brush, alt.OpacityValue(1), alt.OpacityValue(0.7)), -).add_params( - brush +bars = ( + alt.Chart() + .mark_bar() + .encode( + x="month(date):O", + y="mean(precipitation):Q", + opacity=alt.condition(brush, alt.OpacityValue(1), alt.OpacityValue(0.7)), + ) + .add_params(brush) ) -line = alt.Chart().mark_rule(color='firebrick').encode( - y='mean(precipitation):Q', - size=alt.SizeValue(3) 
-).transform_filter( - brush +line = ( + alt.Chart() + .mark_rule(color="firebrick") + .encode(y="mean(precipitation):Q", size=alt.SizeValue(3)) + .transform_filter(brush) ) alt.layer(bars, line, data=source) diff --git a/vegafusion-python/tests/altair_mocks/line/bump_chart/mock.py b/vegafusion-python/tests/altair_mocks/line/bump_chart/mock.py index 20a549106..f45a876cb 100644 --- a/vegafusion-python/tests/altair_mocks/line/bump_chart/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/bump_chart/mock.py @@ -2,22 +2,22 @@ # Width set to 500 import altair as alt -from vega_datasets import data import pandas as pd +from vega_datasets import data stocks = data.stocks() -source = stocks.groupby([pd.Grouper(key="date", freq="6M"),"symbol"]).mean().reset_index() +source = ( + stocks.groupby([pd.Grouper(key="date", freq="6M"), "symbol"]).mean().reset_index() +) -alt.Chart(source).mark_line(point = True).encode( - x = alt.X("date:O", timeUnit="yearmonth", title="date"), +alt.Chart(source).mark_line(point=True).encode( + x=alt.X("date:O", timeUnit="yearmonth", title="date"), y="rank:O", - color=alt.Color("symbol:N") + color=alt.Color("symbol:N"), ).transform_window( - rank="rank()", - sort=[alt.SortField("price", order="descending")], - groupby=["date"] + rank="rank()", sort=[alt.SortField("price", order="descending")], groupby=["date"] ).properties( title="Bump Chart for Stock Prices", width=500, height=150, -) \ No newline at end of file +) diff --git a/vegafusion-python/tests/altair_mocks/line/filled_step_chart/mock.py b/vegafusion-python/tests/altair_mocks/line/filled_step_chart/mock.py index 54c6bb764..cbb0cc52c 100644 --- a/vegafusion-python/tests/altair_mocks/line/filled_step_chart/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/filled_step_chart/mock.py @@ -6,10 +6,5 @@ source = data.stocks() alt.Chart(source).mark_area( - color="lightblue", - interpolate='step-after', - line=True -).encode( - x='date', - y='price' -).transform_filter(alt.datum.symbol == 
'GOOG') + color="lightblue", interpolate="step-after", line=True +).encode(x="date", y="price").transform_filter(alt.datum.symbol == "GOOG") diff --git a/vegafusion-python/tests/altair_mocks/line/layer_line_color_rule/mock.py b/vegafusion-python/tests/altair_mocks/line/layer_line_color_rule/mock.py index 4b7e4ec1e..e3805931c 100644 --- a/vegafusion-python/tests/altair_mocks/line/layer_line_color_rule/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/layer_line_color_rule/mock.py @@ -7,16 +7,8 @@ base = alt.Chart(source).properties(width=550) -line = base.mark_line().encode( - x='date', - y='price', - color='symbol' -) +line = base.mark_line().encode(x="date", y="price", color="symbol") -rule = base.mark_rule().encode( - y='average(price)', - color='symbol', - size=alt.value(2) -) +rule = base.mark_rule().encode(y="average(price)", color="symbol", size=alt.value(2)) line + rule diff --git a/vegafusion-python/tests/altair_mocks/line/multi_series/mock.py b/vegafusion-python/tests/altair_mocks/line/multi_series/mock.py index a976a553d..3af46386b 100644 --- a/vegafusion-python/tests/altair_mocks/line/multi_series/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/multi_series/mock.py @@ -6,8 +6,8 @@ source = data.stocks() alt.Chart(source).mark_line().encode( - x='date', - y='price', - color='symbol', - strokeDash='symbol', + x="date", + y="price", + color="symbol", + strokeDash="symbol", ) diff --git a/vegafusion-python/tests/altair_mocks/line/percent_axis/mock.py b/vegafusion-python/tests/altair_mocks/line/percent_axis/mock.py index 2e543fd68..5c9dd5404 100644 --- a/vegafusion-python/tests/altair_mocks/line/percent_axis/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/percent_axis/mock.py @@ -6,9 +6,5 @@ source = data.jobs.url alt.Chart(source).mark_line().encode( - alt.X('year:O'), - alt.Y('perc:Q', axis=alt.Axis(format='%')), - color='sex:N' -).transform_filter( - alt.datum.job == 'Welder' -) + alt.X("year:O"), alt.Y("perc:Q", 
axis=alt.Axis(format="%")), color="sex:N" +).transform_filter(alt.datum.job == "Welder") diff --git a/vegafusion-python/tests/altair_mocks/line/slope_graph/mock.py b/vegafusion-python/tests/altair_mocks/line/slope_graph/mock.py index a310d8f13..6603ec2b1 100644 --- a/vegafusion-python/tests/altair_mocks/line/slope_graph/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/slope_graph/mock.py @@ -5,8 +5,4 @@ source = data.barley() -alt.Chart(source).mark_line().encode( - x='year:O', - y='median(yield)', - color='site' -) +alt.Chart(source).mark_line().encode(x="year:O", y="median(yield)", color="site") diff --git a/vegafusion-python/tests/altair_mocks/line/slope_graph2/mock.py b/vegafusion-python/tests/altair_mocks/line/slope_graph2/mock.py index fc18cd8d8..fd69c6d6c 100644 --- a/vegafusion-python/tests/altair_mocks/line/slope_graph2/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/slope_graph2/mock.py @@ -6,8 +6,4 @@ source = data.barley() -alt.Chart(source).mark_line().encode( - x='year:O', - y='mean(yield)', - color='site' -) +alt.Chart(source).mark_line().encode(x="year:O", y="mean(yield)", color="site") diff --git a/vegafusion-python/tests/altair_mocks/line/step_chart/mock.py b/vegafusion-python/tests/altair_mocks/line/step_chart/mock.py index 3feda7925..7c2593d0d 100644 --- a/vegafusion-python/tests/altair_mocks/line/step_chart/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/step_chart/mock.py @@ -5,9 +5,6 @@ source = data.stocks() -alt.Chart(source).mark_line(interpolate='step-after').encode( - x='date', - y='price' -).transform_filter( - alt.datum.symbol == 'GOOG' -) +alt.Chart(source).mark_line(interpolate="step-after").encode( + x="date", y="price" +).transform_filter(alt.datum.symbol == "GOOG") diff --git a/vegafusion-python/tests/altair_mocks/line/trail_marker/mock.py b/vegafusion-python/tests/altair_mocks/line/trail_marker/mock.py index ea43669c2..c61286a12 100644 --- a/vegafusion-python/tests/altair_mocks/line/trail_marker/mock.py +++ 
b/vegafusion-python/tests/altair_mocks/line/trail_marker/mock.py @@ -8,8 +8,4 @@ source = data.wheat() source["year"] = source.year.astype(str) -alt.Chart(source).mark_trail().encode( - x='year:T', - y='wheat:Q', - size='wheat:Q' -) \ No newline at end of file +alt.Chart(source).mark_trail().encode(x="year:T", y="wheat:Q", size="wheat:Q") diff --git a/vegafusion-python/tests/altair_mocks/line/with_ci/mock.py b/vegafusion-python/tests/altair_mocks/line/with_ci/mock.py index a215c2c39..8b3d846f0 100644 --- a/vegafusion-python/tests/altair_mocks/line/with_ci/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/with_ci/mock.py @@ -6,14 +6,15 @@ source = data.cars() -line = alt.Chart(source).mark_line().encode( - x='Year', - y='mean(Miles_per_Gallon)' -) +line = alt.Chart(source).mark_line().encode(x="Year", y="mean(Miles_per_Gallon)") -band = alt.Chart(source).mark_errorband(extent='stdev').encode( - x='Year', - y=alt.Y('Miles_per_Gallon', title='Miles/Gallon'), +band = ( + alt.Chart(source) + .mark_errorband(extent="stdev") + .encode( + x="Year", + y=alt.Y("Miles_per_Gallon", title="Miles/Gallon"), + ) ) band + line diff --git a/vegafusion-python/tests/altair_mocks/line/with_cumsum/mock.py b/vegafusion-python/tests/altair_mocks/line/with_cumsum/mock.py index 2e4f80830..6311f52d8 100644 --- a/vegafusion-python/tests/altair_mocks/line/with_cumsum/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/with_cumsum/mock.py @@ -7,14 +7,14 @@ alt.Chart(source).mark_line().transform_window( # Sort the data chronologically - sort=[{'field': 'year'}], + sort=[{"field": "year"}], # Include all previous records before the current record and none after # (This is the default value so you could skip it and it would still work.) 
frame=[None, 0], # What to add up as you go - cumulative_wheat='sum(wheat)' + cumulative_wheat="sum(wheat)", ).encode( - x='year:O', + x="year:O", # Plot the calculated field created by the transformation - y='cumulative_wheat:Q' + y="cumulative_wheat:Q", ).properties(width=600) diff --git a/vegafusion-python/tests/altair_mocks/line/with_datum/mock.py b/vegafusion-python/tests/altair_mocks/line/with_datum/mock.py index 177179182..c8ffd08a1 100644 --- a/vegafusion-python/tests/altair_mocks/line/with_datum/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/with_datum/mock.py @@ -5,21 +5,15 @@ source = data.stocks() -lines = ( - alt.Chart(source) - .mark_line() - .encode(x="date", y="price", color="symbol") -) +lines = alt.Chart(source).mark_line().encode(x="date", y="price", color="symbol") xrule = ( alt.Chart() - .mark_rule(color="cyan", strokeWidth=2) - .encode(x=alt.datum(alt.DateTime(year=2006, month="November"))) + .mark_rule(color="cyan", strokeWidth=2) + .encode(x=alt.datum(alt.DateTime(year=2006, month="November"))) ) -yrule = ( - alt.Chart().mark_rule(strokeDash=[12, 6], size=2).encode(y=alt.datum(350)) -) +yrule = alt.Chart().mark_rule(strokeDash=[12, 6], size=2).encode(y=alt.datum(350)) -lines + yrule + xrule \ No newline at end of file +lines + yrule + xrule diff --git a/vegafusion-python/tests/altair_mocks/line/with_generator/mock.py b/vegafusion-python/tests/altair_mocks/line/with_generator/mock.py index c3f491da5..f484aa1da 100644 --- a/vegafusion-python/tests/altair_mocks/line/with_generator/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/with_generator/mock.py @@ -2,15 +2,8 @@ import altair as alt -source = alt.sequence(start=0, stop=12.71, step=0.1, as_='x') +source = alt.sequence(start=0, stop=12.71, step=0.1, as_="x") alt.Chart(source).mark_line().transform_calculate( - sin='sin(datum.x)', - cos='cos(datum.x)' -).transform_fold( - ['sin', 'cos'] -).encode( - x='x:Q', - y='value:Q', - color='key:N' -) + sin="sin(datum.x)", 
cos="cos(datum.x)" +).transform_fold(["sin", "cos"]).encode(x="x:Q", y="value:Q", color="key:N") diff --git a/vegafusion-python/tests/altair_mocks/line/with_logarithmic_scale/mock.py b/vegafusion-python/tests/altair_mocks/line/with_logarithmic_scale/mock.py index b0fec2a40..525b6aff5 100644 --- a/vegafusion-python/tests/altair_mocks/line/with_logarithmic_scale/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/with_logarithmic_scale/mock.py @@ -6,9 +6,9 @@ source = data.population() alt.Chart(source).mark_line().encode( - x='year:O', + x="year:O", y=alt.Y( - 'sum(people)', - scale=alt.Scale(type="log") # Here the scale is applied - ) + "sum(people)", + scale=alt.Scale(type="log"), # Here the scale is applied + ), ) diff --git a/vegafusion-python/tests/altair_mocks/line/with_points/mock.py b/vegafusion-python/tests/altair_mocks/line/with_points/mock.py index 3ddc0be7e..c19d6008a 100644 --- a/vegafusion-python/tests/altair_mocks/line/with_points/mock.py +++ b/vegafusion-python/tests/altair_mocks/line/with_points/mock.py @@ -5,14 +5,8 @@ import pandas as pd x = np.arange(100) -source = pd.DataFrame({ - 'x': x, - 'f(x)': np.sin(x / 5) -}) +source = pd.DataFrame({"x": x, "f(x)": np.sin(x / 5)}) -alt.Chart(source).mark_line( - point=alt.OverlayMarkDef(color="red") -).encode( - x='x', - y='f(x)' +alt.Chart(source).mark_line(point=alt.OverlayMarkDef(color="red")).encode( + x="x", y="f(x)" ) diff --git a/vegafusion-python/tests/altair_mocks/maps/airports_count/mock.py b/vegafusion-python/tests/altair_mocks/maps/airports_count/mock.py index 7736762f8..954cfac90 100644 --- a/vegafusion-python/tests/altair_mocks/maps/airports_count/mock.py +++ b/vegafusion-python/tests/altair_mocks/maps/airports_count/mock.py @@ -4,31 +4,34 @@ from vega_datasets import data airports = data.airports.url -states = alt.topo_feature(data.us_10m.url, feature='states') +states = alt.topo_feature(data.us_10m.url, feature="states") # US states background -background = 
alt.Chart(states).mark_geoshape( - fill='lightgray', - stroke='white' -).properties( - width=500, - height=300 -).project('albersUsa') +background = ( + alt.Chart(states) + .mark_geoshape(fill="lightgray", stroke="white") + .properties(width=500, height=300) + .project("albersUsa") +) # airport positions on background -points = alt.Chart(airports).transform_aggregate( - latitude='mean(latitude)', - longitude='mean(longitude)', - count='count()', - groupby=['state'] -).mark_circle().encode( - longitude='longitude:Q', - latitude='latitude:Q', - size=alt.Size('count:Q', title='Number of Airports'), - color=alt.value('steelblue'), - tooltip=['state:N','count:Q'] -).properties( - title='Number of airports in US' +points = ( + alt.Chart(airports) + .transform_aggregate( + latitude="mean(latitude)", + longitude="mean(longitude)", + count="count()", + groupby=["state"], + ) + .mark_circle() + .encode( + longitude="longitude:Q", + latitude="latitude:Q", + size=alt.Size("count:Q", title="Number of Airports"), + color=alt.value("steelblue"), + tooltip=["state:N", "count:Q"], + ) + .properties(title="Number of airports in US") ) background + points diff --git a/vegafusion-python/tests/altair_mocks/maps/choropleth/mock.py b/vegafusion-python/tests/altair_mocks/maps/choropleth/mock.py index 439ce0efb..680152188 100644 --- a/vegafusion-python/tests/altair_mocks/maps/choropleth/mock.py +++ b/vegafusion-python/tests/altair_mocks/maps/choropleth/mock.py @@ -3,17 +3,9 @@ import altair as alt from vega_datasets import data -counties = alt.topo_feature(data.us_10m.url, 'counties') +counties = alt.topo_feature(data.us_10m.url, "counties") source = data.unemployment.url -alt.Chart(counties).mark_geoshape().encode( - color='rate:Q' -).transform_lookup( - lookup='id', - from_=alt.LookupData(source, 'id', ['rate']) -).project( - type='albersUsa' -).properties( - width=500, - height=300 -) +alt.Chart(counties).mark_geoshape().encode(color="rate:Q").transform_lookup( + lookup="id", 
from_=alt.LookupData(source, "id", ["rate"]) +).project(type="albersUsa").properties(width=500, height=300) diff --git a/vegafusion-python/tests/altair_mocks/maps/choropleth_repeat/mock.py b/vegafusion-python/tests/altair_mocks/maps/choropleth_repeat/mock.py index 0712910d7..4d57ec6a3 100644 --- a/vegafusion-python/tests/altair_mocks/maps/choropleth_repeat/mock.py +++ b/vegafusion-python/tests/altair_mocks/maps/choropleth_repeat/mock.py @@ -3,22 +3,14 @@ import altair as alt from vega_datasets import data -states = alt.topo_feature(data.us_10m.url, 'states') +states = alt.topo_feature(data.us_10m.url, "states") source = data.population_engineers_hurricanes.url -variable_list = ['population', 'engineers', 'hurricanes'] +variable_list = ["population", "engineers", "hurricanes"] alt.Chart(states).mark_geoshape().encode( - alt.Color(alt.repeat('row'), type='quantitative') + alt.Color(alt.repeat("row"), type="quantitative") ).transform_lookup( - lookup='id', - from_=alt.LookupData(source, 'id', variable_list) -).properties( - width=500, - height=150 -).project( - type='albersUsa' -).repeat( + lookup="id", from_=alt.LookupData(source, "id", variable_list) +).properties(width=500, height=150).project(type="albersUsa").repeat( row=variable_list -).resolve_scale( - color='independent' -) +).resolve_scale(color="independent") diff --git a/vegafusion-python/tests/altair_mocks/maps/us_incomebrackets_by_state_facet/mock.py b/vegafusion-python/tests/altair_mocks/maps/us_incomebrackets_by_state_facet/mock.py index 88f0e471a..aae9eb930 100644 --- a/vegafusion-python/tests/altair_mocks/maps/us_incomebrackets_by_state_facet/mock.py +++ b/vegafusion-python/tests/altair_mocks/maps/us_incomebrackets_by_state_facet/mock.py @@ -4,21 +4,17 @@ import altair as alt from vega_datasets import data -states = alt.topo_feature(data.us_10m.url, 'states') +states = alt.topo_feature(data.us_10m.url, "states") source = data.income.url alt.Chart(source).mark_geoshape().encode( - shape='geo:G', - 
color='pct:Q', - tooltip=['name:N', 'pct:Q'], - facet=alt.Facet('group:N', columns=3), + shape="geo:G", + color="pct:Q", + tooltip=["name:N", "pct:Q"], + facet=alt.Facet("group:N", columns=3), ).transform_lookup( - lookup='id', - from_=alt.LookupData(data=states, key='id'), - as_='geo' + lookup="id", from_=alt.LookupData(data=states, key="id"), as_="geo" ).properties( width=150, height=80, -).project( - type='albersUsa' -) \ No newline at end of file +).project(type="albersUsa") diff --git a/vegafusion-python/tests/altair_mocks/maps/world/mock.py b/vegafusion-python/tests/altair_mocks/maps/world/mock.py index dd83054c0..062678b3a 100644 --- a/vegafusion-python/tests/altair_mocks/maps/world/mock.py +++ b/vegafusion-python/tests/altair_mocks/maps/world/mock.py @@ -8,13 +8,11 @@ graticule = alt.graticule() # Source of land data -source = alt.topo_feature(data.world_110m.url, 'countries') +source = alt.topo_feature(data.world_110m.url, "countries") # Layering and configuring the components alt.layer( - alt.Chart(sphere).mark_geoshape(fill='lightblue'), - alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5), - alt.Chart(source).mark_geoshape(fill='ForestGreen', stroke='black') -).project( - 'naturalEarth1' -).properties(width=600, height=400).configure_view(stroke=None) + alt.Chart(sphere).mark_geoshape(fill="lightblue"), + alt.Chart(graticule).mark_geoshape(stroke="white", strokeWidth=0.5), + alt.Chart(source).mark_geoshape(fill="ForestGreen", stroke="black"), +).project("naturalEarth1").properties(width=600, height=400).configure_view(stroke=None) diff --git a/vegafusion-python/tests/altair_mocks/maps/world_projections/mock.py b/vegafusion-python/tests/altair_mocks/maps/world_projections/mock.py index d55546322..d5394e340 100644 --- a/vegafusion-python/tests/altair_mocks/maps/world_projections/mock.py +++ b/vegafusion-python/tests/altair_mocks/maps/world_projections/mock.py @@ -3,18 +3,15 @@ import altair as alt from vega_datasets import data -source = 
alt.topo_feature(data.world_110m.url, 'countries') +source = alt.topo_feature(data.world_110m.url, "countries") -base = alt.Chart(source).mark_geoshape( - fill='#666666', - stroke='white' -).properties( - width=300, - height=180 +base = ( + alt.Chart(source) + .mark_geoshape(fill="#666666", stroke="white") + .properties(width=300, height=180) ) -projections = ['equirectangular', 'mercator', 'orthographic', 'gnomonic'] -charts = [base.project(proj).properties(title=proj) - for proj in projections] +projections = ["equirectangular", "mercator", "orthographic", "gnomonic"] +charts = [base.project(proj).properties(title=proj) for proj in projections] alt.concat(*charts, columns=2) diff --git a/vegafusion-python/tests/altair_mocks/other/bar_chart_with_highlighted_segment/mock.py b/vegafusion-python/tests/altair_mocks/other/bar_chart_with_highlighted_segment/mock.py index 1fa582e0a..465e42fa9 100644 --- a/vegafusion-python/tests/altair_mocks/other/bar_chart_with_highlighted_segment/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/bar_chart_with_highlighted_segment/mock.py @@ -7,21 +7,23 @@ source = data.wheat() threshold = pd.DataFrame([{"threshold": 90}]) -bars = alt.Chart(source).mark_bar().encode( - x="year:O", - y="wheat:Q", +bars = ( + alt.Chart(source) + .mark_bar() + .encode( + x="year:O", + y="wheat:Q", + ) ) -highlight = alt.Chart(source).mark_bar(color="#e45755").encode( - x='year:O', - y='baseline:Q', - y2='wheat:Q' -).transform_filter( - alt.datum.wheat > 90 -).transform_calculate("baseline", "90") - -rule = alt.Chart(threshold).mark_rule().encode( - y='threshold:Q' +highlight = ( + alt.Chart(source) + .mark_bar(color="#e45755") + .encode(x="year:O", y="baseline:Q", y2="wheat:Q") + .transform_filter(alt.datum.wheat > 90) + .transform_calculate("baseline", "90") ) +rule = alt.Chart(threshold).mark_rule().encode(y="threshold:Q") + (bars + highlight + rule).properties(width=500) diff --git 
a/vegafusion-python/tests/altair_mocks/other/beckers_barley_wrapped_facet/mock.py b/vegafusion-python/tests/altair_mocks/other/beckers_barley_wrapped_facet/mock.py index 808eef13f..45d87948f 100644 --- a/vegafusion-python/tests/altair_mocks/other/beckers_barley_wrapped_facet/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/beckers_barley_wrapped_facet/mock.py @@ -6,10 +6,10 @@ source = data.barley.url alt.Chart(source).mark_point().encode( - alt.X('median(yield):Q', scale=alt.Scale(zero=False)), - y='variety:O', - color='year:N', - facet=alt.Facet('site:O', columns=2), + alt.X("median(yield):Q", scale=alt.Scale(zero=False)), + y="variety:O", + color="year:N", + facet=alt.Facet("site:O", columns=2), ).properties( width=200, height=100, diff --git a/vegafusion-python/tests/altair_mocks/other/binned_heatmap/mock.py b/vegafusion-python/tests/altair_mocks/other/binned_heatmap/mock.py index 93d86d2b7..2875679db 100644 --- a/vegafusion-python/tests/altair_mocks/other/binned_heatmap/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/binned_heatmap/mock.py @@ -6,7 +6,7 @@ source = data.movies.url alt.Chart(source).mark_rect().encode( - alt.X('IMDB_Rating:Q', bin=alt.Bin(maxbins=60)), - alt.Y('Rotten_Tomatoes_Rating:Q', bin=alt.Bin(maxbins=40)), - alt.Color('count():Q', scale=alt.Scale(scheme='greenblue')) + alt.X("IMDB_Rating:Q", bin=alt.Bin(maxbins=60)), + alt.Y("Rotten_Tomatoes_Rating:Q", bin=alt.Bin(maxbins=40)), + alt.Color("count():Q", scale=alt.Scale(scheme="greenblue")), ) diff --git a/vegafusion-python/tests/altair_mocks/other/boxplot/mock.py b/vegafusion-python/tests/altair_mocks/other/boxplot/mock.py index fd87cbcef..b485a48ea 100644 --- a/vegafusion-python/tests/altair_mocks/other/boxplot/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/boxplot/mock.py @@ -5,7 +5,4 @@ source = data.population.url -alt.Chart(source).mark_boxplot(extent='min-max').encode( - x='age:O', - y='people:Q' -) 
+alt.Chart(source).mark_boxplot(extent="min-max").encode(x="age:O", y="people:Q") diff --git a/vegafusion-python/tests/altair_mocks/other/candlestick_chart/mock.py b/vegafusion-python/tests/altair_mocks/other/candlestick_chart/mock.py index e59bfe139..2b218fd3f 100644 --- a/vegafusion-python/tests/altair_mocks/other/candlestick_chart/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/candlestick_chart/mock.py @@ -5,33 +5,26 @@ source = data.ohlc() -open_close_color = alt.condition("datum.open <= datum.close", - alt.value("#06982d"), - alt.value("#ae1325")) +open_close_color = alt.condition( + "datum.open <= datum.close", alt.value("#06982d"), alt.value("#ae1325") +) base = alt.Chart(source).encode( - alt.X('date:T', - axis=alt.Axis( - format='%m/%d', - labelAngle=-45, - title='Date in 2009' - ) - ), - color=open_close_color + alt.X( + "date:T", axis=alt.Axis(format="%m/%d", labelAngle=-45, title="Date in 2009") + ), + color=open_close_color, ) rule = base.mark_rule().encode( alt.Y( - 'low:Q', - title='Price', + "low:Q", + title="Price", scale=alt.Scale(zero=False), ), - alt.Y2('high:Q') + alt.Y2("high:Q"), ) -bar = base.mark_bar().encode( - alt.Y('open:Q'), - alt.Y2('close:Q') -) +bar = base.mark_bar().encode(alt.Y("open:Q"), alt.Y2("close:Q")) rule + bar diff --git a/vegafusion-python/tests/altair_mocks/other/comet_chart/mock.py b/vegafusion-python/tests/altair_mocks/other/comet_chart/mock.py index e764cf306..a2cb0abd2 100644 --- a/vegafusion-python/tests/altair_mocks/other/comet_chart/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/comet_chart/mock.py @@ -5,20 +5,27 @@ ( alt.Chart(vega_datasets.data.barley.url) - .transform_pivot("year", value="yield", groupby=["variety", "site"]) - .transform_fold(["1931", "1932"], as_=["year", "yield"]) - .transform_calculate(calculate="datum['1932'] - datum['1931']", as_="delta") - .mark_trail() - .encode( - x=alt.X('year:O', title=None), - y=alt.Y('variety:N', title='Variety'), - size=alt.Size('yield:Q', 
scale=alt.Scale(range=[0, 12]), legend=alt.Legend(values=[20, 60], title='Barley Yield (bushels/acre)')), - color=alt.Color('delta:Q', scale=alt.Scale(domainMid=0), legend=alt.Legend(title='Yield Delta (%)')), - tooltip=alt.Tooltip(['year:O', 'yield:Q']), - column=alt.Column('site:N', title='Site') - + .transform_pivot("year", value="yield", groupby=["variety", "site"]) + .transform_fold(["1931", "1932"], as_=["year", "yield"]) + .transform_calculate(calculate="datum['1932'] - datum['1931']", as_="delta") + .mark_trail() + .encode( + x=alt.X("year:O", title=None), + y=alt.Y("variety:N", title="Variety"), + size=alt.Size( + "yield:Q", + scale=alt.Scale(range=[0, 12]), + legend=alt.Legend(values=[20, 60], title="Barley Yield (bushels/acre)"), + ), + color=alt.Color( + "delta:Q", + scale=alt.Scale(domainMid=0), + legend=alt.Legend(title="Yield Delta (%)"), + ), + tooltip=alt.Tooltip(["year:O", "yield:Q"]), + column=alt.Column("site:N", title="Site"), ) - .configure_view(stroke=None) - .configure_legend(orient='bottom', direction='horizontal') - .properties(title='Barley Yield comparison between 1932 and 1931') + .configure_view(stroke=None) + .configure_legend(orient="bottom", direction="horizontal") + .properties(title="Barley Yield comparison between 1932 and 1931") ) diff --git a/vegafusion-python/tests/altair_mocks/other/errorbars_with_ci/mock.py b/vegafusion-python/tests/altair_mocks/other/errorbars_with_ci/mock.py index 39fbea73d..58b266d0b 100644 --- a/vegafusion-python/tests/altair_mocks/other/errorbars_with_ci/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/errorbars_with_ci/mock.py @@ -6,14 +6,19 @@ source = data.barley() -error_bars = alt.Chart(source).mark_errorbar(extent='ci').encode( - x=alt.X('yield:Q', scale=alt.Scale(zero=False)), - y=alt.Y('variety:N') +error_bars = ( + alt.Chart(source) + .mark_errorbar(extent="ci") + .encode(x=alt.X("yield:Q", scale=alt.Scale(zero=False)), y=alt.Y("variety:N")) ) -points = 
alt.Chart(source).mark_point(filled=True, color='black').encode( - x=alt.X('yield:Q', aggregate='mean'), - y=alt.Y('variety:N'), +points = ( + alt.Chart(source) + .mark_point(filled=True, color="black") + .encode( + x=alt.X("yield:Q", aggregate="mean"), + y=alt.Y("variety:N"), + ) ) (error_bars + points).properties( diff --git a/vegafusion-python/tests/altair_mocks/other/errorbars_with_std/mock.py b/vegafusion-python/tests/altair_mocks/other/errorbars_with_std/mock.py index 0a5e21f87..853c5a878 100644 --- a/vegafusion-python/tests/altair_mocks/other/errorbars_with_std/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/errorbars_with_std/mock.py @@ -5,14 +5,19 @@ source = data.barley() -error_bars = alt.Chart(source).mark_errorbar(extent='stdev').encode( - x=alt.X('yield:Q', scale=alt.Scale(zero=False)), - y=alt.Y('variety:N') +error_bars = ( + alt.Chart(source) + .mark_errorbar(extent="stdev") + .encode(x=alt.X("yield:Q", scale=alt.Scale(zero=False)), y=alt.Y("variety:N")) ) -points = alt.Chart(source).mark_point(filled=True, color='black').encode( - x=alt.X('yield:Q', aggregate='mean'), - y=alt.Y('variety:N'), +points = ( + alt.Chart(source) + .mark_point(filled=True, color="black") + .encode( + x=alt.X("yield:Q", aggregate="mean"), + y=alt.Y("variety:N"), + ) ) error_bars + points diff --git a/vegafusion-python/tests/altair_mocks/other/gantt_chart/mock.py b/vegafusion-python/tests/altair_mocks/other/gantt_chart/mock.py index da0f7f44c..cca1fb278 100644 --- a/vegafusion-python/tests/altair_mocks/other/gantt_chart/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/gantt_chart/mock.py @@ -3,14 +3,12 @@ import altair as alt import pandas as pd -source = pd.DataFrame([ - {"task": "A", "start": 1, "end": 3}, - {"task": "B", "start": 3, "end": 8}, - {"task": "C", "start": 8, "end": 10} -]) - -alt.Chart(source).mark_bar().encode( - x='start', - x2='end', - y='task' +source = pd.DataFrame( + [ + {"task": "A", "start": 1, "end": 3}, + {"task": "B", "start": 3, 
"end": 8}, + {"task": "C", "start": 8, "end": 10}, + ] ) + +alt.Chart(source).mark_bar().encode(x="start", x2="end", y="task") diff --git a/vegafusion-python/tests/altair_mocks/other/hexbins/mock.py b/vegafusion-python/tests/altair_mocks/other/hexbins/mock.py index 858670c82..f528a92af 100644 --- a/vegafusion-python/tests/altair_mocks/other/hexbins/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/hexbins/mock.py @@ -12,29 +12,31 @@ # Count of distinct y features yFeaturesCount = 7 # Name of the x field -xField = 'date' +xField = "date" # Name of the y field -yField = 'date' +yField = "date" # the shape of a hexagon hexagon = "M0,-2.3094010768L2,-1.1547005384 2,1.1547005384 0,2.3094010768 -2,1.1547005384 -2,-1.1547005384Z" alt.Chart(source).mark_point(size=size**2, shape=hexagon).encode( - x=alt.X('xFeaturePos:Q', axis=alt.Axis(title='Month', - grid=False, tickOpacity=0, domainOpacity=0)), - y=alt.Y('day(' + yField + '):O', axis=alt.Axis(title='Weekday', - labelPadding=20, tickOpacity=0, domainOpacity=0)), - stroke=alt.value('black'), + x=alt.X( + "xFeaturePos:Q", + axis=alt.Axis(title="Month", grid=False, tickOpacity=0, domainOpacity=0), + ), + y=alt.Y( + "day(" + yField + "):O", + axis=alt.Axis(title="Weekday", labelPadding=20, tickOpacity=0, domainOpacity=0), + ), + stroke=alt.value("black"), strokeWidth=alt.value(1), - fill=alt.Color('mean(temp_max):Q', scale=alt.Scale(scheme='darkblue')), - tooltip=['month(' + xField + '):O', 'day(' + yField + '):O', 'mean(temp_max):Q'] + fill=alt.Color("mean(temp_max):Q", scale=alt.Scale(scheme="darkblue")), + tooltip=["month(" + xField + "):O", "day(" + yField + "):O", "mean(temp_max):Q"], ).transform_calculate( # This field is required for the hexagonal X-Offset - xFeaturePos='(day(datum.' + yField + ') % 2) / 2 + month(datum.' + xField + ')' + xFeaturePos="(day(datum." + yField + ") % 2) / 2 + month(datum." 
+ xField + ")" ).properties( # Exact scaling factors to make the hexbins fit width=size * xFeaturesCount * 2, height=size * yFeaturesCount * 1.7320508076, # 1.7320508076 is approx. sin(60°)*2 -).configure_view( - strokeWidth=0 -) +).configure_view(strokeWidth=0) diff --git a/vegafusion-python/tests/altair_mocks/other/isotype_grid/mock.py b/vegafusion-python/tests/altair_mocks/other/isotype_grid/mock.py index 398b54146..36dff9314 100644 --- a/vegafusion-python/tests/altair_mocks/other/isotype_grid/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/isotype_grid/mock.py @@ -15,20 +15,10 @@ "-0.6 -0.4 -0.6z" ) -alt.Chart(data).transform_calculate( - row="ceil(datum.id/10)" -).transform_calculate( +alt.Chart(data).transform_calculate(row="ceil(datum.id/10)").transform_calculate( col="datum.id - datum.row*10" -).mark_point( - filled=True, - size=50 -).encode( +).mark_point(filled=True, size=50).encode( x=alt.X("col:O", axis=None), y=alt.Y("row:O", axis=None), - shape=alt.ShapeValue(person) -).properties( - width=400, - height=400 -).configure_view( - strokeWidth=0 -) + shape=alt.ShapeValue(person), +).properties(width=400, height=400).configure_view(strokeWidth=0) diff --git a/vegafusion-python/tests/altair_mocks/other/layered_chart_with_dual_axis/mock.py b/vegafusion-python/tests/altair_mocks/other/layered_chart_with_dual_axis/mock.py index f11f2040d..415808618 100644 --- a/vegafusion-python/tests/altair_mocks/other/layered_chart_with_dual_axis/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/layered_chart_with_dual_axis/mock.py @@ -5,21 +5,21 @@ source = data.seattle_weather() -base = alt.Chart(source).encode( - alt.X('month(date):T', axis=alt.Axis(title=None)) -) +base = alt.Chart(source).encode(alt.X("month(date):T", axis=alt.Axis(title=None))) -area = base.mark_area(opacity=0.3, color='#57A44C').encode( - alt.Y('average(temp_max)', - axis=alt.Axis(title='Avg. 
Temperature (°C)', titleColor='#57A44C')), - alt.Y2('average(temp_min)') +area = base.mark_area(opacity=0.3, color="#57A44C").encode( + alt.Y( + "average(temp_max)", + axis=alt.Axis(title="Avg. Temperature (°C)", titleColor="#57A44C"), + ), + alt.Y2("average(temp_min)"), ) -line = base.mark_line(stroke='#5276A7', interpolate='monotone').encode( - alt.Y('average(precipitation)', - axis=alt.Axis(title='Precipitation (inches)', titleColor='#5276A7')) +line = base.mark_line(stroke="#5276A7", interpolate="monotone").encode( + alt.Y( + "average(precipitation)", + axis=alt.Axis(title="Precipitation (inches)", titleColor="#5276A7"), + ) ) -alt.layer(area, line).resolve_scale( - y = 'independent' -) \ No newline at end of file +alt.layer(area, line).resolve_scale(y="independent") diff --git a/vegafusion-python/tests/altair_mocks/other/layered_heatmap_text/mock.py b/vegafusion-python/tests/altair_mocks/other/layered_heatmap_text/mock.py index 746e3eb0b..4b7a9668d 100644 --- a/vegafusion-python/tests/altair_mocks/other/layered_heatmap_text/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/layered_heatmap_text/mock.py @@ -6,30 +6,30 @@ source = data.cars() # Configure common options -base = alt.Chart(source).transform_aggregate( - num_cars='count()', - groupby=['Origin', 'Cylinders'] -).encode( - alt.X('Cylinders:O', scale=alt.Scale(paddingInner=0)), - alt.Y('Origin:O', scale=alt.Scale(paddingInner=0)), +base = ( + alt.Chart(source) + .transform_aggregate(num_cars="count()", groupby=["Origin", "Cylinders"]) + .encode( + alt.X("Cylinders:O", scale=alt.Scale(paddingInner=0)), + alt.Y("Origin:O", scale=alt.Scale(paddingInner=0)), + ) ) # Configure heatmap heatmap = base.mark_rect().encode( - color=alt.Color('num_cars:Q', - scale=alt.Scale(scheme='viridis'), - legend=alt.Legend(direction='horizontal') - ) + color=alt.Color( + "num_cars:Q", + scale=alt.Scale(scheme="viridis"), + legend=alt.Legend(direction="horizontal"), + ) ) # Configure text -text = 
base.mark_text(baseline='middle').encode( - text='num_cars:Q', +text = base.mark_text(baseline="middle").encode( + text="num_cars:Q", color=alt.condition( - alt.datum.num_cars > 100, - alt.value('black'), - alt.value('white') - ) + alt.datum.num_cars > 100, alt.value("black"), alt.value("white") + ), ) # Draw the chart diff --git a/vegafusion-python/tests/altair_mocks/other/multiple_marks/mock.py b/vegafusion-python/tests/altair_mocks/other/multiple_marks/mock.py index 71cbc4450..55311b44a 100644 --- a/vegafusion-python/tests/altair_mocks/other/multiple_marks/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/multiple_marks/mock.py @@ -6,7 +6,5 @@ source = data.stocks() alt.Chart(source).mark_line(point=True).encode( - x='date:T', - y='price:Q', - color='symbol:N' + x="date:T", y="price:Q", color="symbol:N" ) diff --git a/vegafusion-python/tests/altair_mocks/other/normed_parallel_coordinates/mock.py b/vegafusion-python/tests/altair_mocks/other/normed_parallel_coordinates/mock.py index 2b984d091..f1d86fbce 100644 --- a/vegafusion-python/tests/altair_mocks/other/normed_parallel_coordinates/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/normed_parallel_coordinates/mock.py @@ -1,26 +1,22 @@ # https://altair-viz.github.io/gallery/normed_parallel_coordinates.html import altair as alt -from vega_datasets import data from altair import datum +from vega_datasets import data source = data.iris() -alt.Chart(source).transform_window( - index='count()' -).transform_fold( - ['petalLength', 'petalWidth', 'sepalLength', 'sepalWidth'] +alt.Chart(source).transform_window(index="count()").transform_fold( + ["petalLength", "petalWidth", "sepalLength", "sepalWidth"] ).transform_joinaggregate( - min='min(value)', - max='max(value)', - groupby=['key'] + min="min(value)", max="max(value)", groupby=["key"] ).transform_calculate( - minmax_value=(datum.value-datum.min)/(datum.max-datum.min), - mid=(datum.min+datum.max)/2 + minmax_value=(datum.value - datum.min) / (datum.max - 
datum.min), + mid=(datum.min + datum.max) / 2, ).mark_line().encode( - x='key:N', - y='minmax_value:Q', - color='species:N', - detail='index:N', - opacity=alt.value(0.5) + x="key:N", + y="minmax_value:Q", + color="species:N", + detail="index:N", + opacity=alt.value(0.5), ).properties(width=500) diff --git a/vegafusion-python/tests/altair_mocks/other/parallel_coordinates/mock.py b/vegafusion-python/tests/altair_mocks/other/parallel_coordinates/mock.py index 6210ee669..9b4f82e04 100644 --- a/vegafusion-python/tests/altair_mocks/other/parallel_coordinates/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/parallel_coordinates/mock.py @@ -5,14 +5,8 @@ source = data.iris() -alt.Chart(source).transform_window( - index='count()' -).transform_fold( - ['petalLength', 'petalWidth', 'sepalLength', 'sepalWidth'] +alt.Chart(source).transform_window(index="count()").transform_fold( + ["petalLength", "petalWidth", "sepalLength", "sepalWidth"] ).mark_line().encode( - x='key:N', - y='value:Q', - color='species:N', - detail='index:N', - opacity=alt.value(0.5) + x="key:N", y="value:Q", color="species:N", detail="index:N", opacity=alt.value(0.5) ).properties(width=500) diff --git a/vegafusion-python/tests/altair_mocks/other/ranged_dot_plot/mock.py b/vegafusion-python/tests/altair_mocks/other/ranged_dot_plot/mock.py index 00d930a9c..abe477cc2 100644 --- a/vegafusion-python/tests/altair_mocks/other/ranged_dot_plot/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/ranged_dot_plot/mock.py @@ -5,35 +5,34 @@ source = data.countries.url -chart = alt.layer( - data=source -).transform_filter( - filter={"field": 'country', - "oneOf": ["China", "India", "United States", "Indonesia", "Brazil"]} -).transform_filter( - filter={'field': 'year', - "oneOf": [1955, 2000]} +chart = ( + alt.layer(data=source) + .transform_filter( + filter={ + "field": "country", + "oneOf": ["China", "India", "United States", "Indonesia", "Brazil"], + } + ) + .transform_filter(filter={"field": "year", 
"oneOf": [1955, 2000]}) ) -chart += alt.Chart().mark_line(color='#db646f').encode( - x='life_expect:Q', - y='country:N', - detail='country:N' +chart += ( + alt.Chart() + .mark_line(color="#db646f") + .encode(x="life_expect:Q", y="country:N", detail="country:N") ) # Add points for life expectancy in 1955 & 2000 -chart += alt.Chart().mark_point( - size=100, - opacity=1, - filled=True -).encode( - x='life_expect:Q', - y='country:N', - color=alt.Color('year:O', - scale=alt.Scale( - domain=[1955, 2000], - range=['#e6959c', '#911a24'] - ) - ) -).interactive() +chart += ( + alt.Chart() + .mark_point(size=100, opacity=1, filled=True) + .encode( + x="life_expect:Q", + y="country:N", + color=alt.Color( + "year:O", scale=alt.Scale(domain=[1955, 2000], range=["#e6959c", "#911a24"]) + ), + ) + .interactive() +) chart diff --git a/vegafusion-python/tests/altair_mocks/other/ridgeline_plot/mock.py b/vegafusion-python/tests/altair_mocks/other/ridgeline_plot/mock.py index 2a164c85a..4040d682c 100644 --- a/vegafusion-python/tests/altair_mocks/other/ridgeline_plot/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/ridgeline_plot/mock.py @@ -1,7 +1,6 @@ # https://altair-viz.github.io/gallery/ridgeline_plot.html import altair as alt -from vega_datasets import data source = "https://raw.githubusercontent.com/vega/vega-datasets/v1.31.1/data/seattle-weather.csv" @@ -9,45 +8,29 @@ overlap = 1 alt.Chart(source, height=step).transform_timeunit( - Month='month(date)' -).transform_joinaggregate( - mean_temp='mean(temp_max)', groupby=['Month'] -).transform_bin( - ['bin_max', 'bin_min'], 'temp_max' + Month="month(date)" +).transform_joinaggregate(mean_temp="mean(temp_max)", groupby=["Month"]).transform_bin( + ["bin_max", "bin_min"], "temp_max" ).transform_aggregate( - value='count()', groupby=['Month', 'mean_temp', 'bin_min', 'bin_max'] + value="count()", groupby=["Month", "mean_temp", "bin_min", "bin_max"] ).transform_impute( - impute='value', groupby=['Month', 'mean_temp'], key='bin_min', 
value=0 + impute="value", groupby=["Month", "mean_temp"], key="bin_min", value=0 ).mark_area( - interpolate='monotone', - fillOpacity=0.8, - stroke='lightgray', - strokeWidth=0.5 + interpolate="monotone", fillOpacity=0.8, stroke="lightgray", strokeWidth=0.5 ).encode( - alt.X('bin_min:Q', bin='binned', title='Maximum Daily Temperature (C)'), - alt.Y( - 'value:Q', - scale=alt.Scale(range=[step, -step * overlap]), - axis=None - ), + alt.X("bin_min:Q", bin="binned", title="Maximum Daily Temperature (C)"), + alt.Y("value:Q", scale=alt.Scale(range=[step, -step * overlap]), axis=None), alt.Fill( - 'mean_temp:Q', + "mean_temp:Q", legend=None, - scale=alt.Scale(domain=[30, 5], scheme='redyellowblue') - ) + scale=alt.Scale(domain=[30, 5], scheme="redyellowblue"), + ), ).facet( row=alt.Row( - 'Month:T', + "Month:T", title=None, - header=alt.Header(labelAngle=0, labelAlign='right', format='%B') + header=alt.Header(labelAngle=0, labelAlign="right", format="%B"), ) -).properties( - title='Seattle Weather', - bounds='flush' -).configure_facet( +).properties(title="Seattle Weather", bounds="flush").configure_facet( spacing=0 -).configure_view( - stroke=None -).configure_title( - anchor='end' -) +).configure_view(stroke=None).configure_title(anchor="end") diff --git a/vegafusion-python/tests/altair_mocks/other/scatter_marginal_hist/mock.py b/vegafusion-python/tests/altair_mocks/other/scatter_marginal_hist/mock.py index f4621d087..4522bdafc 100644 --- a/vegafusion-python/tests/altair_mocks/other/scatter_marginal_hist/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/scatter_marginal_hist/mock.py @@ -10,35 +10,45 @@ xscale = alt.Scale(domain=(4.0, 8.0)) yscale = alt.Scale(domain=(1.9, 4.55)) -bar_args = {'opacity': .3, 'binSpacing': 0} +bar_args = {"opacity": 0.3, "binSpacing": 0} points = base.mark_circle().encode( - alt.X('sepalLength', scale=xscale), - alt.Y('sepalWidth', scale=yscale), - color='species', + alt.X("sepalLength", scale=xscale), + alt.Y("sepalWidth", 
scale=yscale), + color="species", ) -top_hist = base.mark_bar(**bar_args).encode( - alt.X('sepalLength:Q', - # when using bins, the axis scale is set through - # the bin extent, so we do not specify the scale here - # (which would be ignored anyway) - bin=alt.Bin(maxbins=20, extent=xscale.domain), - stack=None, - title='' - ), - alt.Y('count()', stack=None, title=''), - alt.Color('species:N'), -).properties(height=60) - -right_hist = base.mark_bar(**bar_args).encode( - alt.Y('sepalWidth:Q', - bin=alt.Bin(maxbins=20, extent=yscale.domain), - stack=None, - title='', - ), - alt.X('count()', stack=None, title=''), - alt.Color('species:N'), -).properties(width=60) +top_hist = ( + base.mark_bar(**bar_args) + .encode( + alt.X( + "sepalLength:Q", + # when using bins, the axis scale is set through + # the bin extent, so we do not specify the scale here + # (which would be ignored anyway) + bin=alt.Bin(maxbins=20, extent=xscale.domain), + stack=None, + title="", + ), + alt.Y("count()", stack=None, title=""), + alt.Color("species:N"), + ) + .properties(height=60) +) + +right_hist = ( + base.mark_bar(**bar_args) + .encode( + alt.Y( + "sepalWidth:Q", + bin=alt.Bin(maxbins=20, extent=yscale.domain), + stack=None, + title="", + ), + alt.X("count()", stack=None, title=""), + alt.Color("species:N"), + ) + .properties(width=60) +) top_hist & (points | right_hist) diff --git a/vegafusion-python/tests/altair_mocks/other/sorted_error_bars_with_ci/mock.py b/vegafusion-python/tests/altair_mocks/other/sorted_error_bars_with_ci/mock.py index 6b987a5ff..f9a50a247 100644 --- a/vegafusion-python/tests/altair_mocks/other/sorted_error_bars_with_ci/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/sorted_error_bars_with_ci/mock.py @@ -6,27 +6,22 @@ source = data.barley() -points = alt.Chart(source).mark_point( - filled=True, - color='black' -).encode( - x=alt.X('mean(yield)', title='Barley Yield'), - y=alt.Y( - 'variety', - sort=alt.EncodingSortField( - field='yield', - op='mean', - 
order='descending' - ) +points = ( + alt.Chart(source) + .mark_point(filled=True, color="black") + .encode( + x=alt.X("mean(yield)", title="Barley Yield"), + y=alt.Y( + "variety", + sort=alt.EncodingSortField(field="yield", op="mean", order="descending"), + ), ) -).properties( - width=400, - height=250 + .properties(width=400, height=250) ) error_bars = points.mark_rule().encode( - x='ci0(yield)', - x2='ci1(yield)', + x="ci0(yield)", + x2="ci1(yield)", ) (points + error_bars).properties( diff --git a/vegafusion-python/tests/altair_mocks/other/stem_and_leaf/mock.py b/vegafusion-python/tests/altair_mocks/other/stem_and_leaf/mock.py index fe954f4f7..ecd141281 100644 --- a/vegafusion-python/tests/altair_mocks/other/stem_and_leaf/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/stem_and_leaf/mock.py @@ -1,35 +1,28 @@ # https://altair-viz.github.io/gallery/stem_and_leaf.html import altair as alt -import pandas as pd import numpy as np +import pandas as pd + np.random.seed(42) # Generating random data -source = pd.DataFrame({'samples': np.random.normal(50, 15, 100).astype(int).astype(str)}) +source = pd.DataFrame( + {"samples": np.random.normal(50, 15, 100).astype(int).astype(str)} +) # Splitting stem and leaf -source['stem'] = source['samples'].str[:-1] -source['leaf'] = source['samples'].str[-1] +source["stem"] = source["samples"].str[:-1] +source["leaf"] = source["samples"].str[-1] -source = source.sort_values(by=['stem', 'leaf']) +source = source.sort_values(by=["stem", "leaf"]) # Determining leaf position -source['position'] = source.groupby('stem').cumcount().add(1) +source["position"] = source.groupby("stem").cumcount().add(1) # Creating stem and leaf plot -alt.Chart(source).mark_text( - align='left', - baseline='middle', - dx=-5 -).encode( - alt.X('position:Q', title='', - axis=alt.Axis(ticks=False, labels=False, grid=False) - ), - alt.Y('stem:N', title='', axis=alt.Axis(tickSize=0)), - text='leaf:N', -).configure_axis( - labelFontSize=20 
-).configure_text( - fontSize=20 -) +alt.Chart(source).mark_text(align="left", baseline="middle", dx=-5).encode( + alt.X("position:Q", title="", axis=alt.Axis(ticks=False, labels=False, grid=False)), + alt.Y("stem:N", title="", axis=alt.Axis(tickSize=0)), + text="leaf:N", +).configure_axis(labelFontSize=20).configure_text(fontSize=20) diff --git a/vegafusion-python/tests/altair_mocks/other/violin_plot/mock.py b/vegafusion-python/tests/altair_mocks/other/violin_plot/mock.py index 4cdc79864..6ea02a44f 100644 --- a/vegafusion-python/tests/altair_mocks/other/violin_plot/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/violin_plot/mock.py @@ -4,32 +4,26 @@ from vega_datasets import data alt.Chart(data.cars()).transform_density( - 'Miles_per_Gallon', - as_=['Miles_per_Gallon', 'density'], + "Miles_per_Gallon", + as_=["Miles_per_Gallon", "density"], extent=[5, 50], - groupby=['Origin'] -).mark_area(orient='horizontal').encode( - y='Miles_per_Gallon:Q', - color='Origin:N', + groupby=["Origin"], +).mark_area(orient="horizontal").encode( + y="Miles_per_Gallon:Q", + color="Origin:N", x=alt.X( - 'density:Q', - stack='center', + "density:Q", + stack="center", impute=None, title=None, axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True), ), column=alt.Column( - 'Origin:N', + "Origin:N", header=alt.Header( - titleOrient='bottom', - labelOrient='bottom', + titleOrient="bottom", + labelOrient="bottom", labelPadding=0, ), - ) -).properties( - width=100 -).configure_facet( - spacing=0 -).configure_view( - stroke=None -) + ), +).properties(width=100).configure_facet(spacing=0).configure_view(stroke=None) diff --git a/vegafusion-python/tests/altair_mocks/other/wilkinson_dot_plot/mock.py b/vegafusion-python/tests/altair_mocks/other/wilkinson_dot_plot/mock.py index 5469282b9..29e3d7fc0 100644 --- a/vegafusion-python/tests/altair_mocks/other/wilkinson_dot_plot/mock.py +++ b/vegafusion-python/tests/altair_mocks/other/wilkinson_dot_plot/mock.py @@ -4,19 +4,11 @@ import 
pandas as pd source = pd.DataFrame( - {"data":[1,1,1,1,1,1,1,1,1,1, - 2,2,2, - 3,3, - 4,4,4,4,4,4] - } + {"data": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4]} ) alt.Chart(source).mark_circle(opacity=1).transform_window( - id='rank()', - groupby=['data'] -).encode( - alt.X('data:O'), - alt.Y('id:O', - axis=None, - sort='descending') -).properties(height=100) + id="rank()", groupby=["data"] +).encode(alt.X("data:O"), alt.Y("id:O", axis=None, sort="descending")).properties( + height=100 +) diff --git a/vegafusion-python/tests/altair_mocks/scatter/binned/mock.py b/vegafusion-python/tests/altair_mocks/scatter/binned/mock.py index 7a0086b35..3c2ddcd98 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/binned/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/binned/mock.py @@ -6,7 +6,7 @@ source = data.movies.url alt.Chart(source).mark_circle().encode( - alt.X('IMDB_Rating:Q', bin=True), - alt.Y('Rotten_Tomatoes_Rating:Q', bin=True), - size='count()' + alt.X("IMDB_Rating:Q", bin=True), + alt.Y("Rotten_Tomatoes_Rating:Q", bin=True), + size="count()", ) diff --git a/vegafusion-python/tests/altair_mocks/scatter/bubble_plot/mock.py b/vegafusion-python/tests/altair_mocks/scatter/bubble_plot/mock.py index 0317a568f..24acea8fa 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/bubble_plot/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/bubble_plot/mock.py @@ -6,7 +6,5 @@ source = data.cars() alt.Chart(source).mark_point().encode( - x='Horsepower', - y='Miles_per_Gallon', - size='Acceleration' + x="Horsepower", y="Miles_per_Gallon", size="Acceleration" ) diff --git a/vegafusion-python/tests/altair_mocks/scatter/connected/mock.py b/vegafusion-python/tests/altair_mocks/scatter/connected/mock.py index 8c92561b6..1aa0e0e22 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/connected/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/connected/mock.py @@ -6,7 +6,7 @@ source = data.driving() 
alt.Chart(source).mark_line(point=True).encode( - alt.X('miles', scale=alt.Scale(zero=False)), - alt.Y('gas', scale=alt.Scale(zero=False)), - order='year' + alt.X("miles", scale=alt.Scale(zero=False)), + alt.Y("gas", scale=alt.Scale(zero=False)), + order="year", ) diff --git a/vegafusion-python/tests/altair_mocks/scatter/dot_dash_plot/mock.py b/vegafusion-python/tests/altair_mocks/scatter/dot_dash_plot/mock.py index b1cc02a32..733f974a2 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/dot_dash_plot/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/dot_dash_plot/mock.py @@ -9,25 +9,25 @@ # Configure the points points = base.mark_point().encode( - x=alt.X('Miles_per_Gallon', title=''), - y=alt.Y('Horsepower', title=''), - color=alt.condition(brush, 'Origin', alt.value('grey')) + x=alt.X("Miles_per_Gallon", title=""), + y=alt.Y("Horsepower", title=""), + color=alt.condition(brush, "Origin", alt.value("grey")), ) # Configure the ticks tick_axis = alt.Axis(labels=False, domain=False, ticks=False) x_ticks = base.mark_tick().encode( - alt.X('Miles_per_Gallon', axis=tick_axis), - alt.Y('Origin', title='', axis=tick_axis), - color=alt.condition(brush, 'Origin', alt.value('lightgrey')) + alt.X("Miles_per_Gallon", axis=tick_axis), + alt.Y("Origin", title="", axis=tick_axis), + color=alt.condition(brush, "Origin", alt.value("lightgrey")), ) y_ticks = base.mark_tick().encode( - alt.X('Origin', title='', axis=tick_axis), - alt.Y('Horsepower', axis=tick_axis), - color=alt.condition(brush, 'Origin', alt.value('lightgrey')) + alt.X("Origin", title="", axis=tick_axis), + alt.Y("Horsepower", axis=tick_axis), + color=alt.condition(brush, "Origin", alt.value("lightgrey")), ) # Build the chart -y_ticks | (points & x_ticks) \ No newline at end of file +y_ticks | (points & x_ticks) diff --git a/vegafusion-python/tests/altair_mocks/scatter/matrix/mock.py b/vegafusion-python/tests/altair_mocks/scatter/matrix/mock.py index 353f686d0..0ab734eee 100644 --- 
a/vegafusion-python/tests/altair_mocks/scatter/matrix/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/matrix/mock.py @@ -6,13 +6,10 @@ source = data.cars() alt.Chart(source).mark_circle().encode( - alt.X(alt.repeat("column"), type='quantitative'), - alt.Y(alt.repeat("row"), type='quantitative'), - color='Origin:N' -).properties( - width=120, - height=120 -).repeat( - row=['Horsepower', 'Acceleration', 'Miles_per_Gallon'], - column=['Miles_per_Gallon', 'Acceleration', 'Horsepower'] -).interactive() \ No newline at end of file + alt.X(alt.repeat("column"), type="quantitative"), + alt.Y(alt.repeat("row"), type="quantitative"), + color="Origin:N", +).properties(width=120, height=120).repeat( + row=["Horsepower", "Acceleration", "Miles_per_Gallon"], + column=["Miles_per_Gallon", "Acceleration", "Horsepower"], +).interactive() diff --git a/vegafusion-python/tests/altair_mocks/scatter/multifeature/mock.py b/vegafusion-python/tests/altair_mocks/scatter/multifeature/mock.py index 71ecf9743..01d5daad9 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/multifeature/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/multifeature/mock.py @@ -6,8 +6,8 @@ source = data.iris() alt.Chart(source).mark_circle().encode( - alt.X('sepalLength', scale=alt.Scale(zero=False)), - alt.Y('sepalWidth', scale=alt.Scale(zero=False, padding=1)), - color='species', - size='petalWidth' + alt.X("sepalLength", scale=alt.Scale(zero=False)), + alt.Y("sepalWidth", scale=alt.Scale(zero=False, padding=1)), + color="species", + size="petalWidth", ) diff --git a/vegafusion-python/tests/altair_mocks/scatter/poly_fit_regression/mock.py b/vegafusion-python/tests/altair_mocks/scatter/poly_fit_regression/mock.py index 0cbface4a..15070da6b 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/poly_fit_regression/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/poly_fit_regression/mock.py @@ -1,8 +1,8 @@ # https://altair-viz.github.io/gallery/poly_fit_regression.html +import 
altair as alt import numpy as np import pandas as pd -import altair as alt # Generate some random data np.random.seed(42) @@ -14,17 +14,15 @@ # Define the degree of the polynomial fits degree_list = [1, 3, 5] -base = alt.Chart(source).mark_circle(color="black").encode( - alt.X("x"), alt.Y("y") -) +base = alt.Chart(source).mark_circle(color="black").encode(alt.X("x"), alt.Y("y")) polynomial_fit = [ base.transform_regression( "x", "y", method="poly", order=order, as_=["x", str(order)] ) - .mark_line() - .transform_fold([str(order)], as_=["degree", "y"]) - .encode(alt.Color("degree:N")) + .mark_line() + .transform_fold([str(order)], as_=["degree", "y"]) + .encode(alt.Color("degree:N")) for order in degree_list ] diff --git a/vegafusion-python/tests/altair_mocks/scatter/qq/mock.py b/vegafusion-python/tests/altair_mocks/scatter/qq/mock.py index 539143d95..5b3b4dc40 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/qq/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/qq/mock.py @@ -5,15 +5,15 @@ source = data.normal_2d.url -base = alt.Chart(source).transform_quantile( - 'u', - step=0.01, - as_ = ['p', 'v'] -).transform_calculate( - uniform = 'quantileUniform(datum.p)', - normal = 'quantileNormal(datum.p)' -).mark_point().encode( - alt.Y('v:Q') -).properties(width=300) +base = ( + alt.Chart(source) + .transform_quantile("u", step=0.01, as_=["p", "v"]) + .transform_calculate( + uniform="quantileUniform(datum.p)", normal="quantileNormal(datum.p)" + ) + .mark_point() + .encode(alt.Y("v:Q")) + .properties(width=300) +) -base.encode(x='uniform:Q') | base.encode(x='normal:Q') +base.encode(x="uniform:Q") | base.encode(x="normal:Q") diff --git a/vegafusion-python/tests/altair_mocks/scatter/stripplot/mock.py b/vegafusion-python/tests/altair_mocks/scatter/stripplot/mock.py index 199bcddcb..c6fad3b9e 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/stripplot/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/stripplot/mock.py @@ -6,32 +6,35 @@ 
source = data.movies.url -stripplot = alt.Chart(source, width=40, padding=60, bounds="flush").mark_circle(size=8).encode( - x=alt.X( - 'jitter:Q', - title=None, - axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False), - scale=alt.Scale(), - ), - y=alt.Y('IMDB_Rating:Q'), - color=alt.Color('Major_Genre:N', legend=None), - column=alt.Column( - 'Major_Genre:N', - header=alt.Header( - labelAngle=-90, - titleOrient='top', - labelOrient='bottom', - labelAlign='right', - labelPadding=3, +stripplot = ( + alt.Chart(source, width=40, padding=60, bounds="flush") + .mark_circle(size=8) + .encode( + x=alt.X( + "jitter:Q", + title=None, + axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False), + scale=alt.Scale(), ), - ), -).transform_calculate( - # Generate Gaussian jitter with a Box-Muller transform - jitter='sqrt(-2*log(random()))*cos(2*PI*random())' -).configure_facet( - spacing=0 -).configure_view( - stroke=None + y=alt.Y("IMDB_Rating:Q"), + color=alt.Color("Major_Genre:N", legend=None), + column=alt.Column( + "Major_Genre:N", + header=alt.Header( + labelAngle=-90, + titleOrient="top", + labelOrient="bottom", + labelAlign="right", + labelPadding=3, + ), + ), + ) + .transform_calculate( + # Generate Gaussian jitter with a Box-Muller transform + jitter="sqrt(-2*log(random()))*cos(2*PI*random())" + ) + .configure_facet(spacing=0) + .configure_view(stroke=None) ) stripplot diff --git a/vegafusion-python/tests/altair_mocks/scatter/table_bubble_plot_github/mock.py b/vegafusion-python/tests/altair_mocks/scatter/table_bubble_plot_github/mock.py index c8f648264..f38550c69 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/table_bubble_plot_github/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/table_bubble_plot_github/mock.py @@ -1,12 +1,9 @@ # https://altair-viz.github.io/gallery/table_bubble_plot_github.html import altair as alt -from vega_datasets import data source = 
"https://raw.githubusercontent.com/vega/vega-datasets/v1.31.1/data/github.csv" alt.Chart(source).mark_circle().encode( - x='hours(time):O', - y='day(time):O', - size='sum(count):Q' + x="hours(time):O", y="day(time):O", size="sum(count):Q" ) diff --git a/vegafusion-python/tests/altair_mocks/scatter/trellis/mock.py b/vegafusion-python/tests/altair_mocks/scatter/trellis/mock.py index 79e554e4d..ed10ff8a7 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/trellis/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/trellis/mock.py @@ -7,7 +7,5 @@ source = data.cars() alt.Chart(source).mark_point().encode( - x='Horsepower:Q', - y='Miles_per_Gallon:Q', - row='Origin:N' + x="Horsepower:Q", y="Miles_per_Gallon:Q", row="Origin:N" ).properties(height=150) diff --git a/vegafusion-python/tests/altair_mocks/scatter/with_errorbars/mock.py b/vegafusion-python/tests/altair_mocks/scatter/with_errorbars/mock.py index ba8235965..027c822be 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/with_errorbars/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/with_errorbars/mock.py @@ -1,8 +1,8 @@ # https://altair-viz.github.io/gallery/simple_scatter_with_errorbars.html import altair as alt -import pandas as pd import numpy as np +import pandas as pd # generate some data points with uncertainties np.random.seed(0) @@ -15,25 +15,16 @@ # the base chart base = alt.Chart(source).transform_calculate( - ymin="datum.y-datum.yerr", - ymax="datum.y+datum.yerr" + ymin="datum.y-datum.yerr", ymax="datum.y+datum.yerr" ) # generate the points -points = base.mark_point( - filled=True, - size=50, - color='black' -).encode( - x=alt.X('x', scale=alt.Scale(domain=(0, 6))), - y=alt.Y('y', scale=alt.Scale(zero=False)) +points = base.mark_point(filled=True, size=50, color="black").encode( + x=alt.X("x", scale=alt.Scale(domain=(0, 6))), + y=alt.Y("y", scale=alt.Scale(zero=False)), ) # generate the error bars -errorbars = base.mark_errorbar().encode( - x="x", - y="ymin:Q", - 
y2="ymax:Q" -) +errorbars = base.mark_errorbar().encode(x="x", y="ymin:Q", y2="ymax:Q") points + errorbars diff --git a/vegafusion-python/tests/altair_mocks/scatter/with_labels/mock.py b/vegafusion-python/tests/altair_mocks/scatter/with_labels/mock.py index d33e95754..74f685b8b 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/with_labels/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/with_labels/mock.py @@ -3,23 +3,12 @@ import altair as alt import pandas as pd -source = pd.DataFrame({ - 'x': [1, 3, 5, 7, 9], - 'y': [1, 3, 5, 7, 9], - 'label': ['A', 'B', 'C', 'D', 'E'] -}) - -points = alt.Chart(source).mark_point().encode( - x='x:Q', - y='y:Q' +source = pd.DataFrame( + {"x": [1, 3, 5, 7, 9], "y": [1, 3, 5, 7, 9], "label": ["A", "B", "C", "D", "E"]} ) -text = points.mark_text( - align='left', - baseline='middle', - dx=7 -).encode( - text='label' -) +points = alt.Chart(source).mark_point().encode(x="x:Q", y="y:Q") + +text = points.mark_text(align="left", baseline="middle", dx=7).encode(text="label") points + text diff --git a/vegafusion-python/tests/altair_mocks/scatter/with_lowess/mock.py b/vegafusion-python/tests/altair_mocks/scatter/with_lowess/mock.py index d05e710a7..c2672ff28 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/with_lowess/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/with_lowess/mock.py @@ -1,25 +1,25 @@ # https://altair-viz.github.io/gallery/scatter_with_loess.html import altair as alt -import pandas as pd import numpy as np +import pandas as pd np.random.seed(1) -source = pd.DataFrame({ - 'x': np.arange(100), - 'A': np.random.randn(100).cumsum(), - 'B': np.random.randn(100).cumsum(), - 'C': np.random.randn(100).cumsum(), -}) +source = pd.DataFrame( + { + "x": np.arange(100), + "A": np.random.randn(100).cumsum(), + "B": np.random.randn(100).cumsum(), + "C": np.random.randn(100).cumsum(), + } +) -base = alt.Chart(source).mark_circle(opacity=0.5).transform_fold( - fold=['A', 'B', 'C'], - as_=['category', 
'y'] -).encode( - alt.X('x:Q'), - alt.Y('y:Q'), - alt.Color('category:N') +base = ( + alt.Chart(source) + .mark_circle(opacity=0.5) + .transform_fold(fold=["A", "B", "C"], as_=["category", "y"]) + .encode(alt.X("x:Q"), alt.Y("y:Q"), alt.Color("category:N")) ) -base + base.transform_loess('x', 'y', groupby=['category']).mark_line(size=4) +base + base.transform_loess("x", "y", groupby=["category"]).mark_line(size=4) diff --git a/vegafusion-python/tests/altair_mocks/scatter/with_rolling_mean/mock.py b/vegafusion-python/tests/altair_mocks/scatter/with_rolling_mean/mock.py index 19ef84f80..33b1f4f26 100644 --- a/vegafusion-python/tests/altair_mocks/scatter/with_rolling_mean/mock.py +++ b/vegafusion-python/tests/altair_mocks/scatter/with_rolling_mean/mock.py @@ -5,22 +5,21 @@ source = data.seattle_weather() -line = alt.Chart(source).mark_line( - color='red', - size=3 -).transform_window( - rolling_mean='mean(temp_max)', - # sort=[{"field": "date"}], # Shouldn't need this - frame=[-15, 15] -).encode( - x='date:T', - y='rolling_mean:Q' +line = ( + alt.Chart(source) + .mark_line(color="red", size=3) + .transform_window( + rolling_mean="mean(temp_max)", + # sort=[{"field": "date"}], # Shouldn't need this + frame=[-15, 15], + ) + .encode(x="date:T", y="rolling_mean:Q") ) -points = alt.Chart(source).mark_point().encode( - x='date:T', - y=alt.Y('temp_max:Q', - axis=alt.Axis(title='Max Temp')) +points = ( + alt.Chart(source) + .mark_point() + .encode(x="date:T", y=alt.Y("temp_max:Q", axis=alt.Axis(title="Max Temp"))) ) points + line diff --git a/vegafusion-python/tests/altair_mocks/simple/bar_chart/mock.py b/vegafusion-python/tests/altair_mocks/simple/bar_chart/mock.py index 69e27af64..f5369a894 100644 --- a/vegafusion-python/tests/altair_mocks/simple/bar_chart/mock.py +++ b/vegafusion-python/tests/altair_mocks/simple/bar_chart/mock.py @@ -3,12 +3,11 @@ import altair as alt import pandas as pd -source = pd.DataFrame({ - 'a': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'], - 'b': 
[28, 55, 43, 91, 81, 53, 19, 87, 52] -}) +source = pd.DataFrame( + { + "a": ["A", "B", "C", "D", "E", "F", "G", "H", "I"], + "b": [28, 55, 43, 91, 81, 53, 19, 87, 52], + } +) -alt.Chart(source).mark_bar().encode( - x='a', - y='b' -) \ No newline at end of file +alt.Chart(source).mark_bar().encode(x="a", y="b") diff --git a/vegafusion-python/tests/altair_mocks/simple/heatmap/mock.py b/vegafusion-python/tests/altair_mocks/simple/heatmap/mock.py index ef9e7561d..f875a30f8 100644 --- a/vegafusion-python/tests/altair_mocks/simple/heatmap/mock.py +++ b/vegafusion-python/tests/altair_mocks/simple/heatmap/mock.py @@ -5,15 +5,9 @@ # Compute x^2 + y^2 across a 2D grid x, y = np.meshgrid(range(-5, 5), range(-5, 5)) -z = x ** 2 + y ** 2 +z = x**2 + y**2 # Convert this grid to columnar data expected by Altair -source = pd.DataFrame({'x': x.ravel(), - 'y': y.ravel(), - 'z': z.ravel()}) +source = pd.DataFrame({"x": x.ravel(), "y": y.ravel(), "z": z.ravel()}) -alt.Chart(source).mark_rect().encode( - x='x:O', - y='y:O', - color='z:Q' -) \ No newline at end of file +alt.Chart(source).mark_rect().encode(x="x:O", y="y:O", color="z:Q") diff --git a/vegafusion-python/tests/altair_mocks/simple/line_chart/mock.py b/vegafusion-python/tests/altair_mocks/simple/line_chart/mock.py index 702aed0a5..42af6e6dc 100644 --- a/vegafusion-python/tests/altair_mocks/simple/line_chart/mock.py +++ b/vegafusion-python/tests/altair_mocks/simple/line_chart/mock.py @@ -5,12 +5,6 @@ import pandas as pd x = np.arange(100) -source = pd.DataFrame({ - 'x': x, - 'f(x)': np.sin(x / 5) -}) +source = pd.DataFrame({"x": x, "f(x)": np.sin(x / 5)}) -alt.Chart(source).mark_line().encode( - x='x', - y='f(x)' -) +alt.Chart(source).mark_line().encode(x="x", y="f(x)") diff --git a/vegafusion-python/tests/altair_mocks/simple/scatter_tooltips/mock.py b/vegafusion-python/tests/altair_mocks/simple/scatter_tooltips/mock.py index 846f17bff..859e36d21 100644 --- a/vegafusion-python/tests/altair_mocks/simple/scatter_tooltips/mock.py 
+++ b/vegafusion-python/tests/altair_mocks/simple/scatter_tooltips/mock.py @@ -6,8 +6,8 @@ source = data.cars() alt.Chart(source).mark_circle(size=60).encode( - x='Horsepower', - y='Miles_per_Gallon', - color='Origin', - tooltip=['Name', 'Origin', 'Horsepower', 'Miles_per_Gallon'] + x="Horsepower", + y="Miles_per_Gallon", + color="Origin", + tooltip=["Name", "Origin", "Horsepower", "Miles_per_Gallon"], ).interactive() diff --git a/vegafusion-python/tests/altair_mocks/simple/stacked_bar_chart/mock.py b/vegafusion-python/tests/altair_mocks/simple/stacked_bar_chart/mock.py index ae4990cfd..019d6d772 100644 --- a/vegafusion-python/tests/altair_mocks/simple/stacked_bar_chart/mock.py +++ b/vegafusion-python/tests/altair_mocks/simple/stacked_bar_chart/mock.py @@ -5,8 +5,4 @@ source = data.iowa_electricity() -alt.Chart(source).mark_area().encode( - x="year:T", - y="net_generation:Q", - color="source:N" -) +alt.Chart(source).mark_area().encode(x="year:T", y="net_generation:Q", color="source:N") diff --git a/vegafusion-python/tests/altair_mocks/simple/strip_chart/mock.py b/vegafusion-python/tests/altair_mocks/simple/strip_chart/mock.py index 5c261cd0c..78ed1116e 100644 --- a/vegafusion-python/tests/altair_mocks/simple/strip_chart/mock.py +++ b/vegafusion-python/tests/altair_mocks/simple/strip_chart/mock.py @@ -5,7 +5,4 @@ source = data.cars() -alt.Chart(source).mark_tick().encode( - x='Horsepower:Q', - y='Cylinders:O' -) +alt.Chart(source).mark_tick().encode(x="Horsepower:Q", y="Cylinders:O") diff --git a/vegafusion-python/tests/test_datasource.py b/vegafusion-python/tests/test_datasource.py index 853b8ce05..d8ae68301 100644 --- a/vegafusion-python/tests/test_datasource.py +++ b/vegafusion-python/tests/test_datasource.py @@ -1,25 +1,31 @@ import pandas as pd import pyarrow as pa + from vegafusion.datasource import PandasDatasource + def test_mixed_col_type_inference(): - df = pd.DataFrame({ - 'a': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'], - 'b': [28, 55, 43, 91, 81, 53, 
19, 87, 52], - 'c': [28, 55, 43, 91, 81, 53, 19, 87, 52], - 'd': [28, 55, 43, 91, 81, 53, 19, 87, 52], - 'e': [28, 55, 43, 91, 81, 53, 19, 87, 52], - 'f': [28, 55, 43, 91, '81.5', 53, 19, 87, 52], - }) + df = pd.DataFrame( + { + "a": ["A", "B", "C", "D", "E", "F", "G", "H", "I"], + "b": [28, 55, 43, 91, 81, 53, 19, 87, 52], + "c": [28, 55, 43, 91, 81, 53, 19, 87, 52], + "d": [28, 55, 43, 91, 81, 53, 19, 87, 52], + "e": [28, 55, 43, 91, 81, 53, 19, 87, 52], + "f": [28, 55, 43, 91, "81.5", 53, 19, 87, 52], + } + ) datasource = PandasDatasource(df) - expected_schema = pa.schema([ - pa.field("a", pa.string()), - pa.field("b", pa.int64()), - pa.field("c", pa.int64()), - pa.field("d", pa.int64()), - pa.field("e", pa.int64()), - pa.field("f", pa.string()), - ]) + expected_schema = pa.schema( + [ + pa.field("a", pa.string()), + pa.field("b", pa.int64()), + pa.field("c", pa.int64()), + pa.field("d", pa.int64()), + pa.field("e", pa.int64()), + pa.field("f", pa.string()), + ] + ) assert datasource.schema() == expected_schema diff --git a/vegafusion-python/tests/test_input_utc.py b/vegafusion-python/tests/test_input_utc.py index a10a69318..c3a6b14a0 100644 --- a/vegafusion-python/tests/test_input_utc.py +++ b/vegafusion-python/tests/test_input_utc.py @@ -1,10 +1,12 @@ -import vegafusion as vf -import pandas as pd import json +import pandas as pd + +import vegafusion as vf + def test_input_utc(): - (pre_transformed, warnings) = vf.runtime.pre_transform_spec( + (pre_transformed, _warnings) = vf.runtime.pre_transform_spec( input_spec(), "UTC", "UTC", inline_datasets={"seattle_weather": load_dataset()} ) print(json.dumps(pre_transformed, indent=2)) @@ -16,7 +18,9 @@ def load_dataset(): """ Load seattle-weather dataset with the date column localized as UTC """ - seattle_weather = pd.read_csv("https://raw.githubusercontent.com/vega/vega-datasets/next/data/seattle-weather.csv") + seattle_weather = pd.read_csv( + 
"https://raw.githubusercontent.com/vega/vega-datasets/next/data/seattle-weather.csv" + ) seattle_weather["date"] = pd.to_datetime(seattle_weather["date"]) seattle_weather = seattle_weather.set_index("date").tz_localize("UTC").reset_index() return seattle_weather diff --git a/vegafusion-python/tests/test_jupyter_widget.py b/vegafusion-python/tests/test_jupyter_widget.py index 27909a0d5..91386af0b 100644 --- a/vegafusion-python/tests/test_jupyter_widget.py +++ b/vegafusion-python/tests/test_jupyter_widget.py @@ -1,24 +1,25 @@ -from pathlib import Path -import jupytext import io +import json +import os +import platform +import shutil import tempfile +import time +from io import BytesIO +from pathlib import Path from subprocess import Popen + +import jupytext import pytest -from selenium.webdriver import ActionChains +from flaky import flaky from selenium import webdriver -import time -from io import BytesIO +from selenium.webdriver import ActionChains from skimage.io import imread from skimage.metrics import structural_similarity as ssim -import shutil -from tenacity import retry, wait, stop -import os -from flaky import flaky -import json -import platform +from tenacity import retry, stop, wait try: - import chromedriver_binary + import chromedriver_binary # noqa: F401 (Side effect import) except ImportError: # chromedriver not provided through chromedriver_binary package pass @@ -65,7 +66,7 @@ def setup_module(module): - """ setup any state specific to the execution of the given module.""" + """setup any state specific to the execution of the given module.""" # Initialize notebooks and screenshots to empty directories shutil.rmtree(temp_notebooks_dir, ignore_errors=True) temp_notebooks_dir.mkdir(parents=True, exist_ok=True) @@ -79,7 +80,8 @@ def setup_module(module): @flaky(max_runs=2) @pytest.mark.parametrize( - "mock_name,img_tolerance,delay", [ + "mock_name,img_tolerance,delay", + [ ("area/cumulative_count", 1.0, 0.25), ("area/density_facet", 1.0, 0.25), 
("area/gradient", 1.0, 0.25), @@ -126,7 +128,6 @@ def setup_module(module): ("casestudy/falkensee", 1.0, 0.25), ("casestudy/us_employment", 1.0, 0.25), ("casestudy/top_k_items", 1.0, 0.25), - # Different order of ticks for equal bar lengths ("casestudy/top_k_letters", 0.995, 0.25), ("casestudy/isotype", 1.0, 0.25), @@ -234,25 +235,30 @@ def setup_module(module): ("simple/line_chart", 1.0, 0.25), ("simple/scatter_tooltips", 1.0, 0.25), ("simple/strip_chart", 1.0, 0.25), - # Non-deterministic mocks have lower image tolerance ("other/errorbars_with_ci", 0.8, 0.25), ("other/sorted_error_bars_with_ci", 0.8, 0.25), - ("scatter/stripplot", 0.8, 0.25) # random() - ]) + ("scatter/stripplot", 0.8, 0.25), # random() + ], +) def test_altair_mock(mock_name, img_tolerance, delay): - # Build Jupytext Markdown text containing the mock's code mock_path = altair_mocks_dir / mock_name / "mock.py" actions = load_actions(mock_name) mock_code = mock_path.read_text() altair_default_markdown = altair_default_template.replace("{code}", mock_code) - vegafusion_jupyter_markdown = altair_vegafusion_jupyter_template.replace("{code}", mock_code) + vegafusion_jupyter_markdown = altair_vegafusion_jupyter_template.replace( + "{code}", mock_code + ) # Use jupytext to convert markdown to an ipynb file - altair_default_notebook = jupytext.read(io.StringIO(altair_default_markdown), fmt="markdown") - vegafusion_jupyter_notebook = jupytext.read(io.StringIO(vegafusion_jupyter_markdown), fmt="markdown") + altair_default_notebook = jupytext.read( + io.StringIO(altair_default_markdown), fmt="markdown" + ) + vegafusion_jupyter_notebook = jupytext.read( + io.StringIO(vegafusion_jupyter_markdown), fmt="markdown" + ) voila_proc, chrome_driver = launch_voila() @@ -262,7 +268,11 @@ def test_altair_mock(mock_name, img_tolerance, delay): chrome_driver, altair_default_notebook, name + "_altair", actions, delay ) vegafusion_mime_imgs = export_image_sequence( - chrome_driver, vegafusion_jupyter_notebook, name + 
"_vegafusion_mime", actions, delay + chrome_driver, + vegafusion_jupyter_notebook, + name + "_vegafusion_mime", + actions, + delay, ) compare_images(altair_imgs, vegafusion_mime_imgs, img_tolerance * 0.99) @@ -274,7 +284,6 @@ def test_altair_mock(mock_name, img_tolerance, delay): def test_vegafusion_widget(): - altair_chart_str = """ from vegafusion.jupyter import VegaFusionWidget @@ -303,7 +312,7 @@ def test_vegafusion_widget(): """ # Convert to Vega spec and use VegaFusionWidget - notebook_text_vf = r""" + notebook_text_vf = r""" ```python {altair_chart_str} @@ -311,7 +320,7 @@ def test_vegafusion_widget(): vega_spec_inline["data"][1]["url"] = "vegafusion+dataset://weather" widget = VegaFusionWidget( - spec=vega_spec_inline, + spec=vega_spec_inline, inline_datasets={"weather": data.seattle_weather()} ) widget @@ -319,9 +328,9 @@ def test_vegafusion_widget(): """.replace("{altair_chart_str}", altair_chart_str) notebook_vf = jupytext.read(io.StringIO(notebook_text_vf), fmt="markdown") - + # Display with default altair renderer - notebook_text_alt = f""" + notebook_text_alt = f""" ```python {altair_chart_str} @@ -329,7 +338,7 @@ def test_vegafusion_widget(): ``` """ notebook_alt = jupytext.read(io.StringIO(notebook_text_alt), fmt="markdown") - + # Define actions to perform a selection actions = [ {"type": "snapshot"}, @@ -337,14 +346,26 @@ def test_vegafusion_widget(): {"type": "click_and_hold"}, {"type": "move_to", "coords": [200, 200]}, {"type": "release"}, - {"type": "snapshot"} + {"type": "snapshot"}, ] voila_proc, chrome_driver = launch_voila() try: - imgs_alt = export_image_sequence(chrome_driver, notebook_alt, "vegafusion_widget_alt", actions=actions, delay=0.25) - imgs_vf = export_image_sequence(chrome_driver, notebook_vf, "vegafusion_widget_vf", actions=actions, delay=0.25) + imgs_alt = export_image_sequence( + chrome_driver, + notebook_alt, + "vegafusion_widget_alt", + actions=actions, + delay=0.25, + ) + imgs_vf = export_image_sequence( + chrome_driver, + 
notebook_vf, + "vegafusion_widget_vf", + actions=actions, + delay=0.25, + ) compare_images(imgs_alt, imgs_vf, 0.99) finally: @@ -352,6 +373,7 @@ def test_vegafusion_widget(): chrome_driver.close() time.sleep(0.25) + def launch_voila(): # Create selenium Chrome instance chrome_opts = webdriver.ChromeOptions() @@ -363,27 +385,31 @@ def launch_voila(): chrome_opts.add_argument("--disable-dev-shm-usage") chrome_opts.add_argument("--no-sandbox") - chrome_opts.set_capability('goog:loggingPrefs', {'browser': 'ALL'}) + chrome_opts.set_capability("goog:loggingPrefs", {"browser": "ALL"}) chrome_driver = webdriver.Chrome(options=chrome_opts) chrome_driver.set_window_size(800, 800) # Launch Voila server - voila_proc = Popen(["voila", "--no-browser", "--enable_nbextensions=True"], cwd=temp_notebooks_dir) + voila_proc = Popen( + ["voila", "--no-browser", "--enable_nbextensions=True"], cwd=temp_notebooks_dir + ) # Sleep to allow Voila itself to start (this does not include loading a particular dashboard). 
time.sleep(1.0) return voila_proc, chrome_driver -def compare_images(baseline_imgs, test_imgs, img_tolerance): +def compare_images(baseline_imgs, test_imgs, img_tolerance): for i, (baseline_img, test_img) in enumerate(zip(baseline_imgs, test_imgs)): assert baseline_img.shape == test_img.shape, "Size mismatch" similarity_mime_value = ssim(baseline_img, test_img, channel_axis=2) print(f"({i}) similarity_mime_value={similarity_mime_value}") - assert similarity_mime_value >= img_tolerance, f"Similarity failed with mime renderer on image {i}" + assert ( + similarity_mime_value >= img_tolerance + ), f"Similarity failed with mime renderer on image {i}" def load_actions(mock_name): @@ -395,17 +421,19 @@ def load_actions(mock_name): def export_image_sequence( - chrome_driver: webdriver.Chrome, - notebook: jupytext.jupytext.NotebookNode, - name, - actions, - delay, - voila_url_base: str = "http://localhost:8866/voila/render/", + chrome_driver: webdriver.Chrome, + notebook: jupytext.jupytext.NotebookNode, + name, + actions, + delay, + voila_url_base: str = "http://localhost:8866/voila/render/", ): imgs = [] with tempfile.NamedTemporaryFile( - mode="wt", dir=temp_notebooks_dir, suffix=".ipynb", + mode="wt", + dir=temp_notebooks_dir, + suffix=".ipynb", ) as f: jupytext.write(notebook, f, fmt="ipynb") f.file.flush() @@ -420,11 +448,14 @@ def export_image_sequence( @retry(wait=wait.wait_fixed(0.5), stop=stop.stop_after_delay(10)) def get_url(): return chrome_driver.get(url) + get_url() # Remove padding, margins, and standardize line height. 
- css = ("body, .jp-Cell, .jp-Notebook, .jupyter-widgets, .jp-RenderedHTMLCommon " - "{margin: 0 !important; padding: 0 !important; line-height: 1.3 !important;}") + css = ( + "body, .jp-Cell, .jp-Notebook, .jupyter-widgets, .jp-RenderedHTMLCommon " + "{margin: 0 !important; padding: 0 !important; line-height: 1.3 !important;}" + ) script = 'document.styleSheets[0].insertRule("' + css + '", 0 )' chrome_driver.execute_script(script) @@ -437,17 +468,19 @@ def get_canvas(): canvas = get_canvas() except: # Write screenshot - chrome_driver.get_screenshot_as_file(str(failure_output / f"{name}_here.png")) + chrome_driver.get_screenshot_as_file( + str(failure_output / f"{name}_here.png") + ) # Write logs - with open(failure_output / f"{name}_console.log", "wt") as f: + with open(failure_output / f"{name}_console.log", "w") as f: for log in chrome_driver.get_log("browser"): f.write(json.dumps(log) + "\n") # Write html dump root = chrome_driver.find_element("xpath", "//html") - with open(failure_output / f"{name}_page.html", "wt") as f: - f.write(root.get_attribute('innerHTML')) + with open(failure_output / f"{name}_page.html", "w") as f: + f.write(root.get_attribute("innerHTML")) raise time.sleep(delay) @@ -461,7 +494,7 @@ def get_canvas(): chain.perform() time.sleep(0.25) - img_path = (temp_screenshots_dir / f"{name}_{i}.png").as_posix(); + img_path = (temp_screenshots_dir / f"{name}_{i}.png").as_posix() print(f"img_path: {img_path}") if action_type == "snapshot": img_bytes = canvas.screenshot_as_png @@ -488,7 +521,9 @@ def get_canvas(): # Origin of element center xoffset = canvas.size["width"] / 2 yoffset = canvas.size["height"] / 2 - chain = chain.move_to_element_with_offset(canvas, coords[0] - xoffset, coords[1] - yoffset) + chain = chain.move_to_element_with_offset( + canvas, coords[0] - xoffset, coords[1] - yoffset + ) elif action_type == "move_by": coords = action["coords"] chain = chain.move_by_offset(coords[0], coords[1]) diff --git 
a/vegafusion-python/tests/test_pretransform.py b/vegafusion-python/tests/test_pretransform.py index 4df4d1449..4e39592eb 100644 --- a/vegafusion-python/tests/test_pretransform.py +++ b/vegafusion-python/tests/test_pretransform.py @@ -1,13 +1,17 @@ +import base64 +import decimal +import json +from datetime import date +from importlib.util import find_spec + import pandas as pd -import pytest -from pandas import Timestamp, NaT +import polars as pl import pyarrow as pa +import pytest +from pandas import NaT, Timestamp + import vegafusion as vf -import json -import polars as pl -from datetime import date -import decimal -import base64 + def setup_module(module): vf.set_local_tz("UTC") @@ -16,8 +20,8 @@ def setup_module(module): def get_connections(): connections = ["datafusion"] try: - import duckdb - connections.append("duckdb") + if find_spec("duckdb") is not None: + connections.append("duckdb") except ImportError: pass @@ -100,7 +104,8 @@ def order_items_spec(): def movies_histogram_spec(agg="count"): - return json.loads(""" + return json.loads( + """ { "$schema": "https://vega.github.io/schema/vega/v5.json", "background": "white", @@ -136,7 +141,11 @@ def movies_histogram_spec(agg="count"): "bin_maxbins_10_IMDB Rating", "bin_maxbins_10_IMDB Rating_end" ], - "ops": [""" + '"' + agg + '"' + r"""], + "ops": [""" + + '"' + + agg + + '"' + + r"""], "fields": ["Worldwide Gross"], "as": ["median_Worldwide Gross"] }, @@ -227,11 +236,13 @@ def movies_histogram_spec(agg="count"): "zindex": 0 } ] -}""") +}""" + ) def standalone_aggregate_spec(agg="count"): - return json.loads(""" + return json.loads( + """ { "$schema": "https://vega.github.io/schema/vega/v5.json", "data": [ @@ -265,7 +276,11 @@ def standalone_aggregate_spec(agg="count"): "y" ], "ops": [ - """ + '"' + agg + '"' + """ + """ + + '"' + + agg + + '"' + + """ ], "as": [ "median_y" @@ -508,8 +523,9 @@ def standalone_aggregate_spec(agg="count"): "usermeta": { "warnings": [] } -} - """) +} + """ + ) def 
date_column_spec(): @@ -686,7 +702,7 @@ def period_in_col_name_spec(): "zindex": 0 } ] -} +} """) @@ -998,7 +1014,7 @@ def date32_timeunit_spec(): def gh_268_hang_spec(): - return json.loads(r""" + return json.loads(r""" { "$schema": "https://vega.github.io/schema/vega/v5.json", "data": [ @@ -1369,43 +1385,45 @@ def empty_histogram_spec(): def test_pre_transform_multi_partition(): n = 4050 - order_items = pd.DataFrame({ - "menu_item": [0] * n + [1] * n - }) + order_items = pd.DataFrame({"menu_item": [0] * n + [1] * n}) vega_spec = order_items_spec() - new_spec, warnings = vf.runtime.pre_transform_spec(vega_spec, inline_datasets={ - "order_items": order_items, - }) + new_spec, _warnings = vf.runtime.pre_transform_spec( + vega_spec, + inline_datasets={ + "order_items": order_items, + }, + ) - assert new_spec["data"][1] == dict( - name="data_0", - values=[ + assert new_spec["data"][1] == { + "name": "data_0", + "values": [ {"menu_item": 0, "__count": n}, {"menu_item": 1, "__count": n}, - ] - ) + ], + } def test_pre_transform_cache_cleared(): # Make sure that result changes when input DataFrame changes def check(n): - order_items = pd.DataFrame({ - "menu_item": [0] * n + [1] * n - }) + order_items = pd.DataFrame({"menu_item": [0] * n + [1] * n}) vega_spec = order_items_spec() - new_spec, warnings = vf.runtime.pre_transform_spec(vega_spec, inline_datasets={ - "order_items": order_items, - }) + new_spec, _warnings = vf.runtime.pre_transform_spec( + vega_spec, + inline_datasets={ + "order_items": order_items, + }, + ) - assert new_spec["data"][1] == dict( - name="data_0", - values=[ + assert new_spec["data"][1] == { + "name": "data_0", + "values": [ {"menu_item": 0, "__count": n}, {"menu_item": 1, "__count": n}, - ] - ) + ], + } check(16) check(32) @@ -1413,9 +1431,7 @@ def check(n): def test_pre_transform_datasets(): n = 4050 - order_items = pd.DataFrame({ - "menu_item": [0] * n + [1] * (2 * n) + [2] * (3 * n) - }) + order_items = pd.DataFrame({"menu_item": [0] * n + [1] 
* (2 * n) + [2] * (3 * n)}) vega_spec = order_items_spec() datasets, warnings = vf.runtime.pre_transform_datasets( @@ -1423,7 +1439,7 @@ def test_pre_transform_datasets(): ["data_0"], inline_datasets={ "order_items": order_items, - } + }, ) assert len(warnings) == 0 assert len(datasets) == 1 @@ -1436,12 +1452,12 @@ def test_pre_transform_datasets(): def test_pre_transform_planner_warning1(): # Pre-transform with supported aggregate function should result in no warnings vega_spec = movies_histogram_spec("mean") - datasets, warnings = vf.runtime.pre_transform_spec(vega_spec) + _datasets, warnings = vf.runtime.pre_transform_spec(vega_spec) assert len(warnings) == 0 # Pre-transform with unsupported aggregate function should result in one warning vega_spec = movies_histogram_spec("ci0") - datasets, warnings = vf.runtime.pre_transform_spec(vega_spec) + _datasets, warnings = vf.runtime.pre_transform_spec(vega_spec) assert len(warnings) == 1 warning = warnings[0] @@ -1452,12 +1468,12 @@ def test_pre_transform_planner_warning1(): def test_pre_transform_planner_warning2(): # Pre-transform with supported aggregate function should result in no warnings vega_spec = standalone_aggregate_spec("mean") - datasets, warnings = vf.runtime.pre_transform_spec(vega_spec) + _datasets, warnings = vf.runtime.pre_transform_spec(vega_spec) assert len(warnings) == 0 # Pre-transform with unsupported aggregate function should result in one warning vega_spec = standalone_aggregate_spec("ci0") - datasets, warnings = vf.runtime.pre_transform_spec(vega_spec) + _datasets, warnings = vf.runtime.pre_transform_spec(vega_spec) assert len(warnings) == 1 warning = warnings[0] @@ -1467,40 +1483,54 @@ def test_pre_transform_planner_warning2(): def test_date32_pre_transform_dataset(): # Test to make sure that date32 columns are interpreted in the local timezone - dates_df = pd.DataFrame({ - "date_col": [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)], - }) + dates_df = pd.DataFrame( + { + "date_col": 
[date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)], + } + ) spec = date_column_spec() - (output_ds,), warnings = vf.runtime.pre_transform_datasets( - spec, ["data_0"], "America/New_York", default_input_tz="UTC", inline_datasets=dict(dates=dates_df) + (output_ds,), _warnings = vf.runtime.pre_transform_datasets( + spec, + ["data_0"], + "America/New_York", + default_input_tz="UTC", + inline_datasets={"dates": dates_df}, ) # Timestamps are in the local timezone, so they should be midnight local time assert list(output_ds.date_col) == [ - pd.Timestamp('2022-01-01 00:00:00-0500', tz='America/New_York'), - pd.Timestamp('2022-01-02 00:00:00-0500', tz='America/New_York'), - pd.Timestamp('2022-01-03 00:00:00-0500', tz='America/New_York') + pd.Timestamp("2022-01-01 00:00:00-0500", tz="America/New_York"), + pd.Timestamp("2022-01-02 00:00:00-0500", tz="America/New_York"), + pd.Timestamp("2022-01-03 00:00:00-0500", tz="America/New_York"), ] + def test_date32_pre_transform_dataset_polars(): # Test to make sure that date32 columns are interpreted in the local timezone - dates_df = pl.DataFrame({ - "date_col": [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)], - }) + dates_df = pl.DataFrame( + { + "date_col": [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)], + } + ) spec = date_column_spec() - (output_ds,), warnings = vf.runtime.pre_transform_datasets( - spec, ["data_0"], "America/New_York", default_input_tz="UTC", inline_datasets=dict(dates=dates_df) + (output_ds,), _warnings = vf.runtime.pre_transform_datasets( + spec, + ["data_0"], + "America/New_York", + default_input_tz="UTC", + inline_datasets={"dates": dates_df}, ) # Timestamps are in the local timezone, so they should be midnight local time assert list(output_ds["date_col"]) == [ - pd.Timestamp('2022-01-01 00:00:00-0500', tz='America/New_York'), - pd.Timestamp('2022-01-02 00:00:00-0500', tz='America/New_York'), - pd.Timestamp('2022-01-03 00:00:00-0500', tz='America/New_York') + pd.Timestamp("2022-01-01 
00:00:00-0500", tz="America/New_York"), + pd.Timestamp("2022-01-02 00:00:00-0500", tz="America/New_York"), + pd.Timestamp("2022-01-03 00:00:00-0500", tz="America/New_York"), ] + def test_date32_in_timeunit_duckdb_crash(): try: # Set this as the active connection @@ -1508,15 +1538,15 @@ def test_date32_in_timeunit_duckdb_crash(): # order_items includes a table://order_items data url vega_spec = date32_timeunit_spec() - dataframe = pd.DataFrame({ - "GO_LIVE_MONTH": [date(2021, 1, 1), date(2021, 2, 1)], - "PERCENT_GO_LIVES": [0.2, 0.3], - }) + dataframe = pd.DataFrame( + { + "GO_LIVE_MONTH": [date(2021, 1, 1), date(2021, 2, 1)], + "PERCENT_GO_LIVES": [0.2, 0.3], + } + ) datasets, warnings = vf.runtime.pre_transform_datasets( - vega_spec, - ["data_1"], - inline_datasets=dict(dataframe=dataframe) + vega_spec, ["data_1"], inline_datasets={"dataframe": dataframe} ) assert len(warnings) == 0 assert len(datasets) == 1 @@ -1526,11 +1556,11 @@ def test_date32_in_timeunit_duckdb_crash(): def test_period_in_column_name(): - df_period = pd.DataFrame([[1, 2]], columns=['normal', 'a.b']) + df_period = pd.DataFrame([[1, 2]], columns=["normal", "a.b"]) spec = period_in_col_name_spec() - datasets, warnings = vf.runtime.pre_transform_datasets(spec, ["data_0"], inline_datasets=dict( - df_period=df_period - )) + datasets, warnings = vf.runtime.pre_transform_datasets( + spec, ["data_0"], inline_datasets={"df_period": df_period} + ) assert len(warnings) == 0 assert len(datasets) == 1 @@ -1539,64 +1569,76 @@ def test_period_in_column_name(): def test_nat_values(): - dataframe = pd.DataFrame([ - {'ORDER_DATE': date(2011, 3, 1), - 'SALES': 457.568, - 'NULL_TEST': Timestamp('2011-03-01 00:00:00+0000', tz="UTC")}, - {'ORDER_DATE': date(2011, 3, 1), - 'SALES': 376.509, - 'NULL_TEST': Timestamp('2011-03-01 00:00:00+0000', tz="UTC")}, - {'ORDER_DATE': date(2011, 3, 1), - 'SALES': 362.25, - 'NULL_TEST': Timestamp('2011-03-01 00:00:00+0000', tz="UTC")}, - {'ORDER_DATE': date(2011, 3, 1), - 'SALES': 
129.552, - 'NULL_TEST': Timestamp('2011-03-01 00:00:00+0000', tz="UTC")}, - {'ORDER_DATE': date(2011, 3, 1), 'SALES': 18.84, 'NULL_TEST': NaT}, - {'ORDER_DATE': date(2011, 4, 1), - 'SALES': 66.96, - 'NULL_TEST': Timestamp('2011-04-01 00:00:00+0000', tz="UTC")}, - {'ORDER_DATE': date(2011, 4, 1), 'SALES': 6.24, 'NULL_TEST': NaT}, - {'ORDER_DATE': date(2011, 6, 1), - 'SALES': 881.93, - 'NULL_TEST': Timestamp('2011-06-01 00:00:00+0000', tz="UTC")}, - {'ORDER_DATE': date(2011, 6, 1), - 'SALES': 166.72, - 'NULL_TEST': Timestamp('2011-06-01 00:00:00+0000', tz="UTC")}, - {'ORDER_DATE': date(2011, 6, 1), 'SALES': 25.92, 'NULL_TEST': NaT} - ]) + dataframe = pd.DataFrame( + [ + { + "ORDER_DATE": date(2011, 3, 1), + "SALES": 457.568, + "NULL_TEST": Timestamp("2011-03-01 00:00:00+0000", tz="UTC"), + }, + { + "ORDER_DATE": date(2011, 3, 1), + "SALES": 376.509, + "NULL_TEST": Timestamp("2011-03-01 00:00:00+0000", tz="UTC"), + }, + { + "ORDER_DATE": date(2011, 3, 1), + "SALES": 362.25, + "NULL_TEST": Timestamp("2011-03-01 00:00:00+0000", tz="UTC"), + }, + { + "ORDER_DATE": date(2011, 3, 1), + "SALES": 129.552, + "NULL_TEST": Timestamp("2011-03-01 00:00:00+0000", tz="UTC"), + }, + {"ORDER_DATE": date(2011, 3, 1), "SALES": 18.84, "NULL_TEST": NaT}, + { + "ORDER_DATE": date(2011, 4, 1), + "SALES": 66.96, + "NULL_TEST": Timestamp("2011-04-01 00:00:00+0000", tz="UTC"), + }, + {"ORDER_DATE": date(2011, 4, 1), "SALES": 6.24, "NULL_TEST": NaT}, + { + "ORDER_DATE": date(2011, 6, 1), + "SALES": 881.93, + "NULL_TEST": Timestamp("2011-06-01 00:00:00+0000", tz="UTC"), + }, + { + "ORDER_DATE": date(2011, 6, 1), + "SALES": 166.72, + "NULL_TEST": Timestamp("2011-06-01 00:00:00+0000", tz="UTC"), + }, + {"ORDER_DATE": date(2011, 6, 1), "SALES": 25.92, "NULL_TEST": NaT}, + ] + ) spec = nat_bar_spec() - datasets, warnings = vf.runtime.pre_transform_datasets(spec, ["dataframe"], inline_datasets=dict( - dataframe=dataframe - )) + datasets, warnings = vf.runtime.pre_transform_datasets( + spec, 
["dataframe"], inline_datasets={"dataframe": dataframe} + ) assert len(warnings) == 0 assert len(datasets) == 1 dataset = datasets[0] assert dataset.to_dict("records")[0] == { - 'NULL_TEST': pd.Timestamp('2011-03-01 00:00:00+0000', tz="UTC"), - 'ORDER_DATE': Timestamp('2011-03-01 00:00:00+0000', tz="UTC"), - 'SALES': 457.568, - 'SALES_end': 457.568, - 'SALES_start': 0.0, + "NULL_TEST": pd.Timestamp("2011-03-01 00:00:00+0000", tz="UTC"), + "ORDER_DATE": Timestamp("2011-03-01 00:00:00+0000", tz="UTC"), + "SALES": 457.568, + "SALES_end": 457.568, + "SALES_start": 0.0, } def test_pre_transform_dataset_dataframe_interface_protocol(): - - try: - import pyarrow.interchange - except ImportError: + if find_spec("pyarrow.interchange") is None: pytest.skip("DataFrame interface protocol requires pyarrow 11.0.0 or later") + from polars.testing import assert_frame_equal n = 4050 # Input a polars DataFrame (which follows the DataFrame Interface Protocol) - order_items = pl.DataFrame({ - "menu_item": [0] * n + [1] * (2 * n) + [2] * (3 * n) - }) + order_items = pl.DataFrame({"menu_item": [0] * n + [1] * (2 * n) + [2] * (3 * n)}) vega_spec = order_items_spec() datasets, warnings = vf.runtime.pre_transform_datasets( @@ -1604,7 +1646,7 @@ def test_pre_transform_dataset_dataframe_interface_protocol(): ["data_0"], inline_datasets={ "order_items": order_items, - } + }, ) assert len(warnings) == 0 assert len(datasets) == 1 @@ -1622,9 +1664,7 @@ def test_pre_transform_dataset_duckdb_conn(): n = 4050 # Input a polars DataFrame (which follows the DataFrame Interface Protocol) - order_items = pd.DataFrame({ - "menu_item": [0] * n + [1] * (2 * n) + [2] * (3 * n) - }) + order_items = pd.DataFrame({"menu_item": [0] * n + [1] * (2 * n) + [2] * (3 * n)}) try: # Create duckdb connection and register order_items with duckdb @@ -1655,9 +1695,9 @@ def test_pre_transform_dataset_duckdb_with_decimal_conn(): n = 4050 # Input a polars DataFrame (which follows the DataFrame Interface Protocol) - 
order_items = pd.DataFrame({ - "menu_item_int": [0] * n + [1] * (2 * n) + [2] * (3 * n) - }) + order_items = pd.DataFrame( + {"menu_item_int": [0] * n + [1] * (2 * n) + [2] * (3 * n)} + ) try: # Create duckdb connection and register order_items with duckdb @@ -1680,10 +1720,16 @@ def test_pre_transform_dataset_duckdb_with_decimal_conn(): assert len(datasets) == 1 result = datasets[0] - expected = pd.DataFrame({ - "menu_item": [decimal.Decimal(0), decimal.Decimal(1), decimal.Decimal(2)], - "__count": [n, 2 * n, 3 * n] - }) + expected = pd.DataFrame( + { + "menu_item": [ + decimal.Decimal(0), + decimal.Decimal(1), + decimal.Decimal(2), + ], + "__count": [n, 2 * n, 3 * n], + } + ) pd.testing.assert_frame_equal(result, expected) finally: vf.runtime.set_connection("datafusion") @@ -1692,21 +1738,27 @@ def test_pre_transform_dataset_duckdb_with_decimal_conn(): def test_duckdb_timestamp_with_timezone(): try: vf.runtime.set_connection("duckdb") - dates_df = pd.DataFrame({ - "date_col": [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)], - }) + dates_df = pd.DataFrame( + { + "date_col": [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)], + } + ) dates_df["date_col"] = pd.to_datetime(dates_df.date_col).dt.tz_localize("UTC") spec = date_column_spec() - (output_ds,), warnings = vf.runtime.pre_transform_datasets( - spec, ["data_0"], "America/New_York", default_input_tz="UTC", inline_datasets=dict(dates=dates_df) + (output_ds,), _warnings = vf.runtime.pre_transform_datasets( + spec, + ["data_0"], + "America/New_York", + default_input_tz="UTC", + inline_datasets={"dates": dates_df}, ) # Timestamps are in the local timezone, so they should be midnight local time assert list(output_ds.date_col) == [ - pd.Timestamp('2022-01-01 00:00:00', tz='UTC'), - pd.Timestamp('2022-01-02 00:00:00', tz='UTC'), - pd.Timestamp('2022-01-03 00:00:00', tz='UTC') + pd.Timestamp("2022-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2022-01-02 00:00:00", tz="UTC"), + pd.Timestamp("2022-01-03 
00:00:00", tz="UTC"), ] finally: vf.runtime.set_connection("datafusion") @@ -1717,12 +1769,16 @@ def test_gh_268_hang(): Tests for hang reported in https://github.com/hex-inc/vegafusion/issues/268 """ vf.runtime.set_connection("datafusion") - movies = pd.read_json("https://raw.githubusercontent.com/vega/vega-datasets/main/data/movies.json") + movies = pd.read_json( + "https://raw.githubusercontent.com/vega/vega-datasets/main/data/movies.json" + ) spec = gh_268_hang_spec() for i in range(20): # Break cache by removing one row each iteration movies_inner = movies.iloc[i:] - vf.runtime.pre_transform_datasets(spec, ["data_3"], inline_datasets=dict(movies_clean=movies_inner)) + vf.runtime.pre_transform_datasets( + spec, ["data_3"], inline_datasets={"movies_clean": movies_inner} + ) def test_repeat_duckdb(): @@ -1730,26 +1786,32 @@ def test_repeat_duckdb(): Tests for hang reported in https://github.com/hex-inc/vegafusion/issues/268 """ vf.runtime.set_connection("duckdb") - movies = pd.read_json("https://raw.githubusercontent.com/vega/vega-datasets/main/data/movies.json") + movies = pd.read_json( + "https://raw.githubusercontent.com/vega/vega-datasets/main/data/movies.json" + ) spec = gh_268_hang_spec() - for i in range(2): - vf.runtime.pre_transform_datasets(spec, ["data_3"], inline_datasets=dict(movies_clean=movies)) + for _ in range(2): + vf.runtime.pre_transform_datasets( + spec, ["data_3"], inline_datasets={"movies_clean": movies} + ) @pytest.mark.parametrize("connection", get_connections()) def test_pivot_mixed_case(connection): vf.runtime.set_connection(connection) - source_0 = pd.DataFrame.from_records([ - {"country": "Norway", "type": "gold", "count": 14}, - {"country": "Norway", "type": "silver", "count": 14}, - {"country": "Norway", "type": "Gold", "count": 11}, - {"country": "Germany", "type": "gold", "count": 14}, - {"country": "Germany", "type": "silver", "count": 10}, - {"country": "Germany", "type": "bronze", "count": 7}, - {"country": "Canada", "type": 
"gold", "count": 11}, - {"country": "Canada", "type": "silver", "count": 8}, - {"country": "Canada", "type": "bronze", "count": 10} - ]) + source_0 = pd.DataFrame.from_records( + [ + {"country": "Norway", "type": "gold", "count": 14}, + {"country": "Norway", "type": "silver", "count": 14}, + {"country": "Norway", "type": "Gold", "count": 11}, + {"country": "Germany", "type": "gold", "count": 14}, + {"country": "Germany", "type": "silver", "count": 10}, + {"country": "Germany", "type": "bronze", "count": 7}, + {"country": "Canada", "type": "gold", "count": 11}, + {"country": "Canada", "type": "silver", "count": 8}, + {"country": "Canada", "type": "bronze", "count": 10}, + ] + ) spec = json.loads(r""" { "$schema": "https://vega.github.io/schema/vega/v5.json", @@ -1771,27 +1833,32 @@ def test_pivot_mixed_case(connection): ] } ] -} +} """) - datasets, warnings = vf.runtime.pre_transform_datasets( - spec, ["data_0"], inline_datasets=dict(source_0=source_0) + datasets, _warnings = vf.runtime.pre_transform_datasets( + spec, ["data_0"], inline_datasets={"source_0": source_0} ) - assert set(datasets[0].columns.tolist()) == {"gold", "Gold", "silver", "bronze", "country"} + assert set(datasets[0].columns.tolist()) == { + "gold", + "Gold", + "silver", + "bronze", + "country", + } def test_keep_signals(): spec = manual_histogram_spec() # pre-transform without keep_signals. 
No signals should be present in pre-transformed spec - tx_spec, warnings = vf.runtime.pre_transform_spec(spec) + tx_spec, _warnings = vf.runtime.pre_transform_spec(spec) assert len(tx_spec.get("signals", [])) == 0 # Specify single keep_signal as a string - tx_spec, warnings = vf.runtime.pre_transform_spec( - spec, - keep_signals="layer_0_layer_0_bin_maxbins_10_IMDB_Rating_bins" + tx_spec, _warnings = vf.runtime.pre_transform_spec( + spec, keep_signals="layer_0_layer_0_bin_maxbins_10_IMDB_Rating_bins" ) assert len(tx_spec.get("signals", [])) == 1 sig0 = tx_spec["signals"][0] @@ -1799,12 +1866,12 @@ def test_keep_signals(): assert sig0["value"]["step"] == 1.0 # Specify multiple keep_signals as a list - tx_spec, warnings = vf.runtime.pre_transform_spec( + tx_spec, _warnings = vf.runtime.pre_transform_spec( spec, keep_signals=[ "layer_0_layer_0_bin_maxbins_10_IMDB_Rating_bins", - ("layer_0_layer_0_bin_maxbins_10_IMDB_Rating_extent", []) - ] + ("layer_0_layer_0_bin_maxbins_10_IMDB_Rating_extent", []), + ], ) assert len(tx_spec.get("signals", [])) == 2 sig0 = tx_spec["signals"][0] @@ -1817,12 +1884,17 @@ def test_keep_signals(): def test_empty_histogram(): spec = empty_histogram_spec() - empty_df = pd.DataFrame({ 'col': []}) - (data_0,), warnings = vf.runtime.pre_transform_datasets( - spec, ["data_0"], inline_datasets=dict(empty_df=empty_df) + empty_df = pd.DataFrame({"col": []}) + (data_0,), _warnings = vf.runtime.pre_transform_datasets( + spec, ["data_0"], inline_datasets={"empty_df": empty_df} ) assert data_0.empty - assert data_0.columns.tolist() == ["__bin_field_name", "__bin_field_name_end", "__count", "__bin_range"] + assert data_0.columns.tolist() == [ + "__bin_field_name", + "__bin_field_name_end", + "__count", + "__bin_range", + ] def test_pre_transform_spec_encoded_datasets(): @@ -1830,7 +1902,7 @@ def test_pre_transform_spec_encoded_datasets(): vega_spec = movies_histogram_spec() # default list of dict format - tx_spec, warnings = 
vf.runtime.pre_transform_spec( + tx_spec, _warnings = vf.runtime.pre_transform_spec( vega_spec, data_encoding_threshold=10, data_encoding_format="pyarrow" ) @@ -1839,7 +1911,7 @@ def test_pre_transform_spec_encoded_datasets(): assert len(values) == 9 # pyarrow format - tx_spec, warnings = vf.runtime.pre_transform_spec( + tx_spec, _warnings = vf.runtime.pre_transform_spec( vega_spec, data_encoding_threshold=0, data_encoding_format="pyarrow" ) @@ -1850,7 +1922,7 @@ def test_pre_transform_spec_encoded_datasets(): assert values_df.columns[0] == "bin_maxbins_10_IMDB Rating" # arrow-ipc format - tx_spec, warnings = vf.runtime.pre_transform_spec( + tx_spec, _warnings = vf.runtime.pre_transform_spec( vega_spec, data_encoding_threshold=0, data_encoding_format="arrow-ipc" ) @@ -1861,7 +1933,7 @@ def test_pre_transform_spec_encoded_datasets(): assert values_df.columns[0] == "bin_maxbins_10_IMDB Rating" # arrow-ipc-base64 format - tx_spec, warnings = vf.runtime.pre_transform_spec( + tx_spec, _warnings = vf.runtime.pre_transform_spec( vega_spec, data_encoding_threshold=0, data_encoding_format="arrow-ipc-base64" ) @@ -1870,4 +1942,3 @@ def test_pre_transform_spec_encoded_datasets(): values_df = pa.ipc.deserialize_pandas(base64.standard_b64decode(values)) assert len(values_df) == 9 assert values_df.columns[0] == "bin_maxbins_10_IMDB Rating" - diff --git a/vegafusion-python/tests/test_pretransform_specs.py b/vegafusion-python/tests/test_pretransform_specs.py index 1b6787b0e..57b5ead6d 100644 --- a/vegafusion-python/tests/test_pretransform_specs.py +++ b/vegafusion-python/tests/test_pretransform_specs.py @@ -1,13 +1,15 @@ -from pathlib import Path import json -import vegafusion as vf -from vl_convert import vega_to_png - from io import BytesIO +from pathlib import Path + import pyarrow as pa +import pytest from skimage.io import imread from skimage.metrics import structural_similarity as ssim -import pytest +from vl_convert import vega_to_png + +import vegafusion as vf + here = 
Path(__file__).parent spec_dir = here / ".." / ".." / "vegafusion-runtime" / "tests" / "specs" @@ -59,10 +61,8 @@ def maybe_skip(category, name): pytest.skip("Non-deterministic specification") -@pytest.mark.parametrize( - "category,name", load_test_cases()) +@pytest.mark.parametrize("category,name", load_test_cases()) def test_it(category, name): - maybe_skip(category, name) # Load spec into dict @@ -83,10 +83,14 @@ def test_it(category, name): img_duckdb = imread(BytesIO(vega_to_png(transformed))) # Compare images - assert img_datafusion.shape == img_duckdb.shape, "Size mismatch between datafusion and duckdb connections" + assert ( + img_datafusion.shape == img_duckdb.shape + ), "Size mismatch between datafusion and duckdb connections" similarity = ssim(img_datafusion, img_duckdb, channel_axis=2) print(similarity) - assert similarity >= 0.998, f"Similarity failed between datafusion and duckdb connections" + assert ( + similarity >= 0.998 + ), "Similarity failed between datafusion and duckdb connections" def test_pretransform_extract(): @@ -94,12 +98,12 @@ def test_pretransform_extract(): spec = json.loads(spec_file.read_text("utf8")) vf.runtime.set_connection("datafusion") - (transformed, datasets, warnings) = vf.runtime.pre_transform_extract(spec, "UTC") + (_transformed, datasets, warnings) = vf.runtime.pre_transform_extract(spec, "UTC") assert len(warnings) == 0 assert len(datasets) == 1 - (name, scope, table)= datasets[0] + (name, scope, table) = datasets[0] assert name == "source_0" assert scope == [] assert isinstance(table, pa.Table) diff --git a/vegafusion-python/tests/test_sql_dataset.py b/vegafusion-python/tests/test_sql_dataset.py index 681ab718d..678f48228 100644 --- a/vegafusion-python/tests/test_sql_dataset.py +++ b/vegafusion-python/tests/test_sql_dataset.py @@ -1,7 +1,8 @@ -from vegafusion.dataset.duckdb import DuckDbDataset import duckdb import pytest +from vegafusion.dataset.duckdb import DuckDbDataset + def test_sql_dataset_dfi(): try: diff --git 
a/vegafusion-python/tests/test_transformed_data.py b/vegafusion-python/tests/test_transformed_data.py index 81cb1e97b..04558ad9f 100644 --- a/vegafusion-python/tests/test_transformed_data.py +++ b/vegafusion-python/tests/test_transformed_data.py @@ -1,15 +1,16 @@ from pathlib import Path +import altair as alt import pandas as pd +import polars as pl import pyarrow as pa import pytest from altair.utils.execeval import eval_block -import vegafusion as vf from vega_datasets import data -import polars as pl -import altair as alt -pa_major_minor = tuple((int(v) for v in pa.__version__.split(".")[:2])) +import vegafusion as vf + +pa_major_minor = tuple(int(v) for v in pa.__version__.split(".")[:2]) here = Path(__file__).parent altair_mocks_dir = here / "altair_mocks" @@ -17,59 +18,136 @@ def get_connections(): connections = ["datafusion"] - try: - import duckdb + + from importlib.util import find_spec + + if find_spec("duckdb") is not None: connections.append("duckdb") - except ImportError: - pass return connections @pytest.mark.parametrize( - "mock_name,expected_len,expected_cols", [ + "mock_name,expected_len,expected_cols", + [ ("area/cumulative_count", 3201, ["Running_Time_min", "cumulative_count"]), ("area/gradient", 68, ["symbol", "date", "price"]), ("area/layered", 51, ["year", "source", "net_generation"]), - ("area/normalized_stacked", 51, ["year", "source", "net_generation_start", "net_generation_end"]), - ("area/streamgraph", 1708, ["series", "yearmonth_date", "sum_count_start", "sum_count_end"]), - ("area/trellis", 51, ["year", "source", "net_generation_start", "net_generation_end"]), - ("area/trellis_sort_array", 492, ["symbol", "date", "row_symbol_sort_index", "price_start", "price_end"]), - ("bar/diverging_stacked", 40, ["question", "percentage", "percentage_start", "percentage_end"]), + ( + "area/normalized_stacked", + 51, + ["year", "source", "net_generation_start", "net_generation_end"], + ), + ( + "area/streamgraph", + 1708, + ["series", 
"yearmonth_date", "sum_count_start", "sum_count_end"], + ), + ( + "area/trellis", + 51, + ["year", "source", "net_generation_start", "net_generation_end"], + ), + ( + "area/trellis_sort_array", + 492, + ["symbol", "date", "row_symbol_sort_index", "price_start", "price_end"], + ), + ( + "bar/diverging_stacked", + 40, + ["question", "percentage", "percentage_start", "percentage_end"], + ), ("bar/grouped", 12, ["year", "site", "sum_yield"]), ("bar/horizontal", 52, ["year", "wheat", "wages"]), ("bar/horizontal_grouped", 12, ["year", "site", "sum_yield"]), - ("bar/horizontal_stacked", 60, ["site", "variety", "sum_yield_start", "sum_yield_end"]), + ( + "bar/horizontal_stacked", + 60, + ["site", "variety", "sum_yield_start", "sum_yield_end"], + ), ("bar/layered", 51, ["year", "source", "net_generation"]), - ("bar/normalized_stacked", 60, ["site", "variety", "sum_yield_start", "sum_yield_end"]), - ("bar/percentage_of_total", 5, ["Activity", "Time", "TotalTime", "PercentOfTotal"]), + ( + "bar/normalized_stacked", + 60, + ["site", "variety", "sum_yield_start", "sum_yield_end"], + ), + ( + "bar/percentage_of_total", + 5, + ["Activity", "Time", "TotalTime", "PercentOfTotal"], + ), ("bar/sorted", 6, ["site", "sum_yield"]), ("bar/stacked", 60, ["site", "variety", "sum_yield_start", "sum_yield_end"]), - ("bar/stacked_with_sorted_segments", 60, ["site", "variety", "sum_yield_start", "sum_yield_end"]), + ( + "bar/stacked_with_sorted_segments", + 60, + ["site", "variety", "sum_yield_start", "sum_yield_end"], + ), ("bar/trellis_compact", 27, ["a", "b", "c", "p"]), - ("bar/trellis_stacked", 120, ["yield", "variety", "year", "yield_start", "yield_end"]), + ( + "bar/trellis_stacked", + 120, + ["yield", "variety", "year", "yield_start", "yield_end"], + ), ("bar/with_highlighted_bar", 52, ["year", "wheat", "wages"]), ("bar/with_negative_values", 120, ["month", "nonfarm_change"]), - ("bar/with_rounded_edges", 53, ["weather", "month_date", "__count_start", "__count_end"]), + ( + 
"bar/with_rounded_edges", + 53, + ["weather", "month_date", "__count_start", "__count_end"], + ), ("casestudy/anscombe_plot", 44, ["Series", "X", "Y"]), - ("casestudy/beckers_barley_trellis_plot", 120, ["yield", "variety", "year", "site"]), + ( + "casestudy/beckers_barley_trellis_plot", + 120, + ["yield", "variety", "year", "site"], + ), ("casestudy/gapminder_bubble_plot", 187, ["country", "health", "population"]), - ("casestudy/iowa_electricity", 51, ["year", "net_generation_start", "net_generation_end"]), + ( + "casestudy/iowa_electricity", + 51, + ["year", "net_generation_start", "net_generation_end"], + ), ("casestudy/isotype", 37, ["country", "animal", "x"]), ("casestudy/natural_disasters", 686, ["Entity", "Year", "Deaths"]), ("casestudy/top_k_items", 9, ["Title", "IMDB_Rating_start", "IMDB_Rating_end"]), ("casestudy/top_k_letters", 9, ["letters", "count", "rank"]), - ("casestudy/top_k_with_others", 10, ["ranked_director", "mean_aggregate_gross"]), + ( + "casestudy/top_k_with_others", + 10, + ["ranked_director", "mean_aggregate_gross"], + ), ("casestudy/us_population_over_time_facet", 285, ["age", "year", "sum_people"]), ("casestudy/window_rank", 12, ["team", "matchday", "rank"]), ("circular/donut", 6, ["category", "value_start", "value_end"]), ("circular/pie", 6, ["category", "value_start", "value_end"]), - ("histogram/trellis", 20, ["Origin", "__count", "bin_maxbins_10_Horsepower_end"]), - ("histogram/layered", 113, ["Experiment", "__count", "bin_maxbins_100_Measurement"]), + ( + "histogram/trellis", + 20, + ["Origin", "__count", "bin_maxbins_10_Horsepower_end"], + ), + ( + "histogram/layered", + 113, + ["Experiment", "__count", "bin_maxbins_100_Measurement"], + ), ("interactive/brush", 392, ["Name", "Cylinders", "Origin"]), - ("interactive/casestudy-us_population_over_time", 38, ["year", "age", "sex", "people"]), - ("interactive/casestudy-weather_heatmap", 365, ["monthdate_date", "date_date", "max_temp"]), - ("interactive/legend", 1708, ["yearmonth_date", 
"sum_count_start", "sum_count_end"]), + ( + "interactive/casestudy-us_population_over_time", + 38, + ["year", "age", "sex", "people"], + ), + ( + "interactive/casestudy-weather_heatmap", + 365, + ["monthdate_date", "date_date", "max_temp"], + ), + ( + "interactive/legend", + 1708, + ["yearmonth_date", "sum_count_start", "sum_count_end"], + ), ("interactive/other-image_tooltip", 2, ["a", "b", "image"]), ("interactive/scatter-href", 392, ["Name", "Horsepower", "url"]), ("interactive/scatter_plot", 392, ["Name", "Horsepower", "Year"]), @@ -83,7 +161,11 @@ def get_connections(): ("line/with_generator", 256, ["x", "sin", "cos", "key", "value"]), ("line/with_logarithmic_scale", 15, ["year", "sum_people"]), ("line/with_points", 100, ["x", "f(x)"]), - ("other/beckers_barley_wrapped_facet", 120, ["variety", "site", "median_yield"]), + ( + "other/beckers_barley_wrapped_facet", + 120, + ["variety", "site", "median_yield"], + ), ("other/binned_heatmap", 378, ["__count", "bin_maxbins_60_IMDB_Rating_end"]), ("other/boxplot", 19, ["age", "mid_box_people"]), ("other/comet_chart", 120, ["variety", "1932", "delta"]), @@ -94,13 +176,25 @@ def get_connections(): ("other/stem_and_leaf", 100, ["samples", "stem", "leaf", "position"]), ("other/wilkinson_dot_plot", 21, ["data", "id"]), ("other/parallel_coordinates", 600, ["sepalWidth", "index", "key", "value"]), - ("other/normed_parallel_coordinates", 600, ["sepalWidth", "minmax_value", "mid"]), + ( + "other/normed_parallel_coordinates", + 600, + ["sepalWidth", "minmax_value", "mid"], + ), ("other/ridgeline_plot", 108, ["Month", "mean_temp", "value"]), - ("scatter/binned", 64, ["__count", "bin_maxbins_10_Rotten_Tomatoes_Rating_end"]), + ( + "scatter/binned", + 64, + ["__count", "bin_maxbins_10_Rotten_Tomatoes_Rating_end"], + ), ("scatter/bubble_plot", 392, ["Name", "Cylinders", "Origin"]), ("scatter/connected", 55, ["side", "year", "miles", "gas"]), ("scatter/multifeature", 150, ["sepalLength", "petalLength", "species"]), - 
("scatter/table_bubble_plot_github", 168, ["hours_time", "day_time", "sum_count"]), + ( + "scatter/table_bubble_plot_github", + 168, + ["hours_time", "day_time", "sum_count"], + ), ("scatter/trellis", 392, ["Name", "Cylinders", "Year"]), ("simple/bar_chart", 9, ["a", "b"]), ("simple/heatmap", 100, ["x", "y", "z"]), @@ -108,7 +202,7 @@ def get_connections(): ("simple/scatter_tooltips", 392, ["Name", "Cylinders", "Year"]), ("simple/stacked_bar_chart", 51, ["year", "source", "net_generation_end"]), ("simple/strip_chart", 400, ["Name", "Cylinders", "Origin"]), - ] + ], ) @pytest.mark.parametrize("connection", get_connections()) def test_transformed_data_for_mock(mock_name, expected_len, expected_cols, connection): @@ -135,49 +229,140 @@ def test_transformed_data_for_mock(mock_name, expected_len, expected_cols, conne @pytest.mark.parametrize( - "mock_name,expected_lens,all_expected_cols", [ + "mock_name,expected_lens,all_expected_cols", + [ ("area/horizon_graph", [20, 20], [["x", "y"], ["x", "y", "ny"]]), ("bar/and_tick_chart", [7, 7], [["goal", "score_start"], ["project", "goal"]]), - ("bar/stacked_with_text_overlay", [60, 60], [["site", "sum_yield_start"], ["variety", "sum_yield_end"]]), + ( + "bar/stacked_with_text_overlay", + [60, 60], + [["site", "sum_yield_start"], ["variety", "sum_yield_end"]], + ), ("bar/with_labels", [52, 52], [["wages", "wheat_start"], ["wheat", "wages"]]), ("bar/with_line_at_mean", [52, 1], [["wages", "wheat_start"], ["mean_wheat"]]), - ("bar/with_line_on_dual_axis", [52, 52], [["wages", "wheat_start"], ["wheat", "wages"]]), - ("bar/with_rolling_mean", [52, 52], [["wages", "wheat_start"], ["wheat", "wages"]]), - ("casestudy/co2_concentration", [713, 7, 7], [["year", "decade"], ["scaled_date", "first_date"], ["end"]]), - ("casestudy/falkensee", [2, 38, 38], [["event", "start"], ["population", "year"], ["year"]]), - ("casestudy/us_employment", [120, 1, 2], [["construction"], ["president", "end"], ["start"]]), - ("casestudy/wheat_wages", [52, 
52, 52, 52], [["wheat"], ["year_end"], ["year"], ["year"]]), - ("histogram/with_a_global_mean_overlay", [9, 1], [["bin_maxbins_10_IMDB_Rating_end"], ["mean_IMDB_Rating"]]), - ("interactive/area-interval_selection", [123, 123], [["price_start"], ["price_end"]]), - ("interactive/casestudy-seattle_weather_interactive", [1461, 5], [["monthdate_date"], ["__count"]]), - ("interactive/casestudy-us_population_pyramid_over_time", [19, 38, 19], [["sum_people"], ["people"], ["sum_people_end"]]), - ("interactive/cross_highlight", [64, 64, 13], [["__count"], ["__count"], ["__count"]]), + ( + "bar/with_line_on_dual_axis", + [52, 52], + [["wages", "wheat_start"], ["wheat", "wages"]], + ), + ( + "bar/with_rolling_mean", + [52, 52], + [["wages", "wheat_start"], ["wheat", "wages"]], + ), + ( + "casestudy/co2_concentration", + [713, 7, 7], + [["year", "decade"], ["scaled_date", "first_date"], ["end"]], + ), + ( + "casestudy/falkensee", + [2, 38, 38], + [["event", "start"], ["population", "year"], ["year"]], + ), + ( + "casestudy/us_employment", + [120, 1, 2], + [["construction"], ["president", "end"], ["start"]], + ), + ( + "casestudy/wheat_wages", + [52, 52, 52, 52], + [["wheat"], ["year_end"], ["year"], ["year"]], + ), + ( + "histogram/with_a_global_mean_overlay", + [9, 1], + [["bin_maxbins_10_IMDB_Rating_end"], ["mean_IMDB_Rating"]], + ), + ( + "interactive/area-interval_selection", + [123, 123], + [["price_start"], ["price_end"]], + ), + ( + "interactive/casestudy-seattle_weather_interactive", + [1461, 5], + [["monthdate_date"], ["__count"]], + ), + ( + "interactive/casestudy-us_population_pyramid_over_time", + [19, 38, 19], + [["sum_people"], ["people"], ["sum_people_end"]], + ), + ( + "interactive/cross_highlight", + [64, 64, 13], + [["__count"], ["__count"], ["__count"]], + ), ("interactive/histogram-responsive", [20, 20], [["__count"], ["__count"]]), ("interactive/multiline_highlight", [560, 560], [["price"], ["price"]]), - ("interactive/multiline_tooltip", [300, 300, 300], 
[["x"], ["y"], ["category"]]), - ("interactive/scatter-with_linked_table", [392, 19, 19, 19], [["Year"], ["rank"], ["rank"], ["rank"]]), + ( + "interactive/multiline_tooltip", + [300, 300, 300], + [["x"], ["y"], ["category"]], + ), + ( + "interactive/scatter-with_linked_table", + [392, 19, 19, 19], + [["Year"], ["rank"], ["rank"], ["rank"]], + ), ("interactive/scatter-with_minimap", [1461, 1461], [["weather"], ["weather"]]), - ("interactive/scatter_with_layered_histogram", [2, 19], [["mean_height"], ["bin_step_5_age"]]), + ( + "interactive/scatter_with_layered_histogram", + [2, 19], + [["mean_height"], ["bin_step_5_age"]], + ), ("interactive/select_detail", [20, 1000], [["mean_y"], ["value"]]), - ("interactive/select_mark_area", [122, 122], [["sum_count"], ["yearmonth_date"]]), + ( + "interactive/select_mark_area", + [122, 122], + [["sum_count"], ["yearmonth_date"]], + ), ("interactive/selection_histogram", [392, 3], [["Cylinders"], ["__count"]]), - ("interactive/selection_layer_bar_month", [12, 1], [["mean_precipitation"], ["mean_precipitation"]]), + ( + "interactive/selection_layer_bar_month", + [12, 1], + [["mean_precipitation"], ["mean_precipitation"]], + ), ("line/layer_line_color_rule", [560, 5], [["symbol"], ["average_price"]]), - ("other/bar_chart_with_highlighted_segment", [52, 1, 1], [["wheat_start"], ["baseline"], ["threshold"]]), + ( + "other/bar_chart_with_highlighted_segment", + [52, 1, 1], + [["wheat_start"], ["baseline"], ["threshold"]], + ), ("other/candlestick_chart", [44, 44], [["ret"], ["signal"]]), ("other/errorbars_with_std", [10, 10], [["mean_yield"], ["variety"]]), - ("other/layered_chart_with_dual_axis", [12, 12], [["average_temp_max"], ["average_temp_max"]]), + ( + "other/layered_chart_with_dual_axis", + [12, 12], + [["average_temp_max"], ["average_temp_max"]], + ), ("other/layered_heatmap_text", [9, 9], [["Origin"], ["num_cars"]]), ("other/ranged_dot_plot", [10, 10], [["life_expect"], ["country"]]), - ("other/scatter_marginal_hist", [34, 
150, 27], [["__count"], ["species"], ["__count"]]), - ("scatter/dot_dash_plot", [400, 392, 398], [["Cylinders"], ["Cylinders"], ["Cylinders"]]), + ( + "other/scatter_marginal_hist", + [34, 150, 27], + [["__count"], ["species"], ["__count"]], + ), + ( + "scatter/dot_dash_plot", + [400, 392, 398], + [["Cylinders"], ["Cylinders"], ["Cylinders"]], + ), ("scatter/with_errorbars", [5, 5], [["ymin"], ["upper_ymin"]]), ("scatter/with_labels", [5, 5], [["x"], ["label"]]), - ("scatter/with_rolling_mean", [1461, 1461], [["precipitation"], ["rolling_mean"]]), - ] + ( + "scatter/with_rolling_mean", + [1461, 1461], + [["precipitation"], ["rolling_mean"]], + ), + ], ) @pytest.mark.parametrize("connection", get_connections()) -def test_multi_transformed_data_for_mock(mock_name, expected_lens, all_expected_cols, connection): +def test_multi_transformed_data_for_mock( + mock_name, expected_lens, all_expected_cols, connection +): vf.runtime.set_connection(connection) mock_path = altair_mocks_dir / mock_name / "mock.py" mock_src = mock_path.read_text("utf8") @@ -226,13 +411,10 @@ def test_gh_286(): vf.runtime.set_connection("datafusion") source = pl.from_pandas(data.seattle_weather()) - chart = alt.Chart(source).mark_bar( - cornerRadiusTopLeft=3, - cornerRadiusTopRight=3 - ).encode( - x='month(date):O', - y='count():Q', - color='weather:N' + chart = ( + alt.Chart(source) + .mark_bar(cornerRadiusTopLeft=3, cornerRadiusTopRight=3) + .encode(x="month(date):O", y="count():Q", color="weather:N") ) transformed = chart.transformed_data() assert isinstance(transformed, pl.DataFrame) @@ -243,14 +425,14 @@ def test_gh_286(): def test_categorical_columns(connection): vf.runtime.set_connection(connection) - df = pd.DataFrame({ - "a": [0, 1, 2, 3, 4, 5], - "categorical": pd.Categorical.from_codes([0, 1, 0, 1, 1, 0], ["A", "BB"]) - }) - - chart = alt.Chart(df).mark_bar().encode( - alt.X("categorical:N"), alt.Y("sum(a):Q") + df = pd.DataFrame( + { + "a": [0, 1, 2, 3, 4, 5], + "categorical": 
pd.Categorical.from_codes([0, 1, 0, 1, 1, 0], ["A", "BB"]), + } ) + + chart = alt.Chart(df).mark_bar().encode(alt.X("categorical:N"), alt.Y("sum(a):Q")) transformed = chart.transformed_data() expected = pd.DataFrame({"categorical": ["A", "BB"], "sum_a": [7, 8]}) - pd.testing.assert_frame_equal(transformed, expected) \ No newline at end of file + pd.testing.assert_frame_equal(transformed, expected) diff --git a/vegafusion-python/tests/test_transformer.py b/vegafusion-python/tests/test_transformer.py index 4b7d132f9..8852bb068 100644 --- a/vegafusion-python/tests/test_transformer.py +++ b/vegafusion-python/tests/test_transformer.py @@ -1,14 +1,14 @@ +from decimal import Decimal + import pandas as pd import pyarrow as pa -from decimal import Decimal + from vegafusion.transformer import to_arrow_table + def test_to_arrow_expands_categoricals(): # Build DataFrame with one categorical column - df = pd.DataFrame({ - "a": [1, 2, 3], - "b": ["One", "One", "Two"] - }) + df = pd.DataFrame({"a": [1, 2, 3], "b": ["One", "One", "Two"]}) df["b"] = df["b"].astype("category") assert isinstance(df["b"].dtype, pd.CategoricalDtype) @@ -22,10 +22,9 @@ def test_to_arrow_expands_categoricals(): def test_to_table_converts_decimals(): # Build DataFrame with one Decimal column - df = pd.DataFrame({ - "a": [1, 2, 3], - "b": [Decimal("3.12"), Decimal("4.9"), Decimal("6")] - }) + df = pd.DataFrame( + {"a": [1, 2, 3], "b": [Decimal("3.12"), Decimal("4.9"), Decimal("6")]} + ) assert df["b"].dtype.kind == "O" # Convert to pyarrow table @@ -38,10 +37,7 @@ def test_to_table_converts_decimals(): def test_to_table_with_mixed_string_int_column(): # Build DataFrame with one Decimal column - df = pd.DataFrame({ - "a": [1, 2, 3], - "b": ["A", "B", 3] - }) + df = pd.DataFrame({"a": [1, 2, 3], "b": ["A", "B", 3]}) assert df["b"].dtype.kind == "O" # Convert to pyarrow table @@ -53,12 +49,14 @@ def test_to_table_with_mixed_string_int_column(): def test_to_table_with_all_conversions(): - df = pd.DataFrame({ - 
"a": [1, 2, 3], - "b": ["One", "One", "Two"], - "c": [Decimal("3.12"), Decimal("4.9"), Decimal("6")], - "d": ["A", "B", 3] - }) + df = pd.DataFrame( + { + "a": [1, 2, 3], + "b": ["One", "One", "Two"], + "c": [Decimal("3.12"), Decimal("4.9"), Decimal("6")], + "d": ["A", "B", 3], + } + ) df["b"] = df["b"].astype("category") # Check initial pandas column types diff --git a/vegafusion-python/vegafusion/__init__.py b/vegafusion-python/vegafusion/__init__.py index 79b4a1fae..ecd8a6525 100644 --- a/vegafusion-python/vegafusion/__init__.py +++ b/vegafusion-python/vegafusion/__init__.py @@ -1,20 +1,27 @@ -from .runtime import runtime -from .transformer import to_feather, get_inline_datasets_for_spec -from .local_tz import set_local_tz, get_local_tz -from importlib.abc import MetaPathFinder, Loader -from importlib.metadata import version as _original_version import importlib.metadata +from importlib.metadata import version as _original_version +from typing import cast from ._vegafusion import __version__ +from .local_tz import get_local_tz, set_local_tz +from .runtime import runtime -def patched_version(distribution_name): + +def patched_version(distribution_name: str) -> str: """ Fake the version of the vegafusion-python-embed package to match the version of the vegafusion package. This is just to satisfy Altair's version check. 
""" if distribution_name == "vegafusion-python-embed": - return __version__ + return cast(str, __version__) return _original_version(distribution_name) + # Patch importlib.metadata.version to handle our dummy package -importlib.metadata.version = patched_version \ No newline at end of file +importlib.metadata.version = patched_version + +__all__ = [ + "runtime", + "set_local_tz", + "get_local_tz", +] diff --git a/vegafusion-python/vegafusion/connection/__init__.py b/vegafusion-python/vegafusion/connection/__init__.py index 56984240c..b5381dc76 100644 --- a/vegafusion-python/vegafusion/connection/__init__.py +++ b/vegafusion-python/vegafusion/connection/__init__.py @@ -1,10 +1,12 @@ -from typing import Dict, Optional, TYPE_CHECKING -from dataclasses import dataclass +from __future__ import annotations + from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import TYPE_CHECKING if TYPE_CHECKING: - from pyarrow import Schema, Table from pandas import DataFrame + from pyarrow import Schema, Table @dataclass @@ -12,10 +14,11 @@ class CsvReadOptions: """ CSV Read configuration options """ + has_header: bool delimeter: str file_extension: str - schema: Optional["Schema"] + schema: Schema | None class RegistrationNotSupportedError(RuntimeError): @@ -26,6 +29,7 @@ class SqlConnection(ABC): """ Python interface for SQL connections """ + @classmethod def dialect(cls) -> str: """ @@ -46,7 +50,7 @@ def dialect(cls) -> str: raise NotImplementedError() @abstractmethod - def tables(self) -> Dict[str, "Schema"]: + def tables(self) -> dict[str, Schema]: """ Returns the names and schema for the tables that are provided by the connection. 
These are the tables that may be referenced by SQL queries passed to the @@ -57,7 +61,7 @@ def tables(self) -> Dict[str, "Schema"]: raise NotImplementedError() @abstractmethod - def fetch_query(self, query: str, schema: "Schema") -> "Table": + def fetch_query(self, query: str, schema: Schema) -> Table: """ Returns the result of evaluating the requested query. The resulting pa.Table should have a schema matching the provided schema @@ -77,7 +81,9 @@ def fallback(self) -> bool: """ return True - def register_pandas(self, name: str, df: "DataFrame", temporary: bool = False): + def register_pandas( + self, name: str, df: DataFrame, temporary: bool = False + ) -> None: """ Register the provided pandas DataFrame as a table with the provided name @@ -86,9 +92,11 @@ def register_pandas(self, name: str, df: "DataFrame", temporary: bool = False): :param temporary: Whether table is considered temporary, and should be removed by unregister_temporary_tables """ - raise RegistrationNotSupportedError("Connection does not support registration of pandas datasets") + raise RegistrationNotSupportedError( + "Connection does not support registration of pandas datasets" + ) - def register_arrow(self, name: str, table: "Table", temporary: bool = False): + def register_arrow(self, name: str, table: Table, temporary: bool = False) -> None: """ Register the provided pyarrow Table as a table with the provided name :param name: Table name @@ -96,9 +104,11 @@ def register_arrow(self, name: str, table: "Table", temporary: bool = False): :param temporary: Whether table is considered temporary, and should be removed by unregister_temporary_tables """ - raise RegistrationNotSupportedError("Connection does not support registration of arrow datasets") + raise RegistrationNotSupportedError( + "Connection does not support registration of arrow datasets" + ) - def register_json(self, name: str, path: str, temporary: bool = False): + def register_json(self, name: str, path: str, temporary: bool = False) 
-> None: """ Register the JSON file at the provided path as a table with the provided name :param name: Table name @@ -106,9 +116,13 @@ def register_json(self, name: str, path: str, temporary: bool = False): :param temporary: Whether table is considered temporary, and should be removed by unregister_temporary_tables """ - raise RegistrationNotSupportedError("Connection does not support registration of json datasets") + raise RegistrationNotSupportedError( + "Connection does not support registration of json datasets" + ) - def register_csv(self, name: str, path: str, options: CsvReadOptions, temporary: bool = False): + def register_csv( + self, name: str, path: str, options: CsvReadOptions, temporary: bool = False + ) -> None: """ Register the CSV file at the provided path as a table with the provided name :param name: Table name @@ -117,9 +131,11 @@ def register_csv(self, name: str, path: str, options: CsvReadOptions, temporary: :param temporary: Whether table is considered temporary, and should be removed by unregister_temporary_tables """ - raise RegistrationNotSupportedError("Connection does not support registration of csv datasets") + raise RegistrationNotSupportedError( + "Connection does not support registration of csv datasets" + ) - def register_parquet(self, name: str, path: str, temporary: bool = False): + def register_parquet(self, name: str, path: str, temporary: bool = False) -> None: """ Register the Parquet file at the provided path as a table with the provided name :param name: Table name @@ -127,9 +143,13 @@ def register_parquet(self, name: str, path: str, temporary: bool = False): :param temporary: Whether table is considered temporary, and should be removed by unregister_temporary_tables """ - raise RegistrationNotSupportedError("Connection does not support registration of parquet datasets") + raise RegistrationNotSupportedError( + "Connection does not support registration of parquet datasets" + ) - def register_arrow_file(self, name: str, path: 
str, temporary: bool = False): + def register_arrow_file( + self, name: str, path: str, temporary: bool = False + ) -> None: """ Register the Arrow file at the provided path as a table with the provided name :param name: Table name @@ -137,17 +157,23 @@ def register_arrow_file(self, name: str, path: str, temporary: bool = False): :param temporary: Whether table is considered temporary, and should be removed by unregister_temporary_tables """ - raise RegistrationNotSupportedError("Connection does not support registration of arrow file datasets") + raise RegistrationNotSupportedError( + "Connection does not support registration of arrow file datasets" + ) - def unregister(self, name: str): + def unregister(self, name: str) -> None: """ Unregister a table (temporary or otherwise) by name :param name: Table name """ - raise RegistrationNotSupportedError("Connection does not support unregistration") + raise RegistrationNotSupportedError( + "Connection does not support unregistration" + ) - def unregister_temporary_tables(self): + def unregister_temporary_tables(self) -> None: """ Unregister all dynamically registered tables """ - raise RegistrationNotSupportedError("Connection does not support unregistering temporary tables") + raise RegistrationNotSupportedError( + "Connection does not support unregistering temporary tables" + ) diff --git a/vegafusion-python/vegafusion/connection/duckdb.py b/vegafusion-python/vegafusion/connection/duckdb.py index 574d72d53..91a7971ee 100644 --- a/vegafusion-python/vegafusion/connection/duckdb.py +++ b/vegafusion-python/vegafusion/connection/duckdb.py @@ -1,24 +1,28 @@ +from __future__ import annotations + +import logging import re +import uuid import warnings - -from . 
import SqlConnection, CsvReadOptions - -from typing import Dict, Optional from distutils.version import LooseVersion +from typing import Any import duckdb +import duckdb.typing +import pandas as pd import pyarrow as pa import pyarrow.feather -import pandas as pd -import logging -import uuid + +from . import CsvReadOptions, SqlConnection # Table suffix name to use for raw registered table RAW_PREFIX = "_vf_raw_" -def duckdb_type_name_to_pyarrow_type(duckdb_type: str) -> pa.DataType: - duckdb_type = str(duckdb_type).upper() +def duckdb_type_name_to_pyarrow_type( + duckdb_type: str, +) -> pa.DataType: + duckdb_type = duckdb_type.upper() if duckdb_type in ("VARCHAR", "JSON", "CHAR", "CATEGORICAL"): return pa.string() elif duckdb_type in ("REAL", "FLOAT4", "FLOAT"): @@ -64,7 +68,7 @@ def duckdb_relation_to_schema(rel: duckdb.DuckDBPyRelation) -> pa.Schema: schema_fields = {} for col, type_name in zip(rel.columns, rel.dtypes): try: - type_ = duckdb_type_name_to_pyarrow_type(type_name) + type_ = duckdb_type_name_to_pyarrow_type(str(type_name)) schema_fields[col] = type_ except ValueError: # Skip columns with unrecognized types @@ -72,7 +76,7 @@ def duckdb_relation_to_schema(rel: duckdb.DuckDBPyRelation) -> pa.Schema: return pa.schema(schema_fields) -def pyarrow_type_to_duckdb_type_name(field_type: pa.Schema) -> Optional[str]: +def pyarrow_type_to_duckdb_type_name(field_type: pa.Schema) -> str | None: if field_type in (pa.utf8(), pa.large_utf8()): return "VARCHAR" elif field_type in (pa.float16(), pa.float32()): @@ -132,29 +136,39 @@ def pyarrow_schema_to_select_replace(schema: pa.Schema, table_name: str) -> str: class DuckDbConnection(SqlConnection): - def __init__(self, connection: duckdb.DuckDBPyConnection = None, fallback: bool = True, verbose: bool = False): + def __init__( + self, + connection: duckdb.DuckDBPyConnection | None = None, + fallback: bool = True, + verbose: bool = False, + ) -> None: # Validate duckdb version if LooseVersion(duckdb.__version__) < 
LooseVersion("0.7.0"): raise ImportError( - f"The VegaFusion DuckDB connection requires at least DuckDB version 0.7.0\n" + "The VegaFusion DuckDB connection requires at least DuckDB " + "version 0.7.0\n" f"Found version {duckdb.__version__}" ) self._fallback = fallback self._verbose = verbose - self._temp_tables = set() + self._temp_tables: set[str] = set() if connection is None: connection = duckdb.connect() - # Install and load the httpfs extension only if we are creating the duckdb connection - # here. If a connection was passed in, don't assume it has internet access and the - # ability to install extensions + # Install and load the httpfs extension only if we are creating + # the duckdb connection here. If a connection was passed in, don't + # assume it has internet access and the ability to install + # extensions try: connection.install_extension("httpfs") connection.load_extension("httpfs") - except (IOError, duckdb.IOException, duckdb.InvalidInputException) as e: - warnings.warn(f"Failed to install and load the DuckDB httpfs extension:\n{e}") + except (OSError, duckdb.IOException, duckdb.InvalidInputException) as e: + warnings.warn( + f"Failed to install and load the DuckDB httpfs extension:\n{e}", + stacklevel=2, + ) # Use a less round number for pandas_analyze_sample (default is 1000) connection.execute("SET GLOBAL pandas_analyze_sample=1007") @@ -165,7 +179,8 @@ def __init__(self, connection: duckdb.DuckDBPyConnection = None, fallback: bool self.conn = connection self.logger = logging.getLogger("DuckDbConnection") - self._registered_table_schemas = dict() + self._registered_table_schemas: dict[str, Any] = {} + # Call self.tables to warm the cache of table schemas self.tables() @@ -176,7 +191,7 @@ def dialect(cls) -> str: def fallback(self) -> bool: return self._fallback - def _replace_query_for_table(self, table_name: str): + def _replace_query_for_table(self, table_name: str) -> str: """ Build a `SELECT * REPLACE(...) 
FROM table_name` query for a table that converts unsupported column types to varchar columns @@ -186,7 +201,7 @@ def _replace_query_for_table(self, table_name: str): for col, type_name in zip(rel.columns, rel.dtypes): quoted_col_name = quote_column(col) try: - duckdb_type_name_to_pyarrow_type(type_name) + duckdb_type_name_to_pyarrow_type(str(type_name)) # Skip columns with supported types except ValueError: # Convert unsupported types to strings (except struct) @@ -199,24 +214,26 @@ def _replace_query_for_table(self, table_name: str): else: return f"SELECT * FROM {table_name}" - def _schema_for_table(self, table_name: str): + def _schema_for_table(self, table_name: str) -> pa.Schema: rel = self.conn.query(f'select * from "{table_name}" limit 1') return duckdb_relation_to_schema(rel) - def tables(self) -> Dict[str, pa.Schema]: + def tables(self) -> dict[str, pa.Schema]: result = {} - table_names = self.conn.query( - "select table_name from information_schema.tables" - ).to_df()["table_name"].tolist() + table_names = ( + self.conn.query("select table_name from information_schema.tables") + .to_df()["table_name"] + .tolist() + ) for table_name in table_names: if table_name in self._registered_table_schemas: - # Registered tables are expected to only change when self.register_* is called, - # so use the cached version + # Registered tables are expected to only change when + # self.register_* is called, so use the cached version result[table_name] = self._registered_table_schemas[table_name] elif not table_name.startswith(RAW_PREFIX): - # Dynamically look up schema for tables that are registered with duckdb but not with - # the self.register_* methods. Skip raw tables + # Dynamically look up schema for tables that are registered with + # duckdb but not with the self.register_* methods. 
Skip raw tables result[table_name] = self._schema_for_table(table_name) return result @@ -240,15 +257,17 @@ def fetch_query(self, query: str, schema: pa.Schema) -> pa.Table: return result - def _update_temp_names(self, name: str, temporary: bool): + def _update_temp_names(self, name: str, temporary: bool) -> None: if temporary: self._temp_tables.add(name) elif name in self._temp_tables: self._temp_tables.remove(name) - def register_pandas(self, name: str, df: pd.DataFrame, temporary: bool = False): - # Add _vf_order column to avoid the more expensive operation of computing it with a - # ROW_NUMBER function in duckdb + def register_pandas( + self, name: str, df: pd.DataFrame, temporary: bool = False + ) -> None: + # Add _vf_order column to avoid the more expensive operation of computing it + # with a ROW_NUMBER function in duckdb df = df.copy(deep=False) df["_vf_order"] = range(0, len(df)) @@ -263,7 +282,9 @@ def register_pandas(self, name: str, df: pd.DataFrame, temporary: bool = False): self._update_temp_names(name, temporary) self._registered_table_schemas[name] = self._schema_for_table(name) - def register_arrow(self, name: str, table: pa.Table, temporary: bool = False): + def register_arrow( + self, name: str, table: pa.Table, temporary: bool = False + ) -> None: # Register raw table under name with prefix raw_name = RAW_PREFIX + name self.conn.register(raw_name, table) @@ -275,13 +296,15 @@ def register_arrow(self, name: str, table: pa.Table, temporary: bool = False): self._update_temp_names(name, temporary) self._registered_table_schemas[name] = table.schema - def register_json(self, name: str, path: str, temporary: bool = False): + def register_json(self, name: str, path: str, temporary: bool = False) -> None: relation = self.conn.read_json(path) relation.to_view(name) self._update_temp_names(name, temporary) self._registered_table_schemas[name] = self._schema_for_table(name) - def register_csv(self, name: str, path: str, options: CsvReadOptions, temporary: 
bool = False): + def register_csv( + self, name: str, path: str, options: CsvReadOptions, temporary: bool = False + ) -> None: relation = self.conn.read_csv( path, header=options.has_header, @@ -310,7 +333,7 @@ def register_csv(self, name: str, path: str, options: CsvReadOptions, temporary: self._update_temp_names(name, temporary) self._registered_table_schemas[name] = self._schema_for_table(name) - def register_parquet(self, name: str, path: str, temporary: bool = False): + def register_parquet(self, name: str, path: str, temporary: bool = False) -> None: # Register raw table under name with prefix raw_name = RAW_PREFIX + name self.conn.read_parquet(path).to_view(raw_name) @@ -322,22 +345,24 @@ def register_parquet(self, name: str, path: str, temporary: bool = False): self._update_temp_names(name, temporary) self._registered_table_schemas[name] = self._schema_for_table(name) - def register_arrow_file(self, name: str, path: str, temporary: bool = False): + def register_arrow_file( + self, name: str, path: str, temporary: bool = False + ) -> None: arrow_table = pa.feather.read_table(path) self.register_arrow(name, arrow_table, temporary) - def unregister(self, name: str): + def unregister(self, name: str) -> None: for view_name in [name, RAW_PREFIX + name]: self.conn.unregister(view_name) if view_name in self._temp_tables: self._temp_tables.remove(view_name) self._registered_table_schemas.pop(view_name, None) - def unregister_temporary_tables(self): + def unregister_temporary_tables(self) -> None: for name in list(self._temp_tables): self.conn.unregister(name) self._temp_tables.remove(name) -def quote_column(name: str): +def quote_column(name: str) -> str: return '"' + name.replace('"', '""') + '"' diff --git a/vegafusion-python/vegafusion/dataset/__init__.py b/vegafusion-python/vegafusion/dataset/__init__.py index 4453b1a03..e8e011769 100644 --- a/vegafusion-python/vegafusion/dataset/__init__.py +++ b/vegafusion-python/vegafusion/dataset/__init__.py @@ -1,2 +1,3 @@ 
from .sql import SqlDataset +__all__ = ["SqlDataset"] diff --git a/vegafusion-python/vegafusion/dataset/dfi.py b/vegafusion-python/vegafusion/dataset/dfi.py index 42c49ead1..95ef34ef9 100644 --- a/vegafusion-python/vegafusion/dataset/dfi.py +++ b/vegafusion-python/vegafusion/dataset/dfi.py @@ -1,9 +1,14 @@ +from __future__ import annotations + +from collections.abc import Sequence from functools import cached_property -from typing import Sequence, Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Optional, cast + from .sql import SqlDataset if TYPE_CHECKING: - from pyarrow import Table, Schema + from pyarrow import Schema, Table + class SqlDatasetDataFrame: """An implementation of the dataframe interchange protocol. @@ -23,15 +28,15 @@ def __init__( dataset: SqlDataset, nan_as_null: bool = False, allow_copy: bool = True, - pyarrow_table: Optional["Table"] = None, - ): + pyarrow_table: Optional[Table] = None, + ) -> None: self._dataset = dataset self._nan_as_null = nan_as_null self._allow_copy = allow_copy self._pyarrow_table = pyarrow_table @cached_property - def _pyarrow_df(self): + def _pyarrow_df(self) -> Any: # noqa: ANN401 """Returns the pyarrow implementation of the __dataframe__ protocol. If the backing Dataset hasn't been executed yet, this will result in executing and caching the result.""" @@ -46,7 +51,7 @@ def _pyarrow_df(self): ) @cached_property - def _empty_pyarrow_df(self): + def _empty_pyarrow_df(self) -> Any: # noqa: ANN401 """A pyarrow implementation of the __dataframe__ protocol for an empty table with the same schema as this table. 
Used for returning dtype information without executing the backing ibis @@ -56,40 +61,40 @@ def _empty_pyarrow_df(self): return schema.empty_table().__dataframe__() @property - def _schema(self) -> "Schema": + def _schema(self) -> Schema: return self._dataset.table_schema() - def _get_dtype(self, name): + def _get_dtype(self, name: str) -> Any: # noqa: ANN401 """Get the dtype info for a column named `name`.""" return self._empty_pyarrow_df.get_column_by_name(name).dtype # These methods may all be handled without executing the query - def num_columns(self): + def num_columns(self) -> int: return len(self._schema.names) - def column_names(self): - return self._schema.names + def column_names(self) -> list[str]: + return cast(list[str], self._schema.names) - def get_column(self, i: int) -> "DatasetColumn": + def get_column(self, i: int) -> DatasetColumn: name = self._schema.names[i] return self.get_column_by_name(name) - def get_column_by_name(self, name: str) -> "DatasetColumn": + def get_column_by_name(self, name: str) -> DatasetColumn: return DatasetColumn(self, name) - def get_columns(self): + def get_columns(self) -> list[DatasetColumn]: return [DatasetColumn(self, name) for name in self._schema.names] - def select_columns(self, indices: Sequence[int]) -> "SqlDatasetDataFrame": + def select_columns(self, indices: Sequence[int]) -> SqlDatasetDataFrame: names = [self._schema.names[i] for i in indices] return self.select_columns_by_name(names) - def select_columns_by_name(self, names: Sequence[str]) -> "SqlDatasetDataFrame": - return self._pyarrow_df.select_columns_by_name(names) + def select_columns_by_name(self, names: Sequence[str]) -> SqlDatasetDataFrame: + return cast(SqlDatasetDataFrame, self._pyarrow_df.select_columns_by_name(names)) def __dataframe__( self, nan_as_null: bool = False, allow_copy: bool = True - ) -> "SqlDatasetDataFrame": + ) -> SqlDatasetDataFrame: return SqlDatasetDataFrame( self._dataset, nan_as_null=nan_as_null, @@ -99,26 +104,26 @@ def 
__dataframe__( # These methods require executing the query @property - def metadata(self): + def metadata(self) -> Any: # noqa: ANN401 return self._pyarrow_df.metadata def num_rows(self) -> Optional[int]: - return self._pyarrow_df.num_rows() + return cast(Optional[int], self._pyarrow_df.num_rows()) def num_chunks(self) -> int: - return self._pyarrow_df.num_chunks() + return cast(int, self._pyarrow_df.num_chunks()) - def get_chunks(self, n_chunks: Optional[int] = None): + def get_chunks(self, n_chunks: Optional[int] = None) -> Any: # noqa: ANN401 return self._pyarrow_df.get_chunks(n_chunks=n_chunks) class DatasetColumn: - def __init__(self, df: SqlDatasetDataFrame, name: str): + def __init__(self, df: SqlDatasetDataFrame, name: str) -> None: self._df = df self._name = name @cached_property - def _pyarrow_col(self): + def _pyarrow_col(self) -> Any: # noqa: ANN401 """Returns the pyarrow implementation of the __dataframe__ protocol's Column type. If the backing SqlDataset hasn't been executed yet, this will result @@ -127,38 +132,38 @@ def _pyarrow_col(self): # These methods may all be handled without executing the query @property - def dtype(self): + def dtype(self) -> Any: # noqa: ANN401 return self._df._get_dtype(self._name) # These methods require executing the query - def size(self): - return self._pyarrow_col.size() + def size(self) -> int: + return cast(int, self._pyarrow_col.size()) @property - def describe_categorical(self): + def describe_categorical(self) -> Any: # noqa: ANN401 return self._pyarrow_col.describe_categorical @property - def offset(self): - return self._pyarrow_col.offset + def offset(self) -> int: + return cast(int, self._pyarrow_col.offset) @property - def describe_null(self): + def describe_null(self) -> Any: # noqa: ANN401 return self._pyarrow_col.describe_null @property - def null_count(self): - return self._pyarrow_col.null_count + def null_count(self) -> int: + return cast(int, self._pyarrow_col.null_count) @property - def 
metadata(self): + def metadata(self) -> Any: # noqa: ANN401 return self._pyarrow_col.metadata def num_chunks(self) -> int: - return self._pyarrow_col.num_chunks() + return cast(int, self._pyarrow_col.num_chunks()) - def get_chunks(self, n_chunks: Optional[int] = None): + def get_chunks(self, n_chunks: Optional[int] = None) -> Any: # noqa: ANN401 return self._pyarrow_col.get_chunks(n_chunks=n_chunks) - def get_buffers(self): + def get_buffers(self) -> Any: # noqa: ANN401 return self._pyarrow_col.get_buffers() diff --git a/vegafusion-python/vegafusion/dataset/duckdb.py b/vegafusion-python/vegafusion/dataset/duckdb.py index e4390408b..17c5d2fc9 100644 --- a/vegafusion-python/vegafusion/dataset/duckdb.py +++ b/vegafusion-python/vegafusion/dataset/duckdb.py @@ -1,15 +1,19 @@ import logging + import pyarrow as pa from duckdb import DuckDBPyRelation -from .sql import SqlDataset + from ..connection.duckdb import duckdb_relation_to_schema +from .sql import SqlDataset class DuckDbDataset(SqlDataset): def dialect(self) -> str: return "duckdb" - def __init__(self, relation: DuckDBPyRelation, fallback: bool = True, verbose: bool = False): + def __init__( + self, relation: DuckDBPyRelation, fallback: bool = True, verbose: bool = False + ) -> None: self._relation = relation self._fallback = fallback self._verbose = verbose diff --git a/vegafusion-python/vegafusion/dataset/snowpark.py b/vegafusion-python/vegafusion/dataset/snowpark.py deleted file mode 100644 index dfe150df6..000000000 --- a/vegafusion-python/vegafusion/dataset/snowpark.py +++ /dev/null @@ -1,101 +0,0 @@ -import logging -import pyarrow as pa -from .sql import SqlDataset -from snowflake.snowpark import Table as SnowparkTable -from snowflake.snowpark.types import DataType as SnowparkDataType -from typing import Dict - -from ..transformer import to_arrow_table - -SNOWPARK_TO_PYARROW_TYPES: Dict[SnowparkDataType, pa.DataType] = {} - - -def get_snowpark_to_pyarrow_types(): - if not SNOWPARK_TO_PYARROW_TYPES: - import 
snowflake.snowpark.types as sp_types - - SNOWPARK_TO_PYARROW_TYPES.update( - { - sp_types.LongType: pa.int64(), - sp_types.BinaryType: pa.binary(), - sp_types.BooleanType: pa.bool_(), - sp_types.ByteType: pa.int8(), - sp_types.StringType: pa.string(), - sp_types.DateType: pa.date32(), - sp_types.DoubleType: pa.float64(), - sp_types.FloatType: pa.float32(), - sp_types.IntegerType: pa.int32(), - sp_types.ShortType: pa.int16(), - sp_types.TimestampType: pa.timestamp("ms"), - } - ) - return SNOWPARK_TO_PYARROW_TYPES - - -def snowflake_field_to_pyarrow_type(provided_type: SnowparkDataType) -> pa.DataType: - """ - Converts Snowflake types to PyArrow equivalent types, raising a ValueError if they aren't comparable. - See https://docs.snowflake.com/en/sql-reference/intro-summary-data-types - """ - from snowflake.snowpark.types import DecimalType as SnowparkDecimalType - - type_map = get_snowpark_to_pyarrow_types() - if provided_type.__class__ in type_map: - return type_map[provided_type.__class__] - - if isinstance(provided_type, SnowparkDecimalType): - return pa.decimal128(provided_type.precision, provided_type.scale) - else: - raise ValueError(f"Unsupported Snowpark type: {provided_type}") - - -def snowpark_table_to_pyarrow_schema(table: SnowparkTable) -> pa.Schema: - schema_fields = {} - for name, field in zip(table.schema.names, table.schema.fields): - normalised_name = name.strip('"') - schema_fields[normalised_name] = snowflake_field_to_pyarrow_type(field.datatype) - return pa.schema(schema_fields) - - -class SnowparkDataset(SqlDataset): - def dialect(self) -> str: - return "snowflake" - - def __init__( - self, table: SnowparkTable, fallback: bool = True, verbose: bool = False - ): - if not isinstance(table, SnowparkTable): - raise ValueError( - f"SnowparkDataset accepts a snowpark Table. 
Received: {type(table)}" - ) - self._table = table - self._session = table._session - - self._fallback = fallback - self._verbose = verbose - self._table_name = table.table_name - self._table_schema = snowpark_table_to_pyarrow_schema(self._table) - - self.logger = logging.getLogger("SnowparkDataset") - - def table_name(self) -> str: - return self._table_name - - def table_schema(self) -> pa.Schema: - return self._table_schema - - def fetch_query(self, query: str, schema: pa.Schema) -> pa.Table: - self.logger.info(f"Snowflake Query:\n{query}\n") - if self._verbose: - print(f"Snowflake Query:\n{query}\n") - - sp_df = self._session.sql(query) - batches = [] - for pd_batch in sp_df.to_pandas_batches(): - pa_tbl = to_arrow_table(pd_batch).cast(schema, safe=False) - batches.extend(pa_tbl.to_batches()) - - return pa.Table.from_batches(batches, schema) - - def fallback(self) -> bool: - return self._fallback diff --git a/vegafusion-python/vegafusion/dataset/sql.py b/vegafusion-python/vegafusion/dataset/sql.py index 0c3739015..e206242fc 100644 --- a/vegafusion-python/vegafusion/dataset/sql.py +++ b/vegafusion-python/vegafusion/dataset/sql.py @@ -1,10 +1,13 @@ +from __future__ import annotations + from abc import ABC, abstractmethod -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from .dfi import SqlDatasetDataFrame import pyarrow as pa + from .dfi import SqlDatasetDataFrame + class SqlDataset(ABC): """ @@ -35,12 +38,12 @@ def table_name(self) -> str: raise NotImplementedError() @abstractmethod - def table_schema(self) -> "pa.Schema": + def table_schema(self) -> pa.Schema: """Schema of source table""" raise NotImplementedError() @abstractmethod - def fetch_query(self, query: str, schema: "pa.Schema") -> "pa.Table": + def fetch_query(self, query: str, schema: pa.Schema) -> pa.Table: """ Returns the result of evaluating the requested query. 
The resulting pa.Table should have a schema matching the provided schema @@ -70,8 +73,12 @@ def main_thread(self) -> bool: return True def __dataframe__( - self, nan_as_null: bool = False, allow_copy: bool = True, **kwargs - ) -> "SqlDatasetDataFrame": + self, + nan_as_null: bool = False, + allow_copy: bool = True, + **kwargs: Any, # noqa: ANN401 + ) -> SqlDatasetDataFrame: """DataFrame interchange protocol support""" from .dfi import SqlDatasetDataFrame + return SqlDatasetDataFrame(self, nan_as_null=nan_as_null, allow_copy=allow_copy) diff --git a/vegafusion-python/vegafusion/datasource/__init__.py b/vegafusion-python/vegafusion/datasource/__init__.py index cc8d1d0a2..095619b5e 100644 --- a/vegafusion-python/vegafusion/datasource/__init__.py +++ b/vegafusion-python/vegafusion/datasource/__init__.py @@ -1,4 +1,11 @@ +from .datasource import Datasource from .dfi_datasource import DfiDatasource from .pandas_datasource import PandasDatasource from .pyarrow_datasource import PyArrowDatasource -from .datasource import Datasource + +__all__ = [ + "Datasource", + "DfiDatasource", + "PandasDatasource", + "PyArrowDatasource", +] diff --git a/vegafusion-python/vegafusion/datasource/_dfi_types.py b/vegafusion-python/vegafusion/datasource/_dfi_types.py index 16b83fb4d..0458d34a1 100644 --- a/vegafusion-python/vegafusion/datasource/_dfi_types.py +++ b/vegafusion-python/vegafusion/datasource/_dfi_types.py @@ -1,18 +1,19 @@ +# mypy: ignore-errors + # DataFrame Interchange Protocol Types # Copied from https://data-apis.org/dataframe-protocol/latest/API.html # # These classes are only for use in type signatures +import enum from abc import ( ABC, abstractmethod, ) -import enum +from collections.abc import Iterable, Sequence from typing import ( Any, Dict, - Iterable, Optional, - Sequence, Tuple, TypedDict, ) diff --git a/vegafusion-python/vegafusion/datasource/datasource.py b/vegafusion-python/vegafusion/datasource/datasource.py index 2dd1a0901..c1d6bea5b 100644 --- 
a/vegafusion-python/vegafusion/datasource/datasource.py +++ b/vegafusion-python/vegafusion/datasource/datasource.py @@ -1,5 +1,8 @@ +from __future__ import annotations + from abc import ABC, abstractmethod -from typing import Iterable, TYPE_CHECKING +from collections.abc import Iterable +from typing import TYPE_CHECKING if TYPE_CHECKING: import pyarrow as pa @@ -7,9 +10,9 @@ class Datasource(ABC): @abstractmethod - def schema(self) -> "pa.Schema": + def schema(self) -> pa.Schema: raise NotImplementedError() @abstractmethod - def fetch(self, columns: Iterable[str]) -> "pa.Table": + def fetch(self, columns: Iterable[str]) -> pa.Table: raise NotImplementedError() diff --git a/vegafusion-python/vegafusion/datasource/dfi_datasource.py b/vegafusion-python/vegafusion/datasource/dfi_datasource.py index 268ac3b5b..34f019741 100644 --- a/vegafusion-python/vegafusion/datasource/dfi_datasource.py +++ b/vegafusion-python/vegafusion/datasource/dfi_datasource.py @@ -1,14 +1,20 @@ -from typing import Iterable, TYPE_CHECKING +from __future__ import annotations + import re +from collections.abc import Iterable +from typing import TYPE_CHECKING -from ._dfi_types import DtypeKind, DataFrame as DfiDataFrame +from ._dfi_types import DataFrame as DfiDataFrame +from ._dfi_types import DtypeKind from .datasource import Datasource if TYPE_CHECKING: import pyarrow as pa -def get_pyarrow_dtype(kind, bit_width): + +def get_pyarrow_dtype(kind: DtypeKind, bit_width: int) -> pa.DataType: import pyarrow as pa + if kind == DtypeKind.INT: if bit_width == 8: return pa.int8() @@ -48,7 +54,7 @@ def get_pyarrow_dtype(kind, bit_width): return None -def parse_datetime_format_str(format_str): +def parse_datetime_format_str(format_str: str) -> tuple[str, str]: """Parse datetime `format_str` to interpret the `data`.""" # timestamp 'ts{unit}:tz' @@ -66,9 +72,10 @@ def parse_datetime_format_str(format_str): raise NotImplementedError(f"DateTime kind is not supported: {format_str}") -def 
map_date_type(data_type): - """Map column date type to pyarrow date type. """ +def map_date_type(data_type: tuple[DtypeKind, int, str, str]) -> pa.DataType: + """Map column date type to pyarrow date type.""" import pyarrow as pa + kind, bit_width, f_string, _ = data_type if kind == DtypeKind.DATETIME: @@ -82,12 +89,14 @@ def map_date_type(data_type): return pa_dtype else: raise NotImplementedError( - f"Conversion for {data_type} is not yet supported.") + f"Conversion for {data_type} is not yet supported." + ) class DfiDatasource(Datasource): - def __init__(self, dataframe: DfiDataFrame): + def __init__(self, dataframe: DfiDataFrame) -> None: import pyarrow as pa + if hasattr(dataframe, "__dataframe__"): dataframe = dataframe.__dataframe__() fields = [] @@ -99,12 +108,13 @@ def __init__(self, dataframe: DfiDataFrame): self._dataframe = dataframe self._schema = pa.schema(fields) - def schema(self) -> "pa.Schema": + def schema(self) -> pa.Schema: return self._schema - def fetch(self, columns: Iterable[str]) -> "pa.Table": + def fetch(self, columns: Iterable[str]) -> pa.Table: import pyarrow as pa from pyarrow.interchange import from_dataframe + columns = list(columns) projected_schema = pa.schema([f for f in self._schema if f.name in columns]) table = from_dataframe(self._dataframe.select_columns_by_name(columns)) diff --git a/vegafusion-python/vegafusion/datasource/pandas_datasource.py b/vegafusion-python/vegafusion/datasource/pandas_datasource.py index 14cfdb50b..e640b22e6 100644 --- a/vegafusion-python/vegafusion/datasource/pandas_datasource.py +++ b/vegafusion-python/vegafusion/datasource/pandas_datasource.py @@ -1,5 +1,9 @@ -from typing import Iterable, TYPE_CHECKING +from __future__ import annotations + +from collections.abc import Iterable from math import floor +from typing import TYPE_CHECKING + from .datasource import Datasource if TYPE_CHECKING: @@ -8,12 +12,14 @@ class PandasDatasource(Datasource): - def __init__(self, df: "pd.DataFrame", sample_size: int 
= 1000, batch_size: int = 8096): + def __init__( + self, df: pd.DataFrame, sample_size: int = 1000, batch_size: int = 8096 + ) -> None: import pandas as pd import pyarrow as pa fields = [] - casts = {} + casts: dict[str, str] = {} sample_stride = max(1, floor(len(df) / sample_size)) # Shallow copy and add named index levels as columns @@ -32,9 +38,13 @@ def __init__(self, df: "pd.DataFrame", sample_size: int = 1000, batch_size: int # We will expand categoricals (not yet supported in VegaFusion) if isinstance(pd_type, pd.CategoricalDtype): cat = df[col].cat - field = pa.Schema.from_pandas(pd.DataFrame({col: cat.categories})).field(col) + field = pa.Schema.from_pandas( + pd.DataFrame({col: cat.categories}) + ).field(col) else: - candidate_schema = pa.Schema.from_pandas(df.iloc[::sample_stride][[col]]) + candidate_schema = pa.Schema.from_pandas( + df.iloc[::sample_stride][[col]] + ) field = candidate_schema.field(col) except (pa.ArrowTypeError, pa.ArrowInvalid): # If arrow fails to infer the type, fall back to string @@ -52,7 +62,7 @@ def __init__(self, df: "pd.DataFrame", sample_size: int = 1000, batch_size: int except pa.ArrowTypeError: if pd_type.kind == "O": fields.append(pa.field(col, pa.string())) - casts[col] = str + casts[col] = "str" else: raise self._df = df @@ -60,12 +70,13 @@ def __init__(self, df: "pd.DataFrame", sample_size: int = 1000, batch_size: int self._casts = casts self._batch_size = batch_size - def schema(self) -> "pa.Schema": + def schema(self) -> pa.Schema: return self._schema - def fetch(self, columns: Iterable[str]) -> "pa.Table": + def fetch(self, columns: Iterable[str]) -> pa.Table: import pandas as pd import pyarrow as pa + projected = self._df[columns].copy(deep=False) for col, pd_type in projected.dtypes.items(): diff --git a/vegafusion-python/vegafusion/datasource/pyarrow_datasource.py b/vegafusion-python/vegafusion/datasource/pyarrow_datasource.py index 49fc30a50..6a1026095 100644 --- 
a/vegafusion-python/vegafusion/datasource/pyarrow_datasource.py +++ b/vegafusion-python/vegafusion/datasource/pyarrow_datasource.py @@ -1,19 +1,24 @@ -from typing import Iterable, TYPE_CHECKING +from __future__ import annotations + +from collections.abc import Iterable +from typing import TYPE_CHECKING + from .datasource import Datasource if TYPE_CHECKING: import pyarrow as pa + class PyArrowDatasource(Datasource): - def __init__(self, dataframe: "pa.Table"): + def __init__(self, dataframe: pa.Table) -> None: self._table = dataframe - def schema(self) -> "pa.Schema": + def schema(self) -> pa.Schema: return self._table.schema - def fetch(self, columns: Iterable[str]) -> "pa.Table": + def fetch(self, columns: Iterable[str]) -> pa.Table: import pyarrow as pa + return pa.Table.from_arrays( - [self._table[c] for c in columns], - names=list(columns) + [self._table[c] for c in columns], names=list(columns) ) diff --git a/vegafusion-python/vegafusion/jupyter/__init__.py b/vegafusion-python/vegafusion/jupyter/__init__.py index 08a898c09..59689695e 100644 --- a/vegafusion-python/vegafusion/jupyter/__init__.py +++ b/vegafusion-python/vegafusion/jupyter/__init__.py @@ -1,9 +1,16 @@ -import json +# Ignore all mypy errors for now since traitlets aren't well typed +# type: ignore +from __future__ import annotations + import pathlib from typing import Any + import anywidget import traitlets + from vegafusion import runtime +from vegafusion.runtime import PreTransformWarning +from vegafusion.transformer import DataFrameLike _here = pathlib.Path(__file__).parent @@ -31,7 +38,7 @@ class VegaFusionWidget(anywidget.AnyWidget): embed_options = traitlets.Dict(default_value=None, allow_none=True).tag(sync=True) debug = traitlets.Bool(default_value=False) row_limit = traitlets.Int(default_value=100000).tag(sync=True) - + # Public output traitlets warnings = traitlets.List(allow_none=True) @@ -47,17 +54,14 @@ class VegaFusionWidget(anywidget.AnyWidget): _is_offline = False @classmethod - 
def enable_offline(cls, offline: bool = True): - """ - Configure VegaFusionWidget's offline behavior - - Parameters - ---------- - offline: bool - If True, configure VegaFusionWidget to operate in offline mode where JavaScript - dependencies are loaded from vl-convert. - If False, configure it to operate in online mode where JavaScript dependencies - are loaded from CDN dynamically. This is the default behavior. + def enable_offline(cls, offline: bool = True) -> None: + """Configure VegaFusionWidget's offline behavior. + + Args: + offline: If True, configure VegaFusionWidget to operate in offline mode + where JavaScript dependencies are loaded from vl-convert. If False, + configure it to operate in online mode where JavaScript dependencies + are loaded from CDN dynamically. This is the default behavior. """ import vl_convert as vlc @@ -78,9 +82,10 @@ def enable_offline(cls, offline: bool = True): src = "\n".join(src_lines) - # vl-convert's javascript_bundle function creates a self-contained JavaScript bundle - # for JavaScript snippets that import from a small set of dependencies that - # vl-convert includes. To see the available imports and their imported names, run + # vl-convert's javascript_bundle function creates a self-contained + # JavaScript bundle for JavaScript snippets that import from a small + # set of dependencies that vl-convert includes. 
To see the available + # imports and their imported names, run # import vl_convert as vlc # help(vlc.javascript_bundle) bundled_src = vlc.javascript_bundle(src) @@ -90,46 +95,41 @@ def enable_offline(cls, offline: bool = True): cls._esm = load_js_src() cls._is_offline = False - def __init__( self, - spec: dict, - inline_datasets: dict | None = None, + spec: dict[str, Any], + inline_datasets: dict[str, DataFrameLike] | None = None, debounce_wait: int = 10, max_wait: bool = True, debug: bool = False, - embed_options: dict | None = None, + embed_options: dict[str, Any] | None = None, local_tz: str | None = None, row_limit: int = 100000, - **kwargs: Any, - ): - """ - Jupyter Widget for displaying Vega chart specifications, using VegaFusion + **kwargs: Any, # noqa: ANN401 + ) -> None: + """Jupyter Widget for displaying Vega chart specifications, using VegaFusion for server-side scaling. - Parameters - ---------- - spec: dict - Vega chart specification - inline_datasets: dict | None - Datasets referenced in the Vega spec in vegafusion+dataset:// URLs - debounce_wait: int - Debouncing wait time in milliseconds. Updates will be sent from the client to the kernel - after debounce_wait milliseconds of no chart interactions. - max_wait: bool - If True (default), updates will be sent from the client to the kernel every debounce_wait - milliseconds even if there are ongoing chart interactions. If False, updates will not be - sent until chart interactions have completed. - debug: bool - If True, debug messages will be printed - embed_options: dict - Options to pass to vega-embed. - See https://github.com/vega/vega-embed?tab=readme-ov-file#options - local_tz: str | None - Timezone to use for the chart. If None, the chart will use the browser's local timezone. - row_limit: int - Maximum number of rows to send to the browser, after VegaFusion has performed is transformations. - A RowLimitError will be raised if the VegaFusion operation results in more than row_limit rows. 
+ Args: + spec: Vega chart specification. + inline_datasets: Datasets referenced in the Vega spec in + vegafusion+dataset:// URLs. + debounce_wait: Debouncing wait time in milliseconds. Updates will be + sent from the client to the kernel after debounce_wait + milliseconds of no chart interactions. + max_wait: If True (default), updates will be sent from the client to + the kernel every debounce_wait milliseconds even if there are + ongoing chart interactions. If False, updates will not be sent + until chart interactions have completed. + debug: If True, debug messages will be printed. + embed_options: Options to pass to vega-embed. See + https://github.com/vega/vega-embed?tab=readme-ov-file#options + local_tz: Timezone to use for the chart. If None, the chart will use + the browser's local timezone. + row_limit: Maximum number of rows to send to the browser, after + VegaFusion has performed its transformations. A RowLimitError + will be raised if the VegaFusion operation results in more than + row_limit rows. 
""" super().__init__( spec=spec, @@ -145,12 +145,12 @@ def __init__( self.on_msg(self._handle_custom_msg) @traitlets.observe("spec") - def _on_change_spec(self, change): + def _on_change_spec(self, change: dict[str, Any]) -> None: """ Internal callback function that updates the widgets's internal state when the Vega chart specification changes """ - new_spec = change.new + new_spec = change["new"] if new_spec is None: # Clear state @@ -159,9 +159,10 @@ def _on_change_spec(self, change): self._chart_state = None self._js_watch_plan = None return - + if self.local_tz is None: - def on_local_tz_change(change): + + def on_local_tz_change(change: dict[str, Any]) -> None: self._init_chart_state(change["new"]) self.observe(on_local_tz_change, ["local_tz"]) @@ -169,41 +170,42 @@ def on_local_tz_change(change): self._init_chart_state(self.local_tz) @traitlets.observe("inline_datasets") - def _on_change_inline_datasets(self, change): + def _on_change_inline_datasets(self, change: dict[str, Any]) -> None: """ Internal callback function that updates the widgets's internal state when the inline datasets change """ self._init_chart_state(self.local_tz) - def _handle_custom_msg(self, content, buffers): - if content.get('type') == 'update_state': - self._handle_update_state(content.get('updates', [])) + def _handle_custom_msg(self, content: dict[str, Any], buffers: Any) -> None: # noqa: ANN401 + if content.get("type") == "update_state": + self._handle_update_state(content.get("updates", [])) - def _handle_update_state(self, updates): + def _handle_update_state(self, updates: list[dict[str, Any]]) -> None: """ Handle the 'update_state' message from JavaScript """ if self.debug: print(f"Received update_state message from JavaScript:\n{updates}") - + # Process the updates using the chart state if self._chart_state is not None: processed_updates = self._chart_state.update(updates) - + if self.debug: print(f"Processed updates:\n{processed_updates}") - + # Send the processed updates 
back to JavaScript self.send({"type": "update_view", "updates": processed_updates}) else: - print("Warning: Received update_state message, but chart state is not initialized.") - + print( + "Warning: Received update_state message, but chart state is not " + "initialized." + ) - def _init_chart_state(self, local_tz: str): + def _init_chart_state(self, local_tz: str) -> None: if self.spec is not None: with self.hold_sync(): - # Build the chart state self._chart_state = runtime.new_chart_state( self.spec, @@ -213,7 +215,9 @@ def _init_chart_state(self, local_tz: str): ) # Check if the row limit was exceeded - handle_row_limit_exceeded(self.row_limit, self._chart_state.get_warnings()) + handle_row_limit_exceeded( + self.row_limit, self._chart_state.get_warnings() + ) # Get the watch plan and transformed spec self._js_watch_plan = self._chart_state.get_watch_plan()[ @@ -223,24 +227,27 @@ def _init_chart_state(self, local_tz: str): self.warnings = self._chart_state.get_warnings() -def handle_row_limit_exceeded(row_limit: int, warnings: list): +def handle_row_limit_exceeded( + row_limit: int, warnings: list[PreTransformWarning] +) -> None: for warning in warnings: - if warning.get("type") == "RowLimitExceeded": + if warning["type"] == "RowLimitExceeded": msg = ( - "The number of dataset rows after filtering and aggregation exceeds\n" - f"the current limit of {row_limit}. Try adding an aggregation to reduce\n" - "the size of the dataset that must be loaded into the browser. Or, disable\n" - "the limit by setting the row_limit traitlet to None. Note that\n" - "disabling this limit may cause the browser to freeze or crash." + "The number of dataset rows after filtering and aggregation\n" + f"exceeds the current limit of {row_limit}. Try adding an\n" + "aggregation to reduce the size of the dataset that must be\n" + "loaded into the browser. Or, disable the limit by setting the\n" + "row_limit traitlet to None. 
Note that disabling this limit may\n" + "cause the browser to freeze or crash." ) raise RowLimitExceededError(msg) class RowLimitExceededError(Exception): """ - Exception raised when the number of dataset rows after filtering and aggregation exceeds - the current limit. + Exception raised when the number of dataset rows after filtering and aggregation + exceeds the current limit. """ - def __init__(self, message: str): - super().__init__(message) + def __init__(self, message: str) -> None: + super().__init__(message) diff --git a/vegafusion-python/vegafusion/local_tz.py b/vegafusion-python/vegafusion/local_tz.py index 289bf0df9..51a1d9723 100644 --- a/vegafusion-python/vegafusion/local_tz.py +++ b/vegafusion-python/vegafusion/local_tz.py @@ -1,7 +1,9 @@ -__tz_config = dict(local_tz=None) +from __future__ import annotations +__tz_config: dict[str, str | None] = {"local_tz": None} -def get_local_tz(): + +def get_local_tz() -> str: """ Get the named local timezone that the VegaFusion mimetype renderer will use for calculations. 
@@ -13,22 +15,26 @@ def get_local_tz(): :return: named timezone string """ - if __tz_config["local_tz"] is None: + local_tz = __tz_config["local_tz"] + if local_tz is None: # Fall back to getting local_tz from vl-convert if not set try: import vl_convert as vlc - __tz_config["local_tz"] = vlc.get_local_tz() or "UTC" - except ImportError: + + local_tz = vlc.get_local_tz() or "UTC" + __tz_config["local_tz"] = local_tz + except ImportError as e: raise ImportError( - "vl-convert is not installed and so the local system timezone cannot be determined.\n" - "Either install the vl-convert-python package or set the local timezone manually using\n" - "the vegafusion.set_local_tz function" - ) + "vl-convert is not installed and so the local system timezone cannot " + "be determined.\nEither install the vl-convert-python package or set " + "the local timezone manually using\nthe vegafusion.set_local_tz " + "function" + ) from e - return __tz_config["local_tz"] + return local_tz -def set_local_tz(local_tz): +def set_local_tz(local_tz: str) -> None: """ Set the named local timezone that the VegaFusion mimetype renderer will use for calculations. 
diff --git a/vegafusion-python/vegafusion/py.typed b/vegafusion-python/vegafusion/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/vegafusion-python/vegafusion/runtime.py b/vegafusion-python/vegafusion/runtime.py index ae63ae415..946f4523b 100644 --- a/vegafusion-python/vegafusion/runtime.py +++ b/vegafusion-python/vegafusion/runtime.py @@ -1,17 +1,42 @@ +from __future__ import annotations + import sys -from typing import TypedDict, List, Literal, Any, Union, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Literal, TypedDict, Union, cast import psutil +from vegafusion.datasource.datasource import Datasource +from vegafusion.transformer import DataFrameLike + from .connection import SqlConnection from .dataset import SqlDataset -from .datasource import PandasDatasource, DfiDatasource, PyArrowDatasource +from .datasource import DfiDatasource, PandasDatasource, PyArrowDatasource from .local_tz import get_local_tz if TYPE_CHECKING: + import pyarrow as pa from duckdb import DuckDBPyConnection + from grpc import Channel + + from vegafusion._vegafusion import PyChartState, PyVegaFusionRuntime + +# This type isn't defined in the grpcio package, so let's at least name it +UnaryUnaryMultiCallable = Any + + +def _all_datasets_have_type( + inline_datasets: dict[str, Any] | None, types: tuple[type, ...] +) -> bool: + """ + Check if all datasets in inline_datasets are instances of the given types. + + Args: + inline_datasets: A dictionary of inline datasets. + types: A tuple of types to check against. -def _all_datasets_have_type(inline_datasets, types): + Returns: + bool: True if all datasets are instances of the given types, False otherwise. 
+ """ if not inline_datasets: # If there are no inline datasets, return false # (we want the default pandas behavior in this case) @@ -26,81 +51,115 @@ def _all_datasets_have_type(inline_datasets, types): class VariableUpdate(TypedDict): name: str namespace: Literal["data", "signal"] - scope: List[int] + scope: list[int] value: Any class Watch(TypedDict): name: str namespace: Literal["data", "signal"] - scope: List[int] + scope: list[int] class WatchPlan(TypedDict): - client_to_server: List[Watch] - server_to_client: List[Watch] + client_to_server: list[Watch] + server_to_client: list[Watch] class PreTransformWarning(TypedDict): - type: str + type: Literal["RowLimitExceeded", "BrokenInteractivity", "Unsupported"] message: str class ChartState: - def __init__(self, chart_state): + def __init__(self, chart_state: PyChartState) -> None: self._chart_state = chart_state - def update(self, client_updates: List[VariableUpdate]) -> List[VariableUpdate]: - """Update chart state with updates from the client + def update(self, client_updates: list[VariableUpdate]) -> list[VariableUpdate]: + """ + Update chart state with updates from the client. + + Args: + client_updates: List of VariableUpdate values from the client. - :param client_updates: List of VariableUpdate values from the client - :return: list of VariableUpdates that should be pushed to the client + Returns: + list of VariableUpdates that should be pushed to the client. """ - return self._chart_state.update(client_updates) + return cast(list[VariableUpdate], self._chart_state.update(client_updates)) def get_watch_plan(self) -> WatchPlan: - """Get ChartState's watch plan + """ + Get ChartState's watch plan. 
- The watch plan specifies the signals and datasets that should be communicated - between ChartState and client to preserve the input Vega spec's interactivity - :return: WatchPlan + Returns: + WatchPlan specifying the signals and datasets that should be communicated + between ChartState and client to preserve the input Vega spec's + interactivity. """ - return self._chart_state.get_watch_plan() + return cast(WatchPlan, self._chart_state.get_watch_plan()) - def get_transformed_spec(self) -> dict: - """Get initial transformed spec + def get_transformed_spec(self) -> dict[str, Any]: + """ + Get initial transformed spec. - Get the initial transformed spec. This is equivalent to the spec that would - be produced by vf.runtime.pre_transform_spec() + Returns: + The initial transformed spec, equivalent to the spec produced by + vf.runtime.pre_transform_spec(). """ - return self._chart_state.get_transformed_spec() + return cast(dict[str, Any], self._chart_state.get_transformed_spec()) - def get_warnings(self) -> List[PreTransformWarning]: + def get_warnings(self) -> list[PreTransformWarning]: """Get transformed spec warnings - :return: A list of warnings as dictionaries. Each warning dict has a 'type' - key indicating the warning type, and a 'message' key containing - a description of the warning. Potential warning types include: - 'RowLimitExceeded': Some datasets in resulting Vega specification - have been truncated to the provided row limit - 'BrokenInteractivity': Some interactive features may have been - broken in the resulting Vega specification - 'Unsupported': No transforms in the provided Vega specification were - eligible for pre-transforming + Returns: + list[PreTransformWarning]: A list of warnings as dictionaries. + Each warning dict has a 'type' key indicating the warning type, + and a 'message' key containing a description of the warning. 
+ + Potential warning types include: + 'RowLimitExceeded': Some datasets in resulting Vega specification + have been truncated to the provided row limit + 'BrokenInteractivity': Some interactive features may have been + broken in the resulting Vega specification + 'Unsupported': No transforms in the provided Vega specification were + eligible for pre-transforming + """ + return cast(list[PreTransformWarning], self._chart_state.get_warnings()) + + def get_server_spec(self) -> dict[str, Any]: + """ + Returns: + dict: The server spec. """ - return self._chart_state.get_warnings() + return cast(dict[str, Any], self._chart_state.get_server_spec()) - def get_server_spec(self) -> dict: - """Get server spec""" - return self._chart_state.get_server_spec() + def get_client_spec(self) -> dict[str, Any]: + """ + Get client spec. - def get_client_spec(self) -> dict: - """Get client spec""" - return self._chart_state.get_client_spec() + Returns: + dict: The client spec. + """ + return cast(dict[str, Any], self._chart_state.get_client_spec()) class VegaFusionRuntime: - def __init__(self, cache_capacity, memory_limit, worker_threads, connection=None): + def __init__( + self, + cache_capacity: int, + memory_limit: int, + worker_threads: int, + connection: SqlConnection | None = None, + ) -> None: + """ + Initialize a VegaFusionRuntime. + + Args: + cache_capacity: Cache capacity. + memory_limit: Memory limit. + worker_threads: Number of worker threads. + connection: SQL connection (optional). + """ self._embedded_runtime = None self._grpc_channel = None self._grpc_query = None @@ -110,31 +169,48 @@ def __init__(self, cache_capacity, memory_limit, worker_threads, connection=None self._connection = connection @property - def embedded_runtime(self): + def embedded_runtime(self) -> PyVegaFusionRuntime: + """ + Get or initialize the embedded runtime. + + Returns: + The embedded runtime. 
+ """ if self._embedded_runtime is None: # Try to initialize an embedded runtime from vegafusion._vegafusion import PyVegaFusionRuntime + self._embedded_runtime = PyVegaFusionRuntime( - self.cache_capacity, self.memory_limit, self.worker_threads, connection=self._connection + self.cache_capacity, + self.memory_limit, + self.worker_threads, + connection=self._connection, ) return self._embedded_runtime - def set_connection(self, connection: Union[str, SqlConnection, "DuckDBPyConnection"] = "datafusion"): + def set_connection( + self, + connection: Literal["datafusion", "duckdb"] + | SqlConnection + | DuckDBPyConnection + | None = "datafusion", + ) -> None: """ Sets the connection to use to evaluate Vega data transformations. - Named tables returned by the connection's `tables` method may be referenced in Vega/Altair - chart specifications using special dataset URLs. For example, if the connection's `tables` - method returns a dictionary that includes "tableA" as a key, then this table may be - referenced in a chart specification using the URL "table://tableA" or - "vegafusion+dataset://tableA". - - :param connection: One of: - - An instance of vegafusion.connection.SqlConnection - - An instance of a duckdb connection - - A string, one of: - - "datafusion" (default) - - "duckdb" + Named tables returned by the connection's `tables` method may be referenced in + Vega/Altair chart specifications using special dataset URLs. For example, if the + connection's `tables` method returns a dictionary that includes "tableA" as a + key, then this table may be referenced in a chart specification using the URL + "table://tableA" or "vegafusion+dataset://tableA". + + Args: + connection: One of: + - An instance of vegafusion.connection.SqlConnection + - An instance of a duckdb connection + - A string, one of: + - "datafusion" (default) + - "duckdb" """ # Don't import duckdb unless it's already loaded. If it's not loaded, # then the input connection can't be a duckdb connection. 
@@ -145,11 +221,13 @@ def set_connection(self, connection: Union[str, SqlConnection, "DuckDBPyConnecti connection = None elif connection == "duckdb": from vegafusion.connection.duckdb import DuckDbConnection + connection = DuckDbConnection() else: raise ValueError(f"Unsupported connection name: {connection}") elif duckdb is not None and isinstance(connection, duckdb.DuckDBPyConnection): from vegafusion.connection.duckdb import DuckDbConnection + connection = DuckDbConnection(connection) elif not isinstance(connection, SqlConnection): raise ValueError( @@ -160,46 +238,83 @@ def set_connection(self, connection: Union[str, SqlConnection, "DuckDBPyConnecti self._connection = connection self.reset() - def grpc_connect(self, channel): + def grpc_connect(self, channel: Channel) -> None: """ Connect to a VegaFusion server over gRPC using the provided gRPC channel - :param channel: grpc.Channel instance configured with the address of a running VegaFusion server + Args: + channel: grpc.Channel instance configured with the address of a running + VegaFusion server """ - # TODO: check channel type self._grpc_channel = channel @property - def using_grpc(self): + def using_grpc(self) -> bool: + """ + Check if using gRPC. + + Returns: + True if using gRPC, False otherwise. + """ return self._grpc_channel is not None @property - def grpc_query(self): + def grpc_query(self) -> UnaryUnaryMultiCallable: + """ + Get the gRPC query object. + + Returns: + The gRPC query object. + + Raises: + ValueError: If no gRPC channel is registered. + """ if self._grpc_channel is None: raise ValueError( - "No grpc channel registered. Use runtime.grpc_connect to provide a grpc channel" + "No grpc channel registered. 
Use runtime.grpc_connect to provide " + "a grpc channel" ) if self._grpc_query is None: self._grpc_query = self._grpc_channel.unary_unary( - '/services.VegaFusionRuntime/TaskGraphQuery', + "/services.VegaFusionRuntime/TaskGraphQuery", ) return self._grpc_query - def process_request_bytes(self, request): + def process_request_bytes(self, request: bytes) -> bytes: + """ + Process a request in bytes format. + + Args: + request: The request in bytes format. + + Returns: + The processed request in bytes format. + """ if self._grpc_channel: return self.grpc_query(request) else: # No grpc channel, get or initialize an embedded runtime - return self.embedded_runtime.process_request_bytes(request) + return cast(bytes, self.embedded_runtime.process_request_bytes(request)) + + def _import_or_register_inline_datasets( + self, inline_datasets: dict[str, DataFrameLike] | None = None + ) -> dict[str, Datasource | SqlDataset]: + """ + Import or register inline datasets. - def _import_or_register_inline_datasets(self, inline_datasets=None): + Args: + inline_datasets: A dictionary from dataset names to pandas DataFrames or + pyarrow Tables. Inline datasets may be referenced by the input + specification using the following url syntax + 'vegafusion+dataset://{dataset_name}' or 'table://{dataset_name}'. 
+ """ pl = sys.modules.get("polars", None) pa = sys.modules.get("pyarrow", None) pd = sys.modules.get("pandas", None) - inline_datasets = inline_datasets or dict() - imported_inline_datasets = dict() + inline_datasets = inline_datasets or {} + imported_inline_datasets: dict[str, Datasource | SqlDataset] = {} for name, value in inline_datasets.items(): if isinstance(value, SqlDataset): imported_inline_datasets[name] = value @@ -214,8 +329,9 @@ def _import_or_register_inline_datasets(self, inline_datasets=None): imported_inline_datasets[name] = PandasDatasource(value) elif hasattr(value, "__dataframe__"): - # Let polars convert to pyarrow since it has broader support than the raw dataframe interchange - # protocol, and "This operation is mostly zero copy." + # Let polars convert to pyarrow since it has broader support than the + # raw dataframe interchange protocol, and "This operation is mostly + # zero copy." try: if pl is not None and isinstance(value, pl.DataFrame): value = value.to_arrow() @@ -239,131 +355,143 @@ def _import_or_register_inline_datasets(self, inline_datasets=None): return imported_inline_datasets def build_pre_transform_spec_plan( - self, - spec, - preserve_interactivity=True, - keep_signals=None, - keep_datasets=None, - ): + self, + spec: dict[str, Any] | str, + preserve_interactivity: bool = True, + keep_signals: list[str | tuple[str, list[int]]] | None = None, + keep_datasets: list[str | tuple[str, list[int]]] | None = None, + ) -> dict[str, Any]: """ Diagnostic function that returns the plan used by the pre_transform_spec method - :param spec: A Vega specification dict or JSON string - :param preserve_interactivity: If True (default) then the interactive behavior of - the chart will pre preserved. This requires that all the data that participates - in interactions be included in the resulting spec rather than being pre-transformed. 
- If False, then all possible data transformations are applied even if they break - the original interactive behavior of the chart. - :param keep_signals: Signals from the input spec that must be included in the - pre-transformed spec. A list with elements that are either: - - The name of a top-level signal as a string - - A two-element tuple where the first element is the name of a signal as a string - and the second element is the nested scope of the dataset as a list of integers - :param keep_datasets: Datasets from the input spec that must be included in the - pre-transformed spec. A list with elements that are either: - - The name of a top-level dataset as a string - - A two-element tuple where the first element is the name of a dataset as a string - and the second element is the nested scope of the dataset as a list of integers - :return: - dict with keys: + Args: + spec: A Vega specification dict or JSON string. + preserve_interactivity: If True (default), the interactive behavior of the + chart will be preserved. This requires that all the data that + participates in interactions be included in the resulting spec rather + than being pre-transformed. If False, all possible data transformations + are applied even if they break the original interactive behavior of the + chart. + keep_signals: Signals from the input spec that must be included in the + pre-transformed spec. A list with elements that are either: + - The name of a top-level signal as a string + - A two-element tuple where the first element is the name of a signal + as a string and the second element is the nested scope of the dataset + as a list of integers + keep_datasets: Datasets from the input spec that must be included in the + pre-transformed spec. 
A list with elements that are either: + - The name of a top-level dataset as a string + - A two-element tuple where the first element is the name of a dataset + as a string and the second element is the nested scope of the dataset + as a list of integers + + Returns: + dict: A dictionary with the following keys: - "client_spec": Planned client spec - - "server_spec: Planned server spec + - "server_spec": Planned server spec - "comm_plan": Communication plan - "warnings": List of planner warnings """ if self._grpc_channel: - raise ValueError("build_pre_transform_spec_plan not yet supported over gRPC") + raise ValueError( + "build_pre_transform_spec_plan not yet supported over gRPC" + ) else: - # Parse input keep signals and datasets - keep_signals = parse_variables(keep_signals) - keep_datasets = parse_variables(keep_datasets) - return self.embedded_runtime.build_pre_transform_spec_plan( + plan = self.embedded_runtime.build_pre_transform_spec_plan( spec, preserve_interactivity=preserve_interactivity, - keep_signals=keep_signals, - keep_datasets=keep_datasets, + keep_signals=parse_variables(keep_signals), + keep_datasets=parse_variables(keep_datasets), ) + return cast(dict[str, Any], plan) def pre_transform_spec( self, - spec, - local_tz=None, - default_input_tz=None, - row_limit=None, - preserve_interactivity=True, - inline_datasets=None, - keep_signals=None, - keep_datasets=None, - data_encoding_threshold=None, - data_encoding_format="pyarrow", - ): - """ - Evaluate supported transforms in an input Vega specification and produce a new - specification with pre-transformed datasets included inline. - - :param spec: A Vega specification dict or JSON string - :param local_tz: Name of timezone to be considered local. E.g. 'America/New_York'. - Defaults to the value of vf.get_local_tz(), which defaults to the system timezone - if one can be determined. - :param default_input_tz: Name of timezone (e.g. 
'America/New_York') that naive datetime - strings should be interpreted in. Defaults to `local_tz`. - :param row_limit: Maximum number of dataset rows to include in the returned - specification. If exceeded, datasets will be truncated to this number of rows - and a RowLimitExceeded warning will be included in the resulting warnings list - :param preserve_interactivity: If True (default) then the interactive behavior of - the chart will pre preserved. This requires that all the data that participates - in interactions be included in the resulting spec rather than being pre-transformed. - If False, then all possible data transformations are applied even if they break - the original interactive behavior of the chart. - :param inline_datasets: A dict from dataset names to pandas DataFrames or pyarrow - Tables. Inline datasets may be referenced by the input specification using - the following url syntax 'vegafusion+dataset://{dataset_name}' or - 'table://{dataset_name}'. - :param keep_signals: Signals from the input spec that must be included in the - pre-transformed spec. A list with elements that are either: - - The name of a top-level signal as a string - - A two-element tuple where the first element is the name of a signal as a string - and the second element is the nested scope of the dataset as a list of integers - :param keep_datasets: Datasets from the input spec that must be included in the - pre-transformed spec. A list with elements that are either: - - The name of a top-level dataset as a string - - A two-element tuple where the first element is the name of a dataset as a string - and the second element is the nested scope of the dataset as a list of integers - :param data_encoding_threshold: threshold for encoding datasets - When length of pre-transformed datasets exceeds data_encoding_threshold, datasets - are encoded into an alternative format (as determined by the data_encoding_format - argument). 
When None (the default), pre-transformed datasets are never encoded and - are always included as JSON compatible lists of dictionaries. - :param data_encoding_format: format of encoded datasets - Format to use to encode datasets with length exceeding the data_encoding_threshold - argument. + spec: Union[dict[str, Any], str], + local_tz: str | None = None, + default_input_tz: str | None = None, + row_limit: int | None = None, + preserve_interactivity: bool = True, + inline_datasets: dict[str, Any] | None = None, + keep_signals: list[Union[str, tuple[str, list[int]]]] | None = None, + keep_datasets: list[Union[str, tuple[str, list[int]]]] | None = None, + data_encoding_threshold: int | None = None, + data_encoding_format: str = "pyarrow", + ) -> tuple[Union[dict[str, Any], str], list[dict[str, str]]]: + """ + Evaluate supported transforms in an input Vega specification + + Produces a new specification with pre-transformed datasets included inline. + + Args: + spec: A Vega specification dict or JSON string + local_tz: Name of timezone to be considered local. E.g. 'America/New_York'. + Defaults to the value of vf.get_local_tz(), which defaults to the system + timezone if one can be determined. + default_input_tz: Name of timezone (e.g. 'America/New_York') that naive + datetime strings should be interpreted in. Defaults to `local_tz`. + row_limit: Maximum number of dataset rows to include in the returned + specification. If exceeded, datasets will be truncated to this number + of rows and a RowLimitExceeded warning will be included in the + resulting warnings list + preserve_interactivity: If True (default) then the interactive behavior of + the chart will be preserved. This requires that all the data that + participates in interactions be included in the resulting spec rather + than being pre-transformed. If False, then all possible data + transformations are applied even if they break the original interactive + behavior of the chart.
+ inline_datasets: A dict from dataset names to pandas DataFrames or pyarrow + Tables. Inline datasets may be referenced by the input specification + using the following url syntax 'vegafusion+dataset://{dataset_name}' or + 'table://{dataset_name}'. + keep_signals: Signals from the input spec that must be included in the + pre-transformed spec. A list with elements that are either: + - The name of a top-level signal as a string + - A two-element tuple where the first element is the name of a signal + as a string and the second element is the nested scope of the dataset + as a list of integers + keep_datasets: Datasets from the input spec that must be included in the + pre-transformed spec. A list with elements that are either: + - The name of a top-level dataset as a string + - A two-element tuple where the first element is the name of a dataset + as a string and the second element is the nested scope of the dataset + as a list of integers + data_encoding_threshold: threshold for encoding datasets. When length of + pre-transformed datasets exceeds data_encoding_threshold, datasets are + encoded into an alternative format (as determined by the + data_encoding_format argument). When None (the default), + pre-transformed datasets are never encoded and are always included as + JSON compatible lists of dictionaries. + data_encoding_format: format of encoded datasets. Format to use to encode + datasets with length exceeding the data_encoding_threshold argument. - "pyarrow": Encode datasets as pyarrow Tables. Not JSON compatible. - - "arrow-ipc": Encode datasets as bytes in Arrow IPC format. Not JSON compatible. - - "arrow-ipc-base64": Encode datasets as strings in base64 encoded Arrow IPC format. - JSON compatible. - :return: - Two-element tuple: - 0. A string containing the JSON representation of a Vega specification - with pre-transformed datasets included inline - 1. A list of warnings as dictionaries. 
Each warning dict has a 'type' - key indicating the warning type, and a 'message' key containing - a description of the warning. Potential warning types include: - 'RowLimitExceeded': Some datasets in resulting Vega specification - have been truncated to the provided row limit - 'BrokenInteractivity': Some interactive features may have been - broken in the resulting Vega specification - 'Unsupported': No transforms in the provided Vega specification were - eligible for pre-transforming + - "arrow-ipc": Encode datasets as bytes in Arrow IPC format. Not JSON + compatible. + - "arrow-ipc-base64": Encode datasets as strings in base64 encoded + Arrow IPC format. JSON compatible. + + Returns: + A tuple containing: + - A string containing the JSON representation of a Vega specification + with pre-transformed datasets included inline + - A list of warnings as dictionaries. Each warning dict has a 'type' + key indicating the warning type, and a 'message' key containing + a description of the warning. 
Potential warning types include: + 'RowLimitExceeded': Some datasets in resulting Vega specification + have been truncated to the provided row limit + 'BrokenInteractivity': Some interactive features may have been + broken in the resulting Vega specification + 'Unsupported': No transforms in the provided Vega specification were + eligible for pre-transforming """ if self._grpc_channel: raise ValueError("pre_transform_spec not yet supported over gRPC") else: local_tz = local_tz or get_local_tz() - imported_inline_dataset = self._import_or_register_inline_datasets(inline_datasets) + imported_inline_dataset = self._import_or_register_inline_datasets( + inline_datasets + ) - # Parse input keep signals and datasets - keep_signals = parse_variables(keep_signals) - keep_datasets = parse_variables(keep_datasets) try: if data_encoding_threshold is None: new_spec, warnings = self.embedded_runtime.pre_transform_spec( @@ -373,26 +501,28 @@ def pre_transform_spec( row_limit=row_limit, preserve_interactivity=preserve_interactivity, inline_datasets=imported_inline_dataset, - keep_signals=keep_signals, - keep_datasets=keep_datasets, + keep_signals=parse_variables(keep_signals), + keep_datasets=parse_variables(keep_datasets), ) else: # Use pre_transform_extract to extract large datasets - new_spec, datasets, warnings = self.embedded_runtime.pre_transform_extract( - spec, - local_tz=local_tz, - default_input_tz=default_input_tz, - preserve_interactivity=preserve_interactivity, - extract_threshold=data_encoding_threshold, - extracted_format=data_encoding_format, - inline_datasets=imported_inline_dataset, - keep_signals=keep_signals, - keep_datasets=keep_datasets, + new_spec, datasets, warnings = ( + self.embedded_runtime.pre_transform_extract( + spec, + local_tz=local_tz, + default_input_tz=default_input_tz, + preserve_interactivity=preserve_interactivity, + extract_threshold=data_encoding_threshold, + extracted_format=data_encoding_format, + inline_datasets=imported_inline_dataset, 
+ keep_signals=parse_variables(keep_signals), + keep_datasets=parse_variables(keep_datasets), + ) ) # Insert encoded datasets back into spec - for (name, scope, tbl) in datasets: - group = get_mark_group_for_scope(new_spec, scope) + for name, scope, tbl in datasets: + group = get_mark_group_for_scope(new_spec, scope) or {} for data in group.get("data", []): if data.get("name", None) == name: data["values"] = tbl @@ -405,71 +535,88 @@ def pre_transform_spec( return new_spec, warnings def new_chart_state( - self, spec, local_tz=None, default_input_tz=None, row_limit=None, inline_datasets=None + self, + spec: Union[dict[str, Any], str], + local_tz: str | None = None, + default_input_tz: str | None = None, + row_limit: int | None = None, + inline_datasets: dict[str, DataFrameLike] | None = None, ) -> ChartState: - """ - Construct new ChartState object - - :param spec: A Vega specification dict or JSON string - :param local_tz: Name of timezone to be considered local. E.g. 'America/New_York'. - Defaults to the value of vf.get_local_tz(), which defaults to the system timezone - if one can be determined. - :param default_input_tz: Name of timezone (e.g. 'America/New_York') that naive datetime - strings should be interpreted in. Defaults to `local_tz`. - :param row_limit: Maximum number of dataset rows to include in the returned - datasets. If exceeded, datasets will be truncated to this number of rows - and a RowLimitExceeded warning will be included in the ChartState's warnings list - :param inline_datasets: A dict from dataset names to pandas DataFrames or pyarrow - Tables. Inline datasets may be referenced by the input specification using - the following url syntax 'vegafusion+dataset://{dataset_name}' or - 'table://{dataset_name}'. - :return: ChartState + """Construct new ChartState object. + + Args: + spec: A Vega specification dict or JSON string. + local_tz: Name of timezone to be considered local. E.g. 'America/New_York'. 
+ Defaults to the value of vf.get_local_tz(), which defaults to the system + timezone if one can be determined. + default_input_tz: Name of timezone (e.g. 'America/New_York') that naive + datetime strings should be interpreted in. Defaults to `local_tz`. + row_limit: Maximum number of dataset rows to include in the returned + datasets. If exceeded, datasets will be truncated to this number of + rows and a RowLimitExceeded warning will be included in the ChartState's + warnings list. + inline_datasets: A dict from dataset names to pandas DataFrames or pyarrow + Tables. Inline datasets may be referenced by the input specification + using the following url syntax 'vegafusion+dataset://{dataset_name}' or + 'table://{dataset_name}'. + + Returns: + ChartState object. """ if self._grpc_channel: raise ValueError("new_chart_state not yet supported over gRPC") else: local_tz = local_tz or get_local_tz() - inline_arrow_dataset = self._import_or_register_inline_datasets(inline_datasets) + inline_arrow_dataset = self._import_or_register_inline_datasets( + inline_datasets + ) return ChartState( - self.embedded_runtime.new_chart_state(spec, local_tz, default_input_tz, row_limit, inline_arrow_dataset) + self.embedded_runtime.new_chart_state( + spec, local_tz, default_input_tz, row_limit, inline_arrow_dataset + ) ) def pre_transform_datasets( self, - spec, - datasets, - local_tz=None, - default_input_tz=None, - row_limit=None, - inline_datasets=None - ): - """ - Extract the fully evaluated form of the requested datasets from a Vega specification - as pandas DataFrames. - - :param spec: A Vega specification dict or JSON string - :param datasets: A list with elements that are either: - - The name of a top-level dataset as a string - - A two-element tuple where the first element is the name of a dataset as a string - and the second element is the nested scope of the dataset as a list of integers - :param local_tz: Name of timezone to be considered local. E.g. 'America/New_York'. 
- Defaults to the value of vf.get_local_tz(), which defaults to the system timezone - if one can be determined. - :param default_input_tz: Name of timezone (e.g. 'America/New_York') that naive datetime - strings should be interpreted in. Defaults to `local_tz`. - :param row_limit: Maximum number of dataset rows to include in the returned - datasets. If exceeded, datasets will be truncated to this number of rows - and a RowLimitExceeded warning will be included in the resulting warnings list - :param inline_datasets: A dict from dataset names to pandas DataFrames or pyarrow - Tables. Inline datasets may be referenced by the input specification using - the following url syntax 'vegafusion+dataset://{dataset_name}' or - 'table://{dataset_name}'. - :return: - Two-element tuple: - 0. List of pandas DataFrames corresponding to the input datasets list - 1. A list of warnings as dictionaries. Each warning dict has a 'type' - key indicating the warning type, and a 'message' key containing - a description of the warning. + spec: Union[dict[str, Any], str], + datasets: list[Union[str, tuple[str, list[int]]]], + local_tz: str | None = None, + default_input_tz: str | None = None, + row_limit: int | None = None, + inline_datasets: dict[str, DataFrameLike] | None = None, + ) -> tuple[list[DataFrameLike], list[dict[str, str]]]: + """Extract the fully evaluated form of the requested datasets from a Vega + specification. + + Extracts datasets as pandas DataFrames. + + Args: + spec: A Vega specification dict or JSON string. + datasets: A list with elements that are either: + - The name of a top-level dataset as a string + - A two-element tuple where the first element is the name of a dataset + as a string and the second element is the nested scope of the dataset + as a list of integers + local_tz: Name of timezone to be considered local. E.g. 'America/New_York'. + Defaults to the value of vf.get_local_tz(), which defaults to the + system timezone if one can be determined. 
+ default_input_tz: Name of timezone (e.g. 'America/New_York') that naive + datetime strings should be interpreted in. Defaults to `local_tz`. + row_limit: Maximum number of dataset rows to include in the returned + datasets. If exceeded, datasets will be truncated to this number of + rows and a RowLimitExceeded warning will be included in the resulting + warnings list. + inline_datasets: A dict from dataset names to pandas DataFrames or pyarrow + Tables. Inline datasets may be referenced by the input specification + using the following url syntax 'vegafusion+dataset://{dataset_name}' + or 'table://{dataset_name}'. + + Returns: + A tuple containing: + - List of pandas DataFrames corresponding to the input datasets list + - A list of warnings as dictionaries. Each warning dict has a 'type' + key indicating the warning type, and a 'message' key containing a + description of the warning. """ if self._grpc_channel: raise ValueError("pre_transform_datasets not yet supported over gRPC") @@ -480,7 +627,9 @@ def pre_transform_datasets( pre_tx_vars = parse_variables(datasets) # Serialize inline datasets - inline_arrow_dataset = self._import_or_register_inline_datasets(inline_datasets) + inline_arrow_dataset = self._import_or_register_inline_datasets( + inline_datasets + ) try: values, warnings = self.embedded_runtime.pre_transform_datasets( spec, @@ -488,7 +637,7 @@ def pre_transform_datasets( local_tz=local_tz, default_input_tz=default_input_tz, row_limit=row_limit, - inline_datasets=inline_arrow_dataset + inline_datasets=inline_arrow_dataset, ) finally: # Clean up registered tables (both inline and internal temporary tables) @@ -497,16 +646,25 @@ def pre_transform_datasets( pl = sys.modules.get("polars", None) pa = sys.modules.get("pyarrow", None) - if pl is not None and _all_datasets_have_type(inline_datasets, (pl.DataFrame, pl.LazyFrame)): + if pl is not None and _all_datasets_have_type( + inline_datasets, (pl.DataFrame, pl.LazyFrame) + ): + if TYPE_CHECKING: + import 
polars as pl + # Deserialize values to Polars tables - datasets = [pl.from_arrow(value) for value in values] + pl_dataframes = [pl.from_arrow(value) for value in values] # Localize datetime columns to UTC processed_datasets = [] - for df in datasets: + for df in pl_dataframes: for name, dtype in zip(df.columns, df.dtypes): if dtype == pl.Datetime: - df = df.with_columns(df[name].dt.replace_time_zone("UTC").dt.convert_time_zone(local_tz)) + df = df.with_columns( + df[name] + .dt.replace_time_zone("UTC") + .dt.convert_time_zone(local_tz) + ) processed_datasets.append(df) return processed_datasets, warnings @@ -520,89 +678,103 @@ def pre_transform_datasets( for df in datasets: for name, dtype in df.dtypes.items(): if dtype.kind == "M": - df[name] = df[name].dt.tz_localize("UTC").dt.tz_convert(local_tz) + df[name] = ( + df[name].dt.tz_localize("UTC").dt.tz_convert(local_tz) + ) return datasets, warnings def pre_transform_extract( self, - spec, - local_tz=None, - default_input_tz=None, - preserve_interactivity=True, - extract_threshold=20, - extracted_format="pyarrow", - inline_datasets=None, - keep_signals=None, - keep_datasets=None, - ): - """ - Evaluate supported transforms in an input Vega specification and produce a new - specification with small pre-transformed datasets (under 100 rows) included inline - and larger inline datasets (100 rows or more) are extracted into pyarrow tables. - - :param spec: A Vega specification dict or JSON string - :param local_tz: Name of timezone to be considered local. E.g. 'America/New_York'. - Defaults to the value of vf.get_local_tz(), which defaults to the system timezone - if one can be determined. - :param default_input_tz: Name of timezone (e.g. 'America/New_York') that naive datetime - strings should be interpreted in. Defaults to `local_tz`. - :param preserve_interactivity: If True (default) then the interactive behavior of - the chart will pre preserved. 
This requires that all the data that participates - in interactions be included in the resulting spec rather than being pre-transformed. - If False, then all possible data transformations are applied even if they break - the original interactive behavior of the chart. - :param extract_threshold: Datasets with length below extract_threshold will be - inlined - :param extracted_format: The format for the extracted datasets - The format for extracted datasets: + spec: dict[str, Any] | str, + local_tz: str | None = None, + default_input_tz: str | None = None, + preserve_interactivity: bool = True, + extract_threshold: int = 20, + extracted_format: str = "pyarrow", + inline_datasets: dict[str, DataFrameLike] | None = None, + keep_signals: list[str | tuple[str, list[int]]] | None = None, + keep_datasets: list[str | tuple[str, list[int]]] | None = None, + ) -> tuple[ + dict[str, Any], list[tuple[str, list[int], pa.Table]], list[dict[str, str]] + ]: + """ + Evaluate supported transforms in an input Vega specification. + + Produces a new specification with small pre-transformed datasets (under 100 + rows) included inline and larger inline datasets (100 rows or more) extracted + into pyarrow tables. + + Args: + spec: A Vega specification dict or JSON string. + local_tz: Name of timezone to be considered local. E.g. 'America/New_York'. + Defaults to the value of vf.get_local_tz(), which defaults to the system + timezone if one can be determined. + default_input_tz: Name of timezone (e.g. 'America/New_York') that naive + datetime strings should be interpreted in. Defaults to `local_tz`. + preserve_interactivity: If True (default) then the interactive behavior of + the chart will pre preserved. This requires that all the data that + participates in interactions be included in the resulting spec rather + than being pre-transformed. If False, then all possible data + transformations are applied even if they break the original interactive + behavior of the chart. 
+ extract_threshold: Datasets with length below extract_threshold will be + inlined. + extracted_format: The format for the extracted datasets. Options are: - "pyarrow": pyarrow.Table - "arrow-ipc": bytes in arrow IPC format - "arrow-ipc-base64": base64 encoded arrow IPC format - :param inline_datasets: A dict from dataset names to pandas DataFrames or pyarrow - Tables. Inline datasets may be referenced by the input specification using - the following url syntax 'vegafusion+dataset://{dataset_name}' or - 'table://{dataset_name}'. - :param keep_signals: Signals from the input spec that must be included in the - pre-transformed spec. A list with elements that are either: - - The name of a top-level signal as a string - - A two-element tuple where the first element is the name of a signal as a string - and the second element is the nested scope of the dataset as a list of integers - :param keep_datasets: Datasets from the input spec that must be included in the - pre-transformed spec. A list with elements that are either: - - The name of a top-level dataset as a string - - A two-element tuple where the first element is the name of a dataset as a string - and the second element is the nested scope of the dataset as a list of integers - :return: - Three-element tuple: - 0. A dict containing the JSON representation of the pre-transformed Vega - specification without pre-transformed datasets included inline - 1. Extracted datasets as a list of three element tuples - 0. dataset name - 1. dataset scope - 2. pyarrow Table - 2. A list of warnings as dictionaries. Each warning dict has a 'type' - key indicating the warning type, and a 'message' key containing - a description of the warning. Potential warning types include: - 'Planner': Planner warning + inline_datasets: A dict from dataset names to pandas DataFrames or pyarrow + Tables. 
Inline datasets may be referenced by the input specification + using the following url syntax 'vegafusion+dataset://{dataset_name}' or + 'table://{dataset_name}'. + keep_signals: Signals from the input spec that must be included in the + pre-transformed spec. A list with elements that are either: + - The name of a top-level signal as a string + - A two-element tuple where the first element is the name of a signal as + a string and the second element is the nested scope of the dataset as + a list of integers + keep_datasets: Datasets from the input spec that must be included in the + pre-transformed spec. A list with elements that are either: + - The name of a top-level dataset as a string + - A two-element tuple where the first element is the name of a dataset + as a string and the second element is the nested scope of the dataset + as a list of integers + + Returns: + A tuple containing three elements: + 1. A dict containing the JSON representation of the pre-transformed Vega + specification without pre-transformed datasets included inline + 2. Extracted datasets as a list of three element tuples: + - dataset name + - dataset scope + - pyarrow Table + 3. A list of warnings as dictionaries. Each warning dict has a 'type' key + indicating the warning type, and a 'message' key containing a description + of the warning. 
Potential warning types include: + - 'Planner': Planner warning """ if self._grpc_channel: raise ValueError("pre_transform_spec not yet supported over gRPC") else: local_tz = local_tz or get_local_tz() - inline_arrow_dataset = self._import_or_register_inline_datasets(inline_datasets) + inline_arrow_dataset = self._import_or_register_inline_datasets( + inline_datasets + ) try: - new_spec, datasets, warnings = self.embedded_runtime.pre_transform_extract( - spec, - local_tz=local_tz, - default_input_tz=default_input_tz, - preserve_interactivity=preserve_interactivity, - extract_threshold=extract_threshold, - extracted_format=extracted_format, - inline_datasets=inline_arrow_dataset, - keep_signals=keep_signals, - keep_datasets=keep_datasets, + new_spec, datasets, warnings = ( + self.embedded_runtime.pre_transform_extract( + spec, + local_tz=local_tz, + default_input_tz=default_input_tz, + preserve_interactivity=preserve_interactivity, + extract_threshold=extract_threshold, + extracted_format=extracted_format, + inline_datasets=inline_arrow_dataset, + keep_signals=keep_signals, + keep_datasets=keep_datasets, + ) ) finally: # Clean up temporary tables @@ -611,20 +783,29 @@ def pre_transform_extract( return new_spec, datasets, warnings - def patch_pre_transformed_spec(self, spec1, pre_transformed_spec1, spec2): + def patch_pre_transformed_spec( + self, + spec1: dict[str, Any] | str, + pre_transformed_spec1: dict[str, Any] | str, + spec2: dict[str, Any] | str, + ) -> dict[str, Any] | None: """ - Attempt to patch a Vega spec was returned by the pre_transform_spec method without - rerunning the pre_transform_spec logic. When possible, this can be significantly - faster than rerunning the pre_transform_spec method. 
- - :param spec1: The input Vega spec to a prior call to pre_transform_spec - :param pre_transformed_spec1: The prior result of passing spec1 to pre_transform_spec - :param spec2: A Vega spec that is assumed to be a small delta compared to spec1 - - :return: dict or None - If the delta between spec1 and spec2 is in the portions of spec1 that were not - modified by pre_transform_spec, then this delta can be applied cleanly to - pre_transform_spec1 and the result is returned. If the delta cannot be + Attempt to patch a Vega spec returned by the pre_transform_spec method. + + This method tries to patch a Vega spec without rerunning the pre_transform_spec + logic. When possible, this can be significantly faster than rerunning the + pre_transform_spec method. + + Args: + spec1: The input Vega spec to a prior call to pre_transform_spec. + pre_transformed_spec1: The prior result of passing spec1 to + pre_transform_spec. + spec2: A Vega spec that is assumed to be a small delta compared to spec1. + + Returns: + If the delta between spec1 and spec2 is in the portions of spec1 that were + not modified by pre_transform_spec, then this delta can be applied cleanly + to pre_transform_spec1 and the result is returned. If the delta cannot be applied cleanly, None is returned and spec2 should be passed through the pre_transform_spec method. """ @@ -634,113 +815,132 @@ def patch_pre_transformed_spec(self, spec1, pre_transformed_spec1, spec2): pre_transformed_spec2 = self.embedded_runtime.patch_pre_transformed_spec( spec1, pre_transformed_spec1, spec2 ) - return pre_transformed_spec2 + return cast(dict[str, Any], pre_transformed_spec2) @property - def worker_threads(self): + def worker_threads(self) -> int: + """ + Get the number of worker threads for the runtime. 
+ + Returns: + Number of threads for the runtime + """ return self._worker_threads @worker_threads.setter - def worker_threads(self, value): + def worker_threads(self, value: int) -> None: """ Restart the runtime with the specified number of worker threads - :param threads: Number of threads for the new runtime + Args: + value: Number of threads for the new runtime """ if value != self._worker_threads: self._worker_threads = value self.reset() @property - def total_memory(self): + def total_memory(self) -> int | None: if self._embedded_runtime: return self._embedded_runtime.total_memory() else: return None @property - def _protected_memory(self): + def _protected_memory(self) -> int | None: if self._embedded_runtime: return self._embedded_runtime.protected_memory() else: return None @property - def _probationary_memory(self): + def _probationary_memory(self) -> int | None: if self._embedded_runtime: return self._embedded_runtime.probationary_memory() else: return None @property - def size(self): + def size(self) -> int | None: if self._embedded_runtime: return self._embedded_runtime.size() else: return None @property - def memory_limit(self): + def memory_limit(self) -> int | None: return self._memory_limit @memory_limit.setter - def memory_limit(self, value): + def memory_limit(self, value: int) -> None: """ Restart the runtime with the specified memory limit - :param threads: Max approximate memory usage of cache + Args: + value: Max approximate memory usage of cache """ if value != self._memory_limit: self._memory_limit = value self.reset() @property - def cache_capacity(self): + def cache_capacity(self) -> int: return self._cache_capacity @cache_capacity.setter - def cache_capacity(self, value): + def cache_capacity(self, value: int) -> None: """ Restart the runtime with the specified cache capacity - :param threads: Max task graph values to cache + Args: + value: Max task graph values to cache """ if value != self._cache_capacity: self._cache_capacity = value 
self.reset() - def reset(self): + def reset(self) -> None: if self._embedded_runtime is not None: self._embedded_runtime.clear_cache() self._embedded_runtime = None - def __repr__(self): + def __repr__(self) -> str: if self._grpc_channel: return f"VegaFusionRuntime(channel={self._grpc_channel})" else: return ( - f"VegaFusionRuntime(" - f"cache_capacity={self.cache_capacity}, worker_threads={self.worker_threads}" - f")" + f"VegaFusionRuntime(cache_capacity={self.cache_capacity}, " + f"worker_threads={self.worker_threads})" ) -def parse_variables(variables): +def parse_variables( + variables: list[str | tuple[str, list[int]]] | None, +) -> list[tuple[str, list[int]]]: + """ + Parse VegaFusion variables. + + Args: + variables: List of VegaFusion variables. + + Returns: + List of parsed VegaFusion variables. + """ # Build input variables - pre_tx_vars = [] + pre_tx_vars: list[tuple[str, list[int]]] = [] if variables is None: return [] if isinstance(variables, str): variables = [variables] - err_msg = "Elements of variables argument must be strings are two-element tuples" + err_msg = "Elements of variables argument must be strings or two-element tuples" for var in variables: if isinstance(var, str): pre_tx_vars.append((var, [])) elif isinstance(var, (list, tuple)): if len(var) == 2: - pre_tx_vars.append(tuple(var)) + pre_tx_vars.append((var[0], list(var[1]))) else: raise ValueError(err_msg) else: @@ -748,7 +948,9 @@ def parse_variables(variables): return pre_tx_vars -def get_mark_group_for_scope(vega_spec, scope): +def get_mark_group_for_scope( + vega_spec: dict[str, Any], scope: list[int] +) -> dict[str, Any] | None: group = vega_spec # Find group at scope @@ -767,4 +969,5 @@ def get_mark_group_for_scope(vega_spec, scope): return group + runtime = VegaFusionRuntime(64, psutil.virtual_memory().total // 2, psutil.cpu_count()) diff --git a/vegafusion-python/vegafusion/transformer.py b/vegafusion-python/vegafusion/transformer.py index f307b3dc0..a0108eb69 100644 --- 
a/vegafusion-python/vegafusion/transformer.py +++ b/vegafusion-python/vegafusion/transformer.py @@ -1,24 +1,32 @@ +from __future__ import annotations + import io -import os import sys -import pathlib -from hashlib import sha1 -from tempfile import NamedTemporaryFile -import uuid -from weakref import WeakValueDictionary +from typing import TYPE_CHECKING, Any, Union + +if TYPE_CHECKING: + import pyarrow as pa + import pyarrow.interchange as pi + -DATASET_PREFIXES = ("vegafusion+dataset://", "table://") -BATCH_SIZE = 8096 +DATASET_PREFIXES: tuple[str, ...] = ("vegafusion+dataset://", "table://") +BATCH_SIZE: int = 8096 +DataFrameLike = Any -def to_arrow_table(data): + +def to_arrow_table(data: DataFrameLike) -> pa.Table: """ - Helper to convert a Pandas DataFrame to a PyArrow Table + Helper to convert a pandas DataFrame to a PyArrow Table. + + Args: + data: pandas DataFrame. - :param data: Pandas DataFrame - :return: pyarrow.Table + Returns: + pyarrow.Table: The converted PyArrow Table. """ import pyarrow as pa + pd = sys.modules.get("pandas") # Reset named index(ex) into a column @@ -72,14 +80,21 @@ def to_arrow_table(data): return table -def to_arrow_ipc_bytes(data, stream=False): +def to_arrow_ipc_bytes(data: DataFrameLike, stream: bool = False) -> bytes: """ - Helper to convert a DataFrame to the Arrow IPC binary format + Helper to convert a DataFrame to the Arrow IPC binary format. + + Args: + data: Pandas DataFrame, pyarrow Table, or object that supports + the DataFrame Interchange Protocol. + stream: If True, write IPC Stream format. If False (default), write ipc + file format. - :param data: Pandas DataFrame, pyarrow Table, or object that supports - the DataFrame Interchange Protocol - :param stream: If True, write IPC Stream format. If False (default), write ipc file format. - :return: bytes + Returns: + bytes: The Arrow IPC binary format data. + + Raises: + ValueError: If the input data type is unsupported. 
""" pa = sys.modules.get("pyarrow", None) pd = sys.modules.get("pandas", None) @@ -95,7 +110,7 @@ def to_arrow_ipc_bytes(data, stream=False): return arrow_table_to_ipc_bytes(table, stream=stream) -def arrow_table_to_ipc_bytes(table, stream=False): +def arrow_table_to_ipc_bytes(table: pa.Table, stream: bool = False) -> bytes: import pyarrow as pa # Next we write the Arrow table as a feather file (The Arrow IPC format on disk). @@ -112,15 +127,15 @@ def arrow_table_to_ipc_bytes(table, stream=False): return bytes_buffer.getvalue() -def to_feather(data, file): +def to_feather(data: DataFrameLike, file: Union[str, io.IOBase]) -> None: """ Helper to convert a Pandas DataFrame to a feather file that is optimized for - use as a VegaFusion data source + use as a VegaFusion data source. - :param data: Pandas DataFrame, pyarrow Table, or object that supports - the DataFrame Interchange Protocol - :param file: File path string or writable file-like object - :return: None + Args: + data: Pandas DataFrame, pyarrow Table, or object that supports + the DataFrame Interchange Protocol. + file: File path string or writable file-like object. """ file_bytes = to_arrow_ipc_bytes(data, stream=False) @@ -132,66 +147,24 @@ def to_feather(data, file): f.write(file_bytes) - -def get_inline_dataset_names(vega_spec): - """ - Get set of the inline datasets names in the provided spec - - :param vega_spec: Vega spec - :return: set of inline dataset names +def import_pyarrow_interchange() -> pi: """ - table_names = set() - for data in vega_spec.get("data", []): - url = data.get("url", "") - for prefix in DATASET_PREFIXES: - if url.startswith(prefix): - name = url[len(prefix):] - table_names.add(name) - - for mark in vega_spec.get("marks", []): - table_names.update(get_inline_dataset_names(mark)) - - return table_names - + Import pyarrow.interchange module. -__inline_tables = WeakValueDictionary() + Returns: + pyarrow.interchange: The pyarrow.interchange module. 
- -def get_inline_dataset_table(table_name): - return __inline_tables.pop(table_name) - - -def get_inline_datasets_for_spec(vega_spec): - table_names = get_inline_dataset_names(vega_spec) - datasets = {} - for table_name in table_names: - try: - datasets[table_name] = get_inline_dataset_table(table_name) - except KeyError: - # named dataset that was provided by the user - pass - return datasets - - -def is_dataframe_like(data): - pa = sys.modules.get("pyarrow") - pd = sys.modules.get("pandas") - is_pa_table = pa is not None and isinstance(data, pa.Table) - is_pd_table = pd is not None and isinstance(data, pd.DataFrame) - return is_pa_table or is_pd_table or hasattr(data, "__dataframe__") - - -def has_geo_interface(data): - return hasattr(data, "__geo_interface__") - - -def import_pyarrow_interchange(): + Raises: + ImportError: If pyarrow version is less than 11.0.0. + """ try: import pyarrow.interchange as pi + return pi - except ImportError: + except ImportError as e: import pyarrow as pa + raise ImportError( - "Use of the DataFrame Interchange Protocol requires at least version 11.0.0 of pyarrow\n" - f"Found version {pa.__version__}" - ) + "Use of the DataFrame Interchange Protocol requires at least " + f"version 11.0.0 of pyarrow\nFound version {pa.__version__}" + ) from e