Skip to content

Commit

Permalink
Add Python function for column usage
Browse files Browse the repository at this point in the history
  • Loading branch information
jonmmease committed Oct 9, 2024
1 parent 7956756 commit 2d8d116
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 20 deletions.
36 changes: 16 additions & 20 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ platforms = ["osx-arm64", "osx-64", "linux-64", "win-64"]
macos = "12.0"

[tasks]
fmt-rs = "cargo fmt --all"
check-rs-fmt = "cargo fmt --all -- --check"
check-rs-warnings = "export RUSTFLAGS=\"-D warnings\" && cargo check --tests"
check-rs-clippy = "cargo clippy -- -A clippy::borrow_deref_ref"
Expand Down
11 changes: 11 additions & 0 deletions vegafusion-python/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub mod connection;

use pyo3;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyDict, PyList, PyTuple};
Expand All @@ -19,6 +20,7 @@ use serde_json::json;
use vegafusion_common::data::table::VegaFusionTable;
use vegafusion_core::patch::patch_pre_transformed_spec;
use vegafusion_core::planning::plan::{PlannerConfig, PreTransformSpecWarningSpec, SpecPlan};
use vegafusion_core::planning::projection_pushdown::get_column_usage as rs_get_column_usage;
use vegafusion_core::planning::watch::{ExportUpdateJSON, WatchPlan};
use vegafusion_core::proto::gen::tasks::{TzConfig, Variable};
use vegafusion_core::spec::chart::ChartSpec;
Expand Down Expand Up @@ -614,6 +616,14 @@ impl PyVegaFusionRuntime {
}
}

/// Report the columns of each root-level dataset that a Vega spec references.
///
/// The incoming Python object is converted into a chart spec with
/// `parse_json_spec`, analyzed by the core `get_column_usage` routine
/// (imported in this file as `rs_get_column_usage`), and the result is
/// handed back to Python via `pythonize`.
///
/// # Errors
///
/// Any failure from parsing the spec, computing the usage, or converting the
/// result to a Python object is propagated to the caller as a Python error.
#[pyfunction]
#[pyo3(signature = (spec))]
pub fn get_column_usage(py: Python, spec: PyObject) -> PyResult<PyObject> {
    let chart_spec = parse_json_spec(spec)?;
    let column_usage = rs_get_column_usage(&chart_spec)?;
    // Convert the Rust-side result into native Python containers.
    let py_usage = pythonize::pythonize(py, &column_usage)?;
    Ok(py_usage.into())
}

/// A Python module implemented in Rust. The name of this function must match
/// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to
/// import the module.
Expand All @@ -622,6 +632,7 @@ fn _vegafusion(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
m.add_class::<PyVegaFusionRuntime>()?;
m.add_class::<PySqlConnection>()?;
m.add_class::<PyChartState>()?;
m.add_function(wrap_pyfunction!(get_column_usage, m)?)?;
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
Ok(())
}
Expand Down
17 changes: 17 additions & 0 deletions vegafusion-python/tests/test_get_column_usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import json
from pathlib import Path


import vegafusion as vf

here = Path(__file__).parent

spec_dir = here / ".." / ".." / "vegafusion-runtime" / "tests" / "specs"


def test_get_column_usage():
    """Check the column usage reported for the marginal-histograms spec."""
    spec_path = spec_dir / "vegalite" / "concat_marginal_histograms.vg.json"
    spec_text = spec_path.read_text("utf8")
    spec = json.loads(spec_text)

    expected = {"source_0": ["IMDB Rating", "Rotten Tomatoes Rating"]}
    usages = vf.get_column_usage(spec)

    assert usages == expected
3 changes: 3 additions & 0 deletions vegafusion-python/vegafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from ._vegafusion import __version__
from .local_tz import get_local_tz, set_local_tz
from .runtime import runtime
from .utils import get_column_usage


def patched_version(distribution_name: str) -> str:
Expand All @@ -20,8 +21,10 @@ def patched_version(distribution_name: str) -> str:
# Patch importlib.metadata.version to handle our dummy package
importlib.metadata.version = patched_version


# Names exported by `from vegafusion import *`; each entry is imported
# at the top of this module.
__all__ = [
"runtime",
"set_local_tz",
"get_local_tz",
"get_column_usage",
]
24 changes: 24 additions & 0 deletions vegafusion-python/vegafusion/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from __future__ import annotations

from typing import Any, cast

from ._vegafusion import get_column_usage as _get_column_usage


def get_column_usage(spec: dict[str, Any]) -> dict[str, list[str] | None]:
    """
    Compute the columns from each root dataset that are referenced in a
    Vega spec.

    Args:
        spec: Vega spec

    Returns:
        dict[str, list[str] | None]: Dict from root-level dataset name
        to either:
           - A list of columns that are referenced in this dataset if this
             can be determined precisely
           - None if it was not possible to determine the full set of
             columns that are referenced from this dataset
    """
    # The native extension function is untyped from Python's point of view;
    # the cast only informs type checkers and has no runtime effect.
    usage = _get_column_usage(spec)
    return cast("dict[str, list[str] | None]", usage)

0 comments on commit 2d8d116

Please sign in to comment.