diff --git a/python/docs/source/examples/bluesky.py b/python/docs/source/examples/bluesky.py index 316a34d31..c260fc960 100644 --- a/python/docs/source/examples/bluesky.py +++ b/python/docs/source/examples/bluesky.py @@ -113,7 +113,7 @@ async def receive_outputs(): # Copied from https://raw.githubusercontent.com/MarshalX/atproto/main/examples/firehose/process_commits.py -def _get_ops_by_type( +def _get_ops_by_type( # noqa: C901, E302 commit: models.ComAtprotoSyncSubscribeRepos.Commit, ) -> dict: # noqa: C901, E302 operation_by_type = { diff --git a/python/mypy.ini b/python/mypy.ini index 07bbced46..dcbe5289f 100644 --- a/python/mypy.ini +++ b/python/mypy.ini @@ -12,4 +12,7 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-plotly.*] +ignore_missing_imports = True + +[mypy-graphviz.*] ignore_missing_imports = True \ No newline at end of file diff --git a/python/poetry.lock b/python/poetry.lock index a7e817d55..7a7417640 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -722,7 +722,7 @@ develop = ["build", "twine"] name = "graphviz" version = "0.20.1" description = "Simple Python interface for Graphviz" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "graphviz-0.20.1-py3-none-any.whl", hash = "sha256:587c58a223b51611c0cf461132da386edd896a029524ca61a1462b880bf97977"}, @@ -3491,9 +3491,10 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] [extras] +explain = ["graphviz"] plot = ["plotly"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "d8a73b55ee47f657e4f7d51184206b4049c23c70ce517766da7238f16aeb2511" +content-hash = "a690c5a87f12c8dd5dcd1842a32acd3e79b090cc5fc5ccb2ed7e184eafdc644e" diff --git a/python/pyproject.toml b/python/pyproject.toml index a20f6c00c..57fd4fef3 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -41,6 +41,7 @@ plotly = {version = "^5.16.1", optional = true} [tool.poetry.extras] plot = ["plotly"] +explain = ["graphviz"] [tool.poetry.group.dev.dependencies] # Dependencies for building and developing. @@ -61,6 +62,7 @@ pep8-naming = ">=0.12.1" pydocstyle = "^6.3.0" pyupgrade = ">=2.29.1" autoflake = "^2.2.0" +graphviz = { version = "^0.20.1" } [tool.poetry.group.safety] optional = true @@ -76,6 +78,7 @@ optional = true mypy = ">=0.930" pandas-stubs = "^2.0.2" typeguard = ">=2.13.3" +graphviz = { version = "^0.20.1" } [tool.poetry.group.docs] # Dependencies for documentation. diff --git a/python/pysrc/kaskada/_timestream.py b/python/pysrc/kaskada/_timestream.py index efcf6fb84..197689223 100644 --- a/python/pysrc/kaskada/_timestream.py +++ b/python/pysrc/kaskada/_timestream.py @@ -6,6 +6,7 @@ import warnings from datetime import datetime, timedelta from typing import ( + TYPE_CHECKING, Callable, List, Literal, @@ -27,6 +28,9 @@ from ._execution import Execution, ResultIterator, _ExecutionOptions +if TYPE_CHECKING: + import graphviz + #: A literal value that can be used as an argument to a Timestream operation. LiteralValue: TypeAlias = Optional[Union[int, str, float, bool, timedelta, datetime]] @@ -1199,7 +1203,7 @@ def explain( kind: Literal["initial_dfg", "final_dfg", "final_plan"] = "final_plan", results: Optional[Union[kd.results.History, kd.results.Snapshot]] = None, mode: Literal["once", "live"] = "once", - ) -> Union[str, "graphviz.Source"]: + ) -> "graphviz.Source": """Return an explanation of this Timestream will be executed. This is intended for understanding how a given Timestream query will @@ -1212,6 +1216,7 @@ def explain( mode: The execution mode to use. Defaults to `'once'` to produce the results from the currently available data. Use `'live'` to start a standing query that continues to process new data until stopped. + Returns: A GraphViz representation of the execution plan as a string, SVG string, or SVG. Specific representation depends on the `format` argument. @@ -1223,6 +1228,8 @@ def explain( This method is intended for debugging and development purposes only. The API may change in the future. """ + import graphviz + expr = self if not pa.types.is_struct(self.data_type): # The execution engine requires a struct, so wrap this in a record. @@ -1232,17 +1239,9 @@ def explain( results, row_limit=None, max_batch_size=None, mode=mode ) - dot = expr._ffi_expr.plan(kind, options) - try: - import graphviz - - dot = graphviz.Source(dot, engine="dot") - return dot - except ImportError: - raise ValueError( - "`explain` requires `graphviz` python package: " - "install it using pip (or install `kaskada[explain]`)" - ) from None + dot = expr._ffi_expr.explain(kind, options) + dot = graphviz.Source(dot, engine="dot") + return dot def _execute( self, diff --git a/python/pysrc/kaskada/sources/arrow.py b/python/pysrc/kaskada/sources/arrow.py index 41a452e33..3d053e484 100644 --- a/python/pysrc/kaskada/sources/arrow.py +++ b/python/pysrc/kaskada/sources/arrow.py @@ -160,6 +160,7 @@ async def create( """Create a source reading from rows represented as dicts. Args: + rows: The input row(s) as a dictionary or list of dictionaries. time_column: The name of the column containing the time. key_column: The name of the column containing the key. queryable: Whether added rows will be available for running queries.