Skip to content

Commit

Permalink
Switch from RuntimeConfig to RuntimeEnvBuilder
Browse files Browse the repository at this point in the history
  • Loading branch information
timsaucer committed Dec 14, 2024
1 parent 05802ab commit 6011e65
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 39 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ It is possible to configure runtime (memory and disk settings) and configuration

```python
runtime = (
RuntimeConfig()
RuntimeEnvBuilder()
.with_disk_manager_os()
.with_fair_spill_pool(10000000)
)
Expand Down
6 changes: 4 additions & 2 deletions benchmarks/db-benchmark/groupby-datafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from datafusion import (
col,
functions as f,
RuntimeConfig,
RuntimeEnvBuilder,
SessionConfig,
SessionContext,
)
Expand Down Expand Up @@ -85,7 +85,9 @@ def execute(df):

# create a session context with explicit runtime and config settings
runtime = (
RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(64 * 1024 * 1024 * 1024)
RuntimeEnvBuilder()
.with_disk_manager_os()
.with_fair_spill_pool(64 * 1024 * 1024 * 1024)
)
config = (
SessionConfig()
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/tpch/tpch.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def bench(data_path, query_path):

# create context
# runtime = (
# RuntimeConfig()
# RuntimeEnvBuilder()
# .with_disk_manager_os()
# .with_fair_spill_pool(10000000)
# )
Expand Down
8 changes: 4 additions & 4 deletions docs/source/user-guide/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@ Configuration
=============

Let's look at how we can configure DataFusion. When creating a :py:class:`~datafusion.context.SessionContext`, you can pass in
a :py:class:`~datafusion.context.SessionConfig` and :py:class:`~datafusion.context.RuntimeConfig` object. These two cover a wide range of options.
a :py:class:`~datafusion.context.SessionConfig` and :py:class:`~datafusion.context.RuntimeEnvBuilder` object. These two cover a wide range of options.

.. code-block:: python
from datafusion import RuntimeConfig, SessionConfig, SessionContext
from datafusion import RuntimeEnvBuilder, SessionConfig, SessionContext
# create a session context with default settings
ctx = SessionContext()
print(ctx)
# create a session context with explicit runtime and config settings
runtime = RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(10000000)
runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000)
config = (
SessionConfig()
.with_create_default_catalog_and_schema(True)
Expand All @@ -48,4 +48,4 @@ a :py:class:`~datafusion.context.SessionConfig` and :py:class:`~datafusion.conte
You can read more about available :py:class:`~datafusion.context.SessionConfig` options in the `rust DataFusion Configuration guide <https://arrow.apache.org/datafusion/user-guide/configs.html>`_,
and about :code:`RuntimeConfig` options in the rust `online API documentation <https://docs.rs/datafusion/latest/datafusion/execution/runtime_env/struct.RuntimeConfig.html>`_.
and about :code:`RuntimeEnvBuilder` options in the rust `online API documentation <https://docs.rs/datafusion/latest/datafusion/execution/runtime_env/struct.RuntimeEnvBuilder.html>`_.
4 changes: 2 additions & 2 deletions examples/create-context.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
# specific language governing permissions and limitations
# under the License.

from datafusion import RuntimeConfig, SessionConfig, SessionContext
from datafusion import RuntimeEnvBuilder, SessionConfig, SessionContext

# create a session context with default settings
ctx = SessionContext()
print(ctx)

# create a session context with explicit runtime and config settings
runtime = RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(10000000)
runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000)
config = (
SessionConfig()
.with_create_default_catalog_and_schema(True)
Expand Down
4 changes: 2 additions & 2 deletions python/datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from .context import (
SessionContext,
SessionConfig,
RuntimeConfig,
RuntimeEnvBuilder,
SQLOptions,
)

Expand Down Expand Up @@ -66,7 +66,7 @@
"SessionContext",
"SessionConfig",
"SQLOptions",
"RuntimeConfig",
"RuntimeEnvBuilder",
"Expr",
"ScalarUDF",
"WindowFrame",
Expand Down
55 changes: 33 additions & 22 deletions python/datafusion/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from __future__ import annotations

from ._internal import SessionConfig as SessionConfigInternal
from ._internal import RuntimeConfig as RuntimeConfigInternal
from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal
from ._internal import SQLOptions as SQLOptionsInternal
from ._internal import SessionContext as SessionContextInternal

Expand Down Expand Up @@ -256,56 +256,58 @@ def set(self, key: str, value: str) -> SessionConfig:
return self


class RuntimeConfig:
class RuntimeEnvBuilder:
"""Runtime configuration options."""

def __init__(self) -> None:
"""Create a new :py:class:`RuntimeConfig` with default values."""
self.config_internal = RuntimeConfigInternal()
"""Create a new :py:class:`RuntimeEnvBuilder` with default values."""
self.config_internal = RuntimeEnvBuilderInternal()

def with_disk_manager_disabled(self) -> RuntimeConfig:
def with_disk_manager_disabled(self) -> RuntimeEnvBuilder:
"""Disable the disk manager, attempts to create temporary files will error.
Returns:
A new :py:class:`RuntimeConfig` object with the updated setting.
A new :py:class:`RuntimeEnvBuilder` object with the updated setting.
"""
self.config_internal = self.config_internal.with_disk_manager_disabled()
return self

def with_disk_manager_os(self) -> RuntimeConfig:
def with_disk_manager_os(self) -> RuntimeEnvBuilder:
"""Use the operating system's temporary directory for disk manager.
Returns:
A new :py:class:`RuntimeConfig` object with the updated setting.
A new :py:class:`RuntimeEnvBuilder` object with the updated setting.
"""
self.config_internal = self.config_internal.with_disk_manager_os()
return self

def with_disk_manager_specified(self, *paths: str | pathlib.Path) -> RuntimeConfig:
def with_disk_manager_specified(
self, *paths: str | pathlib.Path
) -> RuntimeEnvBuilder:
"""Use the specified paths for the disk manager's temporary files.
Args:
paths: Paths to use for the disk manager's temporary files.
Returns:
A new :py:class:`RuntimeConfig` object with the updated setting.
A new :py:class:`RuntimeEnvBuilder` object with the updated setting.
"""
paths_list = [str(p) for p in paths]
self.config_internal = self.config_internal.with_disk_manager_specified(
paths_list
)
return self

def with_unbounded_memory_pool(self) -> RuntimeConfig:
def with_unbounded_memory_pool(self) -> RuntimeEnvBuilder:
"""Use an unbounded memory pool.
Returns:
A new :py:class:`RuntimeConfig` object with the updated setting.
A new :py:class:`RuntimeEnvBuilder` object with the updated setting.
"""
self.config_internal = self.config_internal.with_unbounded_memory_pool()
return self

def with_fair_spill_pool(self, size: int) -> RuntimeConfig:
def with_fair_spill_pool(self, size: int) -> RuntimeEnvBuilder:
"""Use a fair spill pool with the specified size.
This pool works best when you know beforehand the query has multiple spillable
Expand All @@ -326,16 +328,16 @@ def with_fair_spill_pool(self, size: int) -> RuntimeConfig:
size: Size of the memory pool in bytes.
Returns:
A new :py:class:`RuntimeConfig` object with the updated setting.
A new :py:class:`RuntimeEnvBuilder` object with the updated setting.
Examples usage::
config = RuntimeConfig().with_fair_spill_pool(1024)
config = RuntimeEnvBuilder().with_fair_spill_pool(1024)
"""
self.config_internal = self.config_internal.with_fair_spill_pool(size)
return self

def with_greedy_memory_pool(self, size: int) -> RuntimeConfig:
def with_greedy_memory_pool(self, size: int) -> RuntimeEnvBuilder:
"""Use a greedy memory pool with the specified size.
This pool works well for queries that do not need to spill or have a single
Expand All @@ -346,32 +348,39 @@ def with_greedy_memory_pool(self, size: int) -> RuntimeConfig:
size: Size of the memory pool in bytes.
Returns:
A new :py:class:`RuntimeConfig` object with the updated setting.
A new :py:class:`RuntimeEnvBuilder` object with the updated setting.
Example usage::
config = RuntimeConfig().with_greedy_memory_pool(1024)
config = RuntimeEnvBuilder().with_greedy_memory_pool(1024)
"""
self.config_internal = self.config_internal.with_greedy_memory_pool(size)
return self

def with_temp_file_path(self, path: str | pathlib.Path) -> RuntimeConfig:
def with_temp_file_path(self, path: str | pathlib.Path) -> RuntimeEnvBuilder:
"""Use the specified path to create any needed temporary files.
Args:
path: Path to use for temporary files.
Returns:
A new :py:class:`RuntimeConfig` object with the updated setting.
A new :py:class:`RuntimeEnvBuilder` object with the updated setting.
Example usage::
config = RuntimeConfig().with_temp_file_path("/tmp")
config = RuntimeEnvBuilder().with_temp_file_path("/tmp")
"""
self.config_internal = self.config_internal.with_temp_file_path(str(path))
return self


@deprecated("Use `RuntimeEnvBuilder` instead.")
class RuntimeConfig(RuntimeEnvBuilder):
"""See `RuntimeEnvBuilder`."""

pass


class SQLOptions:
"""Options to be used when performing SQL queries."""

Expand Down Expand Up @@ -445,7 +454,9 @@ class SessionContext:
"""

def __init__(
self, config: SessionConfig | None = None, runtime: RuntimeConfig | None = None
self,
config: SessionConfig | None = None,
runtime: RuntimeEnvBuilder | None = None,
) -> None:
"""Main interface for executing queries with DataFusion.
Expand Down
10 changes: 5 additions & 5 deletions python/tests/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from datafusion import (
DataFrame,
RuntimeConfig,
RuntimeEnvBuilder,
SessionConfig,
SessionContext,
SQLOptions,
Expand All @@ -43,7 +43,7 @@ def test_create_context_session_config_only():


def test_create_context_runtime_config_only():
SessionContext(runtime=RuntimeConfig())
SessionContext(runtime=RuntimeEnvBuilder())


@pytest.mark.parametrize("path_to_str", (True, False))
Expand All @@ -54,7 +54,7 @@ def test_runtime_configs(tmp_path, path_to_str):
path1 = str(path1) if path_to_str else path1
path2 = str(path2) if path_to_str else path2

runtime = RuntimeConfig().with_disk_manager_specified(path1, path2)
runtime = RuntimeEnvBuilder().with_disk_manager_specified(path1, path2)
config = SessionConfig().with_default_catalog_and_schema("foo", "bar")
ctx = SessionContext(config, runtime)
assert ctx is not None
Expand All @@ -67,7 +67,7 @@ def test_runtime_configs(tmp_path, path_to_str):
def test_temporary_files(tmp_path, path_to_str):
path = str(tmp_path) if path_to_str else tmp_path

runtime = RuntimeConfig().with_temp_file_path(path)
runtime = RuntimeEnvBuilder().with_temp_file_path(path)
config = SessionConfig().with_default_catalog_and_schema("foo", "bar")
ctx = SessionContext(config, runtime)
assert ctx is not None
Expand All @@ -77,7 +77,7 @@ def test_temporary_files(tmp_path, path_to_str):


def test_create_context_with_all_valid_args():
runtime = RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(10000000)
runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000)
config = (
SessionConfig()
.with_create_default_catalog_and_schema(True)
Expand Down

0 comments on commit 6011e65

Please sign in to comment.