Skip to content

Commit

Permalink
Revert "Add a batch_mode flag to VCF scalable queries (#683)"
Browse files Browse the repository at this point in the history
This reverts commit 9dd8695.
  • Loading branch information
sgillies committed Dec 4, 2024
1 parent e73ba66 commit db14949
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 29 deletions.
20 changes: 2 additions & 18 deletions src/tiledb/cloud/vcf/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,6 @@ def build_read_dag(
namespace: Optional[str] = None,
resource_class: Optional[str] = None,
verbose: bool = False,
batch_mode: bool = False,
) -> Tuple[tiledb.cloud.dag.DAG, tiledb.cloud.dag.Node]:
"""
Build the DAG for a distributed read on a TileDB-VCF dataset.
Expand All @@ -290,7 +289,6 @@ def build_read_dag(
:param namespace: TileDB-Cloud namespace, defaults to None
:param resource_class: TileDB-Cloud resource class for UDFs, defaults to None
:param verbose: verbose logging, defaults to False
:param batch_mode: run the query with batch UDFs, defaults to False
:return: DAG and result Node
"""

Expand Down Expand Up @@ -340,13 +338,10 @@ def build_read_dag(
logger.debug("num_sample_partitions=%d", num_sample_partitions)
logger.debug("num_region_partitions=%d", num_region_partitions)

mode = tiledb.cloud.dag.Mode.BATCH if batch_mode else tiledb.cloud.dag.Mode.REALTIME

dag = tiledb.cloud.dag.DAG(
namespace=namespace,
name="VCF-Distributed-Query",
max_workers=max_workers,
mode=mode,
)

# If `regions` is a Delayed object, we set the parent nodes to `dag` so the
Expand All @@ -356,12 +351,6 @@ def build_read_dag(
):
regions._DelayedBase__set_all_parent_nodes_same_dag(dag)

result_format = (
tiledb.cloud.UDFResultType.NATIVE
if batch_mode
else tiledb.cloud.UDFResultType.ARROW
)

tables = []
for region in range(num_region_partitions):
for sample in range(num_sample_partitions):
Expand All @@ -384,14 +373,12 @@ def build_read_dag(
name=f"VCF Query - Region {region+1}/{num_region_partitions},"
f" Sample {sample+1}/{num_sample_partitions}",
resource_class=resource_class,
result_format=result_format,
result_format=tiledb.cloud.UDFResultType.ARROW,
)
)

if len(tables) > 1:
submit = dag.submit if batch_mode else dag.submit_local

table = submit(
table = dag.submit_local(
concat_tables_udf,
tables,
config=config,
Expand Down Expand Up @@ -442,7 +429,6 @@ def read(
namespace: Optional[str] = None,
resource_class: Optional[str] = None,
verbose: bool = False,
batch_mode: bool = False,
) -> pa.Table:
"""
Run a distributed read on a TileDB-VCF dataset.
Expand All @@ -465,7 +451,6 @@ def read(
:param namespace: TileDB-Cloud namespace, defaults to None
:param resource_class: TileDB-Cloud resource class for UDFs, defaults to None
:param verbose: verbose logging, defaults to False
:param batch_mode: run the query with batch UDFs, defaults to False
:return: Arrow table containing the query results
"""

Expand All @@ -486,7 +471,6 @@ def read(
namespace=namespace,
resource_class=resource_class,
verbose=verbose,
batch_mode=batch_mode,
)

run_dag(dag, debug=verbose)
Expand Down
12 changes: 1 addition & 11 deletions tests/vcf/test_vcf.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,17 @@
import cloudpickle
import numpy as np
import pytest

import tiledb.cloud.vcf as vcf
import tiledb.cloud.vcf.vcf_toolbox as vtb

# Pickle the vcf module by value, so tests run on the latest code.
cloudpickle.register_pickle_by_value(vcf)


# Run VCF tests with:
# pytest -m vcf --run-vcf -n 8


@vtb.df_transform
def filter_vcf(df, *, filter=None):
    """Return the rows of ``df`` selected by the ``filter`` query expression.

    :param df: object exposing a ``query`` method (presumably a pandas
        DataFrame supplied by ``vtb.df_transform`` — confirm against the
        decorator)
    :param filter: expression passed unchanged to ``df.query``
    :return: whatever ``df.query(filter)`` returns
    """
    selected = df.query(filter)
    return selected


@pytest.mark.vcf
@pytest.mark.parametrize("batch_mode", [False, True])
def test_vcf_transform(batch_mode):
def test_vcf_transform():
vcf_uri = "tiledb://TileDB-Inc/vcf-1kg-dragen-v376"

regions = [
Expand Down Expand Up @@ -48,7 +39,6 @@ def test_vcf_transform(batch_mode):
regions=regions,
samples="NA12878",
transform_result=filter_vcf(filter=filter),
batch_mode=batch_mode,
)

assert vcf_table.num_rows == 336
Expand Down

0 comments on commit db14949

Please sign in to comment.