diff --git a/Makefile b/Makefile index f02cd79..88b7519 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ SHELL := /bin/bash PROJECT := graphsense-lib VENV := venv -RELEASE := 'v24.07.2' -RELEASESEM := 'v2.3.2' +RELEASE := 'v24.07.3' +RELEASESEM := 'v2.3.3' all: format lint test build diff --git a/src/graphsenselib/ingest/delta/sink.py b/src/graphsenselib/ingest/delta/sink.py index cd3bf17..08e705c 100644 --- a/src/graphsenselib/ingest/delta/sink.py +++ b/src/graphsenselib/ingest/delta/sink.py @@ -48,7 +48,9 @@ def optimize_table(table_path, storage_options=None, mode="both"): if mode in ["both", "compact"]: logger.debug("Compact table...") # some sources say 1GB, default in the lib is 256MB, we take 512MB - table.optimize.compact(target_size=512 * MB) + # we strive for a manageable amount of Memory consumption, so we limit + # the concurrency + table.optimize.compact(target_size=512 * MB, max_concurrent_tasks=20) if mode in ["both", "vacuum"]: logger.debug("Vacuum table...") table.vacuum(retention_hours=0, enforce_retention_duration=False, dry_run=False)