From 6e7189a104ae54d1c2716a550be7abff412f1163 Mon Sep 17 00:00:00 2001 From: "Michael F." Date: Tue, 2 Jul 2024 17:20:33 +0200 Subject: [PATCH] limit compaction concurrency --- Makefile | 4 ++-- src/graphsenselib/ingest/delta/sink.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index f02cd79..88b7519 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ SHELL := /bin/bash PROJECT := graphsense-lib VENV := venv -RELEASE := 'v24.07.2' -RELEASESEM := 'v2.3.2' +RELEASE := 'v24.07.3' +RELEASESEM := 'v2.3.3' all: format lint test build diff --git a/src/graphsenselib/ingest/delta/sink.py b/src/graphsenselib/ingest/delta/sink.py index cd3bf17..08e705c 100644 --- a/src/graphsenselib/ingest/delta/sink.py +++ b/src/graphsenselib/ingest/delta/sink.py @@ -48,7 +48,9 @@ def optimize_table(table_path, storage_options=None, mode="both"): if mode in ["both", "compact"]: logger.debug("Compact table...") # some sources say 1GB, default in the lib is 256MB, we take 512MB - table.optimize.compact(target_size=512 * MB) + # we strive for a manageable amount of Memory consumption, so we limit + # the concurrency + table.optimize.compact(target_size=512 * MB, max_concurrent_tasks=20) if mode in ["both", "vacuum"]: logger.debug("Vacuum table...") table.vacuum(retention_hours=0, enforce_retention_duration=False, dry_run=False)