diff --git a/dlt/extract/incremental/transform.py b/dlt/extract/incremental/transform.py index 29f986db05..d9310a3884 100644 --- a/dlt/extract/incremental/transform.py +++ b/dlt/extract/incremental/transform.py @@ -117,6 +117,8 @@ def __call__( def deduplication_disabled(self) -> bool: """Skip deduplication when length of the key is 0 or if lag is applied.""" # disable deduplication if end value is set - state is not saved + if self.range_start == "open": + return True if self.end_value is not None: return True # disable deduplication if lag is applied - destination must deduplicate ranges @@ -232,6 +234,7 @@ def __call__( # new_value is "less" or equal to last_value (the actual max) if last_value == new_value: if self.range_start == "open": + # We only want greater than last_value return None, False, False # use func to compute row_value into last_value compatible processed_row_value = last_value_func((row_value,)) diff --git a/tests/extract/test_incremental.py b/tests/extract/test_incremental.py index 73abd84447..5c96098343 100644 --- a/tests/extract/test_incremental.py +++ b/tests/extract/test_incremental.py @@ -3896,6 +3896,32 @@ def some_data( assert items == expected_items +@pytest.mark.parametrize("item_type", ALL_TEST_DATA_ITEM_FORMATS) +def test_start_range_open_no_deduplication(item_type: TestDataItemFormat) -> None: + @dlt.source + def dummy(): + @dlt.resource + def some_data( + updated_at: dlt.sources.incremental[int] = dlt.sources.incremental( + "updated_at", + range_start="open", + ) + ): + yield [{"updated_at": i} for i in range(3)] + + yield some_data + + pipeline = dlt.pipeline(pipeline_name=uniq_id()) + pipeline.extract(dummy()) + + state = pipeline.state["sources"]["dummy"]["resources"]["some_data"]["incremental"][ + "updated_at" + ] + + # No unique values should be computed + assert state["unique_hashes"] == [] + + @pytest.mark.parametrize("item_type", ALL_TEST_DATA_ITEM_FORMATS) @pytest.mark.parametrize("last_value_func", [min, max]) def test_end_range_closed(item_type: TestDataItemFormat, last_value_func: Any) -> None: