Skip to content

Commit

Permalink
avoid bug with checkpointing by switching to parquet
Browse files Browse the repository at this point in the history
  • Loading branch information
RobinL committed Nov 26, 2024
1 parent 9161712 commit 5568553
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion splink/internals/connected_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ def solve_connected_components(
SELECT representative FROM __splink__representatives_stable_{iteration}
)
"""
pipeline.enqueue_sql(sql, "__splink__representatives_unstable")
pipeline.enqueue_sql(sql, f"__splink__representatives_unstable_{iteration}")
prev_representatives_thinned = db_api.sql_pipeline_to_splink_dataframe(pipeline)

# 1a. Thin neighbours table - we can drop all rows that refer to
Expand Down
4 changes: 2 additions & 2 deletions tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def DatabaseAPI(self):
def db_api_args(self):
return {
"spark_session": self.spark,
"num_partitions_on_repartition": 1,
"break_lineage_method": "checkpoint",
"num_partitions_on_repartition": 2,
"break_lineage_method": "parquet",
}

def convert_frame(self, df):
Expand Down

0 comments on commit 5568553

Please sign in to comment.