Skip to content

Commit

Permalink
Merge pull request #3853 from SFDO-Tooling/W-17357226/zero_threshold_issue
Browse files Browse the repository at this point in the history

@W-17357226: Fix for issue where zero threshold defaulted to select
  • Loading branch information
aditya-balachander authored Dec 6, 2024
2 parents 2a30113 + d67fc6b commit 5192132
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 4 deletions.
4 changes: 2 additions & 2 deletions cumulusci/tasks/bulkdata/select_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ def annoy_post_process(
# Retrieve the corresponding record from the database
record = query_record_data[neighbor_index]
closest_record_id = record_to_id_map[tuple(record)]
if threshold and (neighbor_distances[idx] >= threshold):
if threshold is not None and (neighbor_distances[idx] >= threshold):
selected_records.append(None)
insertion_candidates.append(load_shaped_records[i])
else:
Expand Down Expand Up @@ -445,7 +445,7 @@ def levenshtein_post_process(
select_record, target_records, similarity_weights
)

if distance_threshold and match_distance > distance_threshold:
if distance_threshold is not None and match_distance > distance_threshold:
# Append load record for insertion if distance exceeds threshold
insertion_candidates.append(load_record)
selected_records.append(None)
Expand Down
189 changes: 187 additions & 2 deletions cumulusci/tasks/bulkdata/tests/test_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -1232,7 +1232,9 @@ def test_process_insert_records_failure(self, download_mock):
)

@mock.patch("cumulusci.tasks.bulkdata.step.download_file")
def test_select_records_similarity_strategy__insert_records(self, download_mock):
def test_select_records_similarity_strategy__insert_records__non_zero_threshold(
self, download_mock
):
# Set up mock context and BulkApiDmlOperation
context = mock.Mock()
# Add step with threshold
Expand Down Expand Up @@ -1325,6 +1327,102 @@ def test_select_records_similarity_strategy__insert_records(self, download_mock)
== 1
)

@mock.patch("cumulusci.tasks.bulkdata.step.download_file")
def test_select_records_similarity_strategy__insert_records__zero_threshold(
    self, download_mock
):
    """Run a similarity select with ``threshold=0`` through the Bulk API step.

    The mocked query download yields one Contact and the mocked insert
    download yields two created rows. Three records are fed in, and the
    test expects three results: one carrying the queried Id with
    ``created=False`` and two carrying the inserted Ids with
    ``created=True``.
    """
    # QUERY step on Contact using the similarity strategy with a zero threshold
    dml_step = BulkApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.QUERY,
        api_options={"batch_size": 10, "update_key": "LastName"},
        context=mock.Mock(),
        fields=["Name", "Email"],
        selection_strategy=SelectStrategy.SIMILARITY,
        threshold=0,
    )

    # Canned Bulk API responses for the query job
    dml_step.bulk.endpoint = "https://test"
    dml_step.bulk.create_query_job.return_value = "JOB"
    dml_step.bulk.query.return_value = "BATCH"
    dml_step.bulk.get_query_batch_result_ids.return_value = ["RESULT"]

    # First download: the select result (one record).
    # Second download: the insert result (two created rows).
    download_mock.side_effect = [
        io.StringIO(
            """[{"Id":"003000000000001", "Name":"Jawad", "Email":"[email protected]"}]"""
        ),
        io.StringIO(
            "Id,Success,Created\n003000000000002,true,true\n003000000000003,true,true\n"
        ),
    ]

    # Job polling always reports success
    dml_step._wait_for_job = mock.Mock(
        return_value=DataOperationJobResult(DataOperationStatus.SUCCESS, [], 0, 0)
    )

    # Records handed to select_records
    load_rows = iter(
        [
            ["Jawad", "[email protected]"],
            ["Aditya", "[email protected]"],
            ["Tom", "[email protected]"],
        ]
    )

    # Stand-in returned whenever the step module instantiates BulkApiDmlOperation
    insert_op = mock.Mock(spec=BulkApiDmlOperation)
    insert_op.configure_mock(
        start=mock.Mock(),
        load_records=mock.Mock(),
        end=mock.Mock(),
        batch_ids=["BATCH1"],
        bulk=mock.Mock(endpoint="https://test"),
        job_id="JOB",
    )

    with mock.patch(
        "cumulusci.tasks.bulkdata.step.BulkApiDmlOperation",
        return_value=insert_op,
    ):
        # Drive the select operation end to end
        dml_step.start()
        dml_step.select_records(load_rows)
        dml_step.end()

        outcome = list(dml_step.get_results())

        # One result per input record
        assert len(outcome) == 3

        # Each expected result must appear exactly once
        expected_results = [
            DataOperationResult(
                id="003000000000001", success=True, error="", created=False
            ),
            DataOperationResult(
                id="003000000000002", success=True, error="", created=True
            ),
            DataOperationResult(
                id="003000000000003", success=True, error="", created=True
            ),
        ]
        for wanted in expected_results:
            assert outcome.count(wanted) == 1

@mock.patch("cumulusci.tasks.bulkdata.step.download_file")
def test_select_records_similarity_strategy__insert_records__no_select_records(
self, download_mock
Expand Down Expand Up @@ -2807,7 +2905,9 @@ def test_process_insert_records_failure(self):
mock_rest_api_dml_operation.end.assert_not_called()

@responses.activate
def test_select_records_similarity_strategy__insert_records(self):
def test_select_records_similarity_strategy__insert_records__non_zero_threshold(
self,
):
mock_describe_calls()
task = _make_task(
LoadData,
Expand Down Expand Up @@ -2891,6 +2991,91 @@ def test_select_records_similarity_strategy__insert_records(self):
== 1
)

@responses.activate
def test_select_records_similarity_strategy__insert_records__zero_threshold(self):
    """Run a similarity select with ``threshold=0`` through the REST API step.

    ``step.sf.restful`` is mocked to return one queried Contact and then
    two created rows. Three records are fed in, and the test expects three
    results: one carrying the queried Id with ``created=False`` and two
    carrying the inserted Ids with ``created=True``.
    """
    mock_describe_calls()
    load_task = _make_task(
        LoadData,
        {
            "options": {
                "database_url": "sqlite:///test.db",
                "mapping": "mapping.yml",
            }
        },
    )
    load_task.project_config.project__package__api_version = CURRENT_SF_API_VERSION
    load_task._init_task()

    # REST step using the similarity strategy with a zero threshold
    dml_step = RestApiDmlOperation(
        sobject="Contact",
        operation=DataOperationType.UPSERT,
        api_options={"batch_size": 10},
        context=load_task,
        fields=["Name", "Email"],
        selection_strategy=SelectStrategy.SIMILARITY,
        threshold=0,
    )

    # Payload for the first restful() call: the select query
    select_payload = {
        "records": [
            {
                "Id": "003000000000001",
                "Name": "Jawad",
                "Email": "[email protected]",
            },
        ],
        "done": True,
    }
    # Payload for the second restful() call: the insert of the remaining records
    insert_payload = [
        {"id": "003000000000002", "success": True, "created": True},
        {"id": "003000000000003", "success": True, "created": True},
    ]
    dml_step.sf.restful = mock.Mock(side_effect=[select_payload, insert_payload])

    # Records handed to select_records
    load_rows = iter(
        [
            ["Jawad", "[email protected]"],
            ["Aditya", "[email protected]"],
            ["Tom Cruise", "[email protected]"],
        ]
    )

    dml_step.start()
    dml_step.select_records(load_rows)
    dml_step.end()

    outcome = list(dml_step.get_results())

    # One result per input record
    assert len(outcome) == 3

    # Each expected result must appear exactly once
    expected_results = [
        DataOperationResult(
            id="003000000000001", success=True, error="", created=False
        ),
        DataOperationResult(
            id="003000000000002", success=True, error="", created=True
        ),
        DataOperationResult(
            id="003000000000003", success=True, error="", created=True
        ),
    ]
    for wanted in expected_results:
        assert outcome.count(wanted) == 1

@responses.activate
def test_insert_dml_operation__boolean_conversion(self):
mock_describe_calls()
Expand Down

0 comments on commit 5192132

Please sign in to comment.