From ff72d03c567b76939db8d7450fad2a1d20879b5b Mon Sep 17 00:00:00 2001 From: Beth Skurrie Date: Tue, 2 Apr 2024 11:57:59 +1100 Subject: [PATCH] feat: reduce contention when updating the contract_data_updated_at field for integrations (#671) --- lib/pact_broker/integrations/repository.rb | 39 ++++++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/lib/pact_broker/integrations/repository.rb b/lib/pact_broker/integrations/repository.rb index 7568eed66..9890171a6 100644 --- a/lib/pact_broker/integrations/repository.rb +++ b/lib/pact_broker/integrations/repository.rb @@ -50,18 +50,43 @@ def delete(consumer_id, provider_id) # @param [PactBroker::Domain::Pacticipant, nil] consumer the consumer for the integration, or nil if for a provider-only event (eg. Pactflow provider contract published) # @param [PactBroker::Domain::Pacticipant] provider the provider for the integration def set_contract_data_updated_at(consumer, provider) - Integration - .where({ consumer_id: consumer&.id, provider_id: provider.id }.compact ) - .update(contract_data_updated_at: Sequel.datetime_class.now) + set_contract_data_updated_at_for_multiple_integrations([OpenStruct.new(consumer: consumer, provider: provider)]) end - # Sets the contract_data_updated_at for the integrations as specified by an array of objects which each have a consumer and provider - # @param [Array] where each object has a consumer and a provider + # Sets the contract_data_updated_at for the integrations as specified by an array of objects which each have a consumer and provider. + # + # The contract_data_updated_at attribute is only ever used for ordering the list of integrations on the index page of the *Pact Broker* UI, + # so that the most recently updated integrations (the ones you're likely working on) are showed at the top of the first page. + # There is often contention around updating it however, which can cause deadlocks, and slow down API responses. + # Because it's not a critical field (eg. it won't change any can-i-deploy results), the easiest way to reduce this contention + # is to just not update it if the row is locked, because if it is locked, the value of contract_data_updated_at is already + # going to be a date from a few seconds ago, which is perfectly fine for the purposes for which we are using the value. + # + # Notes on SKIP LOCKED: + # SKIP LOCKED is only supported by Postgres. + # When executing SELECT ... FOR UPDATE SKIP LOCKED, the SELECT will run immediately, not waiting for any other transactions, + # and only return rows that are not already locked by another transaction. + # The FOR UPDATE is required to make it work this way - SKIP LOCKED on its own does not work. + # + # @param [Array] where each object MAY have a consumer and does have a provider (for Pactflow provider contract published there is no consumer) def set_contract_data_updated_at_for_multiple_integrations(objects_with_consumer_and_provider) - consumer_and_provider_ids = objects_with_consumer_and_provider.collect{ | object | [object.consumer.id, object.provider.id] }.uniq + consumer_and_provider_ids = objects_with_consumer_and_provider.collect{ | object | { consumer_id: object.consumer&.id, provider_id: object.provider.id }.compact }.uniq + + # MySQL doesn't support an UPDATE with a subquery. FFS. Really need to do a major version release and delete the support code. + criteria = if Integration.dataset.supports_skip_locked? + integration_ids_to_update = Integration + .select(:id) + .where(Sequel.|(*consumer_and_provider_ids)) + .for_update + .skip_locked + { id: integration_ids_to_update } + else + Sequel.|(*consumer_and_provider_ids) + end + Integration - .where([:consumer_id, :provider_id] => consumer_and_provider_ids) + .where(criteria) .update(contract_data_updated_at: Sequel.datetime_class.now) end end