Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tt 23249 increment lock_timeout gradually to reduce failing migrations #140

Merged
merged 12 commits into from
Apr 24, 2024
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,10 @@ SafePgMigrations.config.safe_timeout = 5.seconds # Statement timeout used for al

SafePgMigrations.config.lock_timeout = nil # Lock timeout used for all DDL operations except from CREATE / DROP INDEX. If not set, safe_timeout will be used with a deduction of 1% to ensure that the lock timeout is raised in priority

SafePgMigrations.config.increase_lock_timeout_on_retry # Activate the lock timeout increase feature on retry if set to true. See max_lock_timeout_for_retry for more information.

SafePgMigrations.config.max_lock_timeout_for_retry = 1.second # Max lock timeout for the retries for all DDL operations except from CREATE / DROP INDEX. Each retry will increase the lock_timeout (if increase_lock_timeout_on_retry option is set to true) by (max_lock_timeout_for_retry - lock_timeout) / max_tries

SafePgMigrations.config.blocking_activity_logger_verbose = true # Outputs the raw blocking queries on timeout. When false, outputs information about the lock instead

SafePgMigrations.config.sensitive_logger = nil # When given, sensitive data will be sent to this logger instead of the standard output. Must implement method `info`.
Expand Down
46 changes: 31 additions & 15 deletions lib/safe-pg-migrations/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,50 +5,66 @@
module SafePgMigrations
class Configuration
attr_accessor(*%i[
backfill_batch_size
backfill_pause
blocking_activity_logger_margin
blocking_activity_logger_verbose
default_value_backfill_threshold
backfill_batch_size
backfill_pause
retry_delay
increase_lock_timeout_on_retry
max_tries
retry_delay
sensitive_logger
])
attr_reader :lock_timeout, :safe_timeout
attr_reader :lock_timeout, :safe_timeout, :max_lock_timeout_for_retry

def initialize
self.default_value_backfill_threshold = nil
self.safe_timeout = 5.seconds
self.lock_timeout = nil
self.blocking_activity_logger_margin = 1.second
self.blocking_activity_logger_verbose = true
self.backfill_batch_size = 100_000
self.backfill_pause = 0.5.second
self.retry_delay = 1.minute
self.blocking_activity_logger_margin = 1.second
self.blocking_activity_logger_verbose = true
self.default_value_backfill_threshold = nil
self.increase_lock_timeout_on_retry = false
self.lock_timeout = nil
self.max_lock_timeout_for_retry = 1.second
self.max_tries = 5
self.retry_delay = 1.minute
self.safe_timeout = 5.seconds
self.sensitive_logger = nil
end

def lock_timeout=(value)
raise 'Setting lock timeout to 0 disables the lock timeout and is dangerous' if value == 0.seconds

unless value.nil? || value < safe_timeout
raise ArgumentError, "Lock timeout (#{value}) cannot be greater than safe timeout (#{safe_timeout})"
unless value.nil? || (value < safe_timeout && value <= max_lock_timeout_for_retry)
raise ArgumentError, "Lock timeout (#{value}) cannot be greater than the safe timeout (#{safe_timeout}) or the
max lock timeout for retry (#{max_lock_timeout_for_retry})"
end

@lock_timeout = value
end

def safe_timeout=(value)
raise 'Setting safe timeout to 0 disables the safe timeout and is dangerous' unless value
unless value && value > 0.seconds
frederic-martin-doctolib marked this conversation as resolved.
Show resolved Hide resolved
raise 'Setting safe timeout to 0 or nil disables the safe timeout and is dangerous'
end

unless lock_timeout.nil? || value > lock_timeout
raise ArgumentError, "Safe timeout (#{value}) cannot be less than lock timeout (#{lock_timeout})"
unless lock_timeout.nil? || (value > lock_timeout && value >= max_lock_timeout_for_retry)
raise ArgumentError, "Safe timeout (#{value}) cannot be lower than the lock timeout (#{lock_timeout}) or the
max lock timeout for retry (#{max_lock_timeout_for_retry})"
end

@safe_timeout = value
end

def max_lock_timeout_for_retry=(value)
unless lock_timeout.nil? || (value >= lock_timeout && value <= safe_timeout)
raise ArgumentError, "Max lock timeout for retry (#{value}) cannot be lower than the lock timeout
(#{lock_timeout}) and greater than the safe timeout (#{safe_timeout})"
end

@max_lock_timeout_for_retry = value
end

def pg_statement_timeout
pg_duration safe_timeout
end
Expand Down
34 changes: 30 additions & 4 deletions lib/safe-pg-migrations/plugins/statement_retrier.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,46 @@ module StatementRetrier
private

def retry_if_lock_timeout
remaining_tries = SafePgMigrations.config.max_tries
initial_lock_timeout = SafePgMigrations.config.lock_timeout
number_of_retries = 0
begin
remaining_tries -= 1
number_of_retries += 1
yield
rescue ActiveRecord::LockWaitTimeout
raise if transaction_open? # Retrying is useless if we're inside a transaction.
raise unless remaining_tries > 0
# Retrying is useless if we're inside a transaction.
if transaction_open? || number_of_retries >= SafePgMigrations.config.max_tries
SafePgMigrations.config.lock_timeout = initial_lock_timeout
raise
end

retry_delay = SafePgMigrations.config.retry_delay
Helpers::Logger.say "Retrying in #{retry_delay} seconds...", sub_item: true

if SafePgMigrations.config.increase_lock_timeout_on_retry && !SafePgMigrations.config.lock_timeout.nil?
increase_lock_timeout
end

sleep retry_delay
Helpers::Logger.say 'Retrying now.', sub_item: true
retry
end
end

def increase_lock_timeout
Helpers::Logger.say " Increasing the lock timeout... Currently set to #{SafePgMigrations.config.lock_timeout}",
sub_item: true
SafePgMigrations.config.lock_timeout = (SafePgMigrations.config.lock_timeout + lock_timeout_step)
unless SafePgMigrations.config.lock_timeout < SafePgMigrations.config.max_lock_timeout_for_retry
SafePgMigrations.config.lock_timeout = SafePgMigrations.config.max_lock_timeout_for_retry
end
Helpers::Logger.say " Lock timeout is now set to #{SafePgMigrations.config.lock_timeout}", sub_item: true
end

def lock_timeout_step
max_lock_timeout_for_retry = SafePgMigrations.config.max_lock_timeout_for_retry
lock_timeout = SafePgMigrations.config.lock_timeout
max_tries = SafePgMigrations.config.max_tries
@lock_timeout_step ||= (max_lock_timeout_for_retry - lock_timeout) / (max_tries - 1)
end
end
end
84 changes: 67 additions & 17 deletions test/statement_retrier_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,53 @@
require 'test_helper'

class StatementRetrierTest < Minitest::Test
def test_lock_timeout_increase_on_retry
SafePgMigrations.config.lock_timeout = 0.1.seconds
SafePgMigrations.config.increase_lock_timeout_on_retry = true

calls = calls_for_lock_timeout_migration

assert_equal [
' -> Retrying in 60 seconds...',
' -> Increasing the lock timeout... Currently set to 0.1',
' -> Lock timeout is now set to 0.325',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Increasing the lock timeout... Currently set to 0.325',
' -> Lock timeout is now set to 0.55',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Increasing the lock timeout... Currently set to 0.55',
' -> Lock timeout is now set to 0.775',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Increasing the lock timeout... Currently set to 0.775',
' -> Lock timeout is now set to 1',
' -> Retrying now.',
], calls[1..].map(&:first)
end

def test_no_lock_timeout_increase_on_retry_if_disabled
SafePgMigrations.config.lock_timeout = 0.1.seconds
SafePgMigrations.config.increase_lock_timeout_on_retry = false

calls = calls_for_lock_timeout_migration

assert_equal [
' -> Retrying in 60 seconds...',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Retrying now.',
' -> Retrying in 60 seconds...',
' -> Retrying now.',
], calls[1..].map(&:first)
end

def test_retry_if_lock_timeout
@migration =
Class.new(ActiveRecord::Migration::Current) do
def up
connection.send(:retry_if_lock_timeout) do
raise ActiveRecord::LockWaitTimeout, 'PG::LockNotAvailable: ERROR: canceling statement due to lock timeout'
end
end
end.new
calls = calls_for_lock_timeout_migration

@connection.expects(:sleep).times(4)
calls =
record_calls(@migration, :write) do
run_migration
flunk 'run_migration should raise'
rescue StandardError => e
assert_instance_of ActiveRecord::LockWaitTimeout, e.cause
assert_includes e.cause.message, 'canceling statement due to lock timeout'
end
assert_equal [
' -> Retrying in 60 seconds...',
' -> Retrying now.',
Expand All @@ -48,4 +76,26 @@ def test_statement_retry
' -> Retrying now.',
], calls[7..9]
end

private

def calls_for_lock_timeout_migration
@migration = Class.new(ActiveRecord::Migration::Current) do
def up
connection.send(:retry_if_lock_timeout) do
raise ActiveRecord::LockWaitTimeout, 'PG::LockNotAvailable: ERROR: canceling statement due to lock timeout'
end
end
end.new

@connection.expects(:sleep).times(4)

record_calls(@migration, :write) do
run_migration
flunk 'run_migration should raise'
rescue StandardError => e
assert_instance_of ActiveRecord::LockWaitTimeout, e.cause
assert_includes e.cause.message, 'canceling statement due to lock timeout'
end
end
end
Loading