Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP:MySQL Directory Format #84

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 69 additions & 32 deletions borgmatic/hooks/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,27 +61,39 @@ def database_names_to_dump(database, extra_environment, log_prefix, dry_run):
)


def ensure_directory_exists(path):
if not os.path.exists(path):
os.makedirs(path)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can actually replace this whole function with a call to:

os.makedirs(path, exist_ok=True)

And that should have the same effect! More info in the docs: https://docs.python.org/3/library/os.html#os.makedirs



def execute_dump_command(
database, log_prefix, dump_path, database_names, extra_environment, dry_run, dry_run_label
):
'''
Kick off a dump for the given MySQL/MariaDB database (provided as a configuration dict) to a
named pipe constructed from the given dump path and database name. Use the given log prefix in
any log entries.
directory if --tab is used, or to a file if not. Use the given log prefix in any log entries.

Return a subprocess.Popen instance for the dump process ready to spew to a named pipe. But if
this is a dry run, then don't actually dump anything and return None.
Return a subprocess.Popen instance for the dump process ready to spew to a named pipe or directory.
'''
database_name = database['name']
dump_filename = dump.make_data_source_dump_filename(
dump_path, database['name'], database.get('hostname')
)

if os.path.exists(dump_filename):
logger.warning(
f'{log_prefix}: Skipping duplicate dump of MySQL database "{database_name}" to {dump_filename}'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this duplicate-skipping logic also apply to directory-style dumps? They don't have a single filename, but they do have a dump dir that may or may not already exist. (Do not feel strongly.)

use_tab = '--tab' in (database.get('options') or '')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would expect this to look at the database's configured format instead. e.g. database.get('format') == 'directory' or whatever.

Maybe I'm not understanding the schema.


if use_tab:
ensure_directory_exists(dump_path)
dump_dir = os.path.join(dump_path, database_name)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason that dump.make_data_source_dump_filename() doesn't work for this purpose? That would have the benefit of using the standard directory layout that other dump formats use (~/.borgmatic/mysql_databases/[hostname]/[dbname]).

ensure_directory_exists(dump_dir)
result_file_option = ('--tab', dump_dir)
else:
dump_filename = dump.make_data_source_dump_filename(
dump_path, database['name'], database.get('hostname')
)
return None
if os.path.exists(dump_filename):
logger.warning(
f'{log_prefix}: Skipping duplicate dump of MySQL database "{database_name}" to {dump_filename}'
)
return None
result_file_option = ('--result-file', dump_filename)

mysql_dump_command = tuple(
shlex.quote(part) for part in shlex.split(database.get('mysql_dump_command') or 'mysqldump')
Expand All @@ -96,16 +108,17 @@ def execute_dump_command(
+ (('--user', database['username']) if 'username' in database else ())
+ ('--databases',)
+ database_names
+ ('--result-file', dump_filename)
+ result_file_option
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense!

)

logger.debug(
f'{log_prefix}: Dumping MySQL database "{database_name}" to {dump_filename}{dry_run_label}'
f'{log_prefix}: Dumping MySQL database "{database_name}" to {dump_path if use_tab else dump_filename}{dry_run_label}'
)
if dry_run:
return None

dump.create_named_pipe_for_dump(dump_filename)
if not use_tab:
dump.create_named_pipe_for_dump(result_file_option[1])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you also need to call execute_command() differently and return differently based on whether use_tab is True? My understanding is that standard file-based dumps stream to a pipe, while directory-based tab dumps ... don't.


return execute_command(
dump_command,
Expand All @@ -116,20 +129,19 @@ def execute_dump_command(

def use_streaming(databases, config, log_prefix):
'''
Given a sequence of MySQL database configuration dicts, a configuration dict (ignored), and a
log prefix (ignored), return whether streaming will be using during dumps.
Given a sequence of MySQL database configuration dicts, return whether streaming will be used during dumps.
Streaming is not used when using --tab.
'''
return any(databases)
return not any('--tab' in (db.get('options') or '') for db in databases)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar question here as above. Shouldn't this use format instead of options? And again, let me know if I'm just missing something.



def dump_data_sources(databases, config, log_prefix, dry_run):
'''
Dump the given MySQL/MariaDB databases to a named pipe. The databases are supplied as a sequence
of dicts, one dict describing each database as per the configuration schema. Use the given
configuration dict to construct the destination path and the given log prefix in any log entries.
Dump the given MySQL/MariaDB databases to a named pipe or directory based on configuration.
The databases are supplied as a sequence of dicts, one dict describing each database as per the configuration schema.
Use the given configuration dict to construct the destination path and the given log prefix in any log entries.

Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
Return a sequence of subprocess.Popen instances for the dump processes.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't expect the directory format dumps to return any streaming Popen instances since presumably they won't stream! So that logic might need to change in this function for that case.

The existing PostgreSQL hook does something similar for the directory format.

'''
dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''
processes = []
Expand Down Expand Up @@ -225,9 +237,11 @@ def restore_data_source_dump(
mysql_restore_command = tuple(
shlex.quote(part) for part in shlex.split(data_source.get('mysql_command') or 'mysql')
)

is_directory_format = data_source.get('format') == 'directory'

restore_command = (
mysql_restore_command
+ ('--batch',)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this still needed/beneficial? We certainly don't want interactive behavior from mysql when it's invoked from borgmatic.

+ (
tuple(data_source['restore_options'].split(' '))
if 'restore_options' in data_source
Expand All @@ -238,18 +252,41 @@ def restore_data_source_dump(
+ (('--protocol', 'tcp') if hostname or port else ())
+ (('--user', username) if username else ())
)

extra_environment = {'MYSQL_PWD': password} if password else None

logger.debug(f"{log_prefix}: Restoring MySQL database {data_source['name']}{dry_run_label}")

if dry_run:
return

# Don't give Borg local path so as to error on warnings, as "borg extract" only gives a warning
# if the restore paths don't exist in the archive.
execute_command_with_processes(
restore_command,
[extract_process],
output_log_level=logging.DEBUG,
input_file=extract_process.stdout,
extra_environment=extra_environment,
)
if is_directory_format:
dump_path = make_dump_path(config)
dump_directory = dump.make_data_source_dump_filename(
dump_path, data_source['name'], data_source.get('hostname')
)

if not os.path.exists(dump_directory):
logger.warning(f"{log_prefix}: Dump directory {dump_directory} does not exist.")
return

for filename in os.listdir(dump_directory):
if filename.endswith('.sql'):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, I might just be reading this wrong, but shouldn't these be .txt files, not .sql files? https://dev.mysql.com/doc/refman/8.4/en/mysqldump.html#option_mysqldump_tab

file_path = os.path.join(dump_directory, filename)
logger.debug(f"{log_prefix}: Restoring from {file_path}")
with open(file_path, 'r') as sql_file:
execute_command_with_processes(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you want the _with_processes() variant for this use case. That's intended for passing in, say, an extract_process so as to extract a single file into a restore command via background streaming. But that's not what you're doing here; this is presumably a synchronous call (... it's not streaming from borg extract). So, for that, maybe try just execute_command()?

restore_command,
[],
output_log_level=logging.DEBUG,
input_file=sql_file,
extra_environment=extra_environment,
)
else:
execute_command_with_processes(
restore_command,
[extract_process],
output_log_level=logging.DEBUG,
input_file=extract_process.stdout,
extra_environment=extra_environment,
)
42 changes: 42 additions & 0 deletions tests/unit/hooks/test_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,48 @@ def test_use_streaming_false_for_no_databases():
assert not module.use_streaming(databases=[], config=flexmock(), log_prefix=flexmock())


def test_use_streaming_true_for_any_non_directory_format_databases():
assert module.use_streaming(
databases=[{'format': 'stuff'}, {'format': 'directory'}, {}],
config=flexmock(),
log_prefix=flexmock(),
)


def test_use_streaming_true_for_all_directory_format_databases():
assert module.use_streaming(
databases=[{'format': 'directory'}, {'format': 'directory'}],
config=flexmock(),
log_prefix=flexmock(),
)


def test_dump_data_sources_runs_mysql_dump_with_directory_format():
databases = [{'name': 'foo', 'format': 'directory'}]
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(('foo',))
flexmock(module.dump).should_receive('make_data_source_dump_filename').and_return(
'databases/localhost/foo'
)
flexmock(module.os.path).should_receive('exists').and_return(False)
flexmock(module.dump).should_receive('create_parent_directory_for_dump')
flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()

flexmock(module).should_receive('execute_command').with_args(
(
'mysqldump',
'--add-drop-database',
'--databases',
'foo',
'--tab',
'databases/localhost/foo',
),
extra_environment=None,
).and_return(flexmock()).once()

assert module.dump_data_sources(databases, {}, 'test.yaml', dry_run=False) == []


def test_dump_data_sources_dumps_each_database():
databases = [{'name': 'foo'}, {'name': 'bar'}]
processes = [flexmock(), flexmock()]
Expand Down