From f5a448c7c2d8392ceefe053c87a8d029a13ee9f2 Mon Sep 17 00:00:00 2001 From: Dan Helfman Date: Mon, 20 Feb 2023 15:18:51 -0800 Subject: [PATCH] Fix for potential data loss (data not getting backed up) when dumping large "directory" format PostgreSQL/MongoDB databases (#643). --- NEWS | 9 +++++++++ borgmatic/hooks/mongodb.py | 21 ++++++++++++--------- borgmatic/hooks/postgresql.py | 18 ++++++++++-------- setup.py | 2 +- tests/end-to-end/test_database.py | 3 +++ tests/unit/hooks/test_mongodb.py | 21 +++++++++------------ tests/unit/hooks/test_postgresql.py | 6 ++---- 7 files changed, 46 insertions(+), 34 deletions(-) diff --git a/NEWS b/NEWS index 9dca334..0b1e303 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,12 @@ +1.7.7 + * #643: Fix for potential data loss (data not getting backed up) when dumping large "directory" + format PostgreSQL/MongoDB databases. Prior to the fix, these dumps would not finish writing to + disk before Borg consumed them. Now, the dumping process completes before Borg starts. This only + applies to "directory" format databases; other formats still stream to Borg without using + temporary disk space. + * Fix MongoDB "directory" format to work with mongodump/mongorestore without error. Prior to this + fix, only the "archive" format worked. + 1.7.6 * #393, #438, #560: Optionally dump "all" PostgreSQL/MySQL databases to separate files instead of one combined dump file, allowing more convenient restores of individual databases. You can enable diff --git a/borgmatic/hooks/mongodb.py b/borgmatic/hooks/mongodb.py index cb1ec94..8c3cab7 100644 --- a/borgmatic/hooks/mongodb.py +++ b/borgmatic/hooks/mongodb.py @@ -45,13 +45,14 @@ def dump_databases(databases, log_prefix, location_config, dry_run): if dry_run: continue + command = build_dump_command(database, dump_filename, dump_format) + if dump_format == 'directory': dump.create_parent_directory_for_dump(dump_filename) + execute_command(command, shell=True) else: dump.create_named_pipe_for_dump(dump_filename) - - command = build_dump_command(database, dump_filename, dump_format) - processes.append(execute_command(command, shell=True, run_to_completion=False)) + processes.append(execute_command(command, shell=True, run_to_completion=False)) return processes @@ -61,9 +62,9 @@ def build_dump_command(database, dump_filename, dump_format): Return the mongodump command from a single database configuration. ''' all_databases = database['name'] == 'all' - command = ['mongodump', '--archive'] + command = ['mongodump'] if dump_format == 'directory': - command.append(dump_filename) + command.extend(('--out', dump_filename)) if 'hostname' in database: command.extend(('--host', database['hostname'])) if 'port' in database: @@ -79,7 +80,7 @@ def build_dump_command(database, dump_filename, dump_format): if 'options' in database: command.extend(database['options'].split(' ')) if dump_format != 'directory': - command.extend(('>', dump_filename)) + command.extend(('--archive', '>', dump_filename)) return command @@ -145,9 +146,11 @@ def build_restore_command(extract_process, database, dump_filename): ''' Return the mongorestore command from a single database configuration. ''' - command = ['mongorestore', '--archive'] - if not extract_process: - command.append(dump_filename) + command = ['mongorestore'] + if extract_process: + command.append('--archive') + else: + command.extend(('--dir', dump_filename)) if database['name'] != 'all': command.extend(('--drop', '--db', database['name'])) if 'hostname' in database: diff --git a/borgmatic/hooks/postgresql.py b/borgmatic/hooks/postgresql.py index d8ed919..98c1108 100644 --- a/borgmatic/hooks/postgresql.py +++ b/borgmatic/hooks/postgresql.py @@ -141,17 +141,19 @@ def dump_databases(databases, log_prefix, location_config, dry_run): if dump_format == 'directory': dump.create_parent_directory_for_dump(dump_filename) + execute_command( + command, shell=True, extra_environment=extra_environment, + ) else: dump.create_named_pipe_for_dump(dump_filename) - - processes.append( - execute_command( - command, - shell=True, - extra_environment=extra_environment, - run_to_completion=False, + processes.append( + execute_command( + command, + shell=True, + extra_environment=extra_environment, + run_to_completion=False, + ) ) - ) return processes diff --git a/setup.py b/setup.py index 0140d77..c322219 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import find_packages, setup -VERSION = '1.7.6' +VERSION = '1.7.7' setup( diff --git a/tests/end-to-end/test_database.py b/tests/end-to-end/test_database.py index a286af8..f981b40 100644 --- a/tests/end-to-end/test_database.py +++ b/tests/end-to-end/test_database.py @@ -14,6 +14,7 @@ def write_configuration( repository_path, borgmatic_source_directory, postgresql_dump_format='custom', + mongodb_dump_format='archive', ): ''' Write out borgmatic configuration into a file at the config path. Set the options so as to work @@ -67,6 +68,7 @@ hooks: username: root password: test authentication_database: admin + format: {mongodb_dump_format} - name: all hostname: mongodb username: root @@ -136,6 +138,7 @@ def test_database_dump_and_restore_with_directory_format(): repository_path, borgmatic_source_directory, postgresql_dump_format='directory', + mongodb_dump_format='directory', ) subprocess.check_call( diff --git a/tests/unit/hooks/test_mongodb.py b/tests/unit/hooks/test_mongodb.py index 6e1cb3d..9544a09 100644 --- a/tests/unit/hooks/test_mongodb.py +++ b/tests/unit/hooks/test_mongodb.py @@ -17,7 +17,7 @@ def test_dump_databases_runs_mongodump_for_each_database(): for name, process in zip(('foo', 'bar'), processes): flexmock(module).should_receive('execute_command').with_args( - ['mongodump', '--archive', '--db', name, '>', 'databases/localhost/{}'.format(name)], + ['mongodump', '--db', name, '--archive', '>', 'databases/localhost/{}'.format(name)], shell=True, run_to_completion=False, ).and_return(process).once() @@ -49,13 +49,13 @@ def test_dump_databases_runs_mongodump_with_hostname_and_port(): flexmock(module).should_receive('execute_command').with_args( [ 'mongodump', - '--archive', '--host', 'database.example.org', '--port', '5433', '--db', 'foo', + '--archive', '>', 'databases/database.example.org/foo', ], @@ -85,7 +85,6 @@ def test_dump_databases_runs_mongodump_with_username_and_password(): flexmock(module).should_receive('execute_command').with_args( [ 'mongodump', - '--archive', '--username', 'mongo', '--password', @@ -94,6 +93,7 @@ def test_dump_databases_runs_mongodump_with_username_and_password(): 'admin', '--db', 'foo', + '--archive', '>', 'databases/localhost/foo', ], @@ -106,7 +106,6 @@ def test_dump_databases_runs_mongodump_with_username_and_password(): def test_dump_databases_runs_mongodump_with_directory_format(): databases = [{'name': 'foo', 'format': 'directory'}] - process = flexmock() flexmock(module).should_receive('make_dump_path').and_return('') flexmock(module.dump).should_receive('make_database_dump_filename').and_return( 'databases/localhost/foo' @@ -115,12 +114,10 @@ def test_dump_databases_runs_mongodump_with_directory_format(): flexmock(module.dump).should_receive('create_named_pipe_for_dump').never() flexmock(module).should_receive('execute_command').with_args( - ['mongodump', '--archive', 'databases/localhost/foo', '--db', 'foo'], - shell=True, - run_to_completion=False, - ).and_return(process).once() + ['mongodump', '--out', 'databases/localhost/foo', '--db', 'foo'], shell=True, + ).and_return(flexmock()).once() - assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process] + assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [] def test_dump_databases_runs_mongodump_with_options(): @@ -133,7 +130,7 @@ def test_dump_databases_runs_mongodump_with_options(): flexmock(module.dump).should_receive('create_named_pipe_for_dump') flexmock(module).should_receive('execute_command').with_args( - ['mongodump', '--archive', '--db', 'foo', '--stuff=such', '>', 'databases/localhost/foo'], + ['mongodump', '--db', 'foo', '--stuff=such', '--archive', '>', 'databases/localhost/foo'], shell=True, run_to_completion=False, ).and_return(process).once() @@ -305,12 +302,12 @@ def test_restore_database_dump_with_dry_run_skips_restore(): def test_restore_database_dump_without_extract_process_restores_from_disk(): - database_config = [{'name': 'foo'}] + database_config = [{'name': 'foo', 'format': 'directory'}] flexmock(module).should_receive('make_dump_path') flexmock(module.dump).should_receive('make_database_dump_filename').and_return('/dump/path') flexmock(module).should_receive('execute_command_with_processes').with_args( - ['mongorestore', '--archive', '/dump/path', '--drop', '--db', 'foo'], + ['mongorestore', '--dir', '/dump/path', '--drop', '--db', 'foo'], processes=[], output_log_level=logging.DEBUG, input_file=None, diff --git a/tests/unit/hooks/test_postgresql.py b/tests/unit/hooks/test_postgresql.py index ce7a025..628e8c0 100644 --- a/tests/unit/hooks/test_postgresql.py +++ b/tests/unit/hooks/test_postgresql.py @@ -270,7 +270,6 @@ def test_make_extra_environment_maps_options_to_environment(): def test_dump_databases_runs_pg_dump_with_directory_format(): databases = [{'name': 'foo', 'format': 'directory'}] - process = flexmock() flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path').and_return('') flexmock(module).should_receive('database_names_to_dump').and_return(('foo',)) @@ -295,10 +294,9 @@ def test_dump_databases_runs_pg_dump_with_directory_format(): ), shell=True, extra_environment={'PGSSLMODE': 'disable'}, - run_to_completion=False, - ).and_return(process).once() + ).and_return(flexmock()).once() - assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process] + assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [] def test_dump_databases_runs_pg_dump_with_options():