Add mongodb dump hook

This commit is contained in:
Andrea Ghensi 2021-12-26 01:00:58 +01:00
parent a1673d1fa1
commit 6b7653484b
6 changed files with 527 additions and 5 deletions

View file

@ -773,6 +773,71 @@ properties:
mysqldump/mysql commands (from either MySQL or MariaDB). See
https://dev.mysql.com/doc/refman/8.0/en/mysqldump.html or
https://mariadb.com/kb/en/library/mysqldump/ for details.
mongodb_databases:
type: array
items:
type: object
required: ['name']
additionalProperties: false
properties:
name:
type: string
description: |
Database name (required if using this hook). Or
"all" to dump all databases on the host. Note
that using this database hook implicitly enables
both read_special and one_file_system (see
above) to support dump and restore streaming.
example: users
hostname:
type: string
description: |
Database hostname to connect to. Defaults to
connecting to localhost.
example: database.example.org
port:
type: integer
description: Port to connect to. Defaults to 27017.
example: 27017
username:
type: string
description: |
Username with which to connect to the database.
Skip it if no authentication is needed.
example: dbuser
password:
type: string
description: |
Password with which to connect to the database.
Skip it if no authentication is needed.
example: trustsome1
format:
type: string
enum: ['archive', 'directory']
description: |
Database dump output format. One of "archive",
or "directory". Defaults to "archive" (unlike
raw pg_dump). See pg_dump documentation for
details. Note that format is ignored when the
database name is "all".
example: directory
options:
type: string
description: |
Additional mongodump options to pass
directly to the dump command, without performing
any validation on them. See mongodump
documentation for details.
example: --role=someone
description: |
List of one or more MongoDB databases to dump before
creating a backup, run once per configuration file. The
database dumps are added to your source directories at
runtime, backed up, and removed afterwards. Requires
mongodump/mongorestore commands. See
https://docs.mongodb.com/database-tools/mongodump/ and
https://docs.mongodb.com/database-tools/mongorestore/ for
details.
healthchecks:
type: string
description: |

View file

@ -1,6 +1,6 @@
import logging
from borgmatic.hooks import cronhub, cronitor, healthchecks, mysql, pagerduty, postgresql
from borgmatic.hooks import cronhub, cronitor, healthchecks, mongodb, mysql, pagerduty, postgresql
logger = logging.getLogger(__name__)
@ -11,6 +11,7 @@ HOOK_NAME_TO_MODULE = {
'pagerduty': pagerduty,
'postgresql_databases': postgresql,
'mysql_databases': mysql,
'mongodb_databases': mongodb,
}

View file

@ -6,7 +6,7 @@ from borgmatic.borg.create import DEFAULT_BORGMATIC_SOURCE_DIRECTORY
logger = logging.getLogger(__name__)
DATABASE_HOOK_NAMES = ('postgresql_databases', 'mysql_databases')
DATABASE_HOOK_NAMES = ('postgresql_databases', 'mysql_databases', 'mongodb_databases')
def make_database_dump_path(borgmatic_source_directory, database_hook_name):

158
borgmatic/hooks/mongodb.py Normal file
View file

@ -0,0 +1,158 @@
import logging
from borgmatic.execute import execute_command, execute_command_with_processes
from borgmatic.hooks import dump
logger = logging.getLogger(__name__)
def make_dump_path(location_config): # pragma: no cover
'''
Make the dump path from the given location configuration and the name of this hook.
'''
return dump.make_database_dump_path(
location_config.get('borgmatic_source_directory'), 'mongodb_databases'
)
def dump_databases(databases, log_prefix, location_config, dry_run):
'''
Dump the given MongoDB databases to a named pipe. The databases are supplied as a sequence of
dicts, one dict describing each database as per the configuration schema. Use the given log
prefix in any log entries. Use the given location configuration dict to construct the
destination path.
Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
'''
dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''
logger.info('{}: Dumping MongoDB databases{}'.format(log_prefix, dry_run_label))
processes = []
for database in databases:
name = database['name']
dump_filename = dump.make_database_dump_filename(
make_dump_path(location_config), name, database.get('hostname')
)
dump_format = database.get('format', 'custom')
logger.debug(
'{}: Dumping MongoDB database {} to {}{}'.format(
log_prefix, name, dump_filename, dry_run_label
)
)
if dry_run:
continue
if dump_format == 'directory':
dump.create_parent_directory_for_dump(dump_filename)
else:
dump.create_named_pipe_for_dump(dump_filename)
command = build_dump_command(database, dump_filename, dump_format)
processes.append(execute_command(command, shell=True, run_to_completion=False))
return processes
def build_dump_command(database, dump_filename, dump_format):
'''
Return the mongodump command from a single database configuration.
'''
all_databases = database['name'] == 'all'
command = ['mongodump', '--archive']
if dump_format == 'directory':
command.append(dump_filename)
if 'hostname' in database:
command.extend(('--host', database['hostname']))
if 'port' in database:
command.extend(('--port', str(database['port'])))
if 'username' in database:
command.extend(('--username', database['username']))
if 'password' in database:
command.extend(('--password', database['password']))
if not all_databases:
command.extend(('--db', database['name']))
if 'options' in database:
command.extend(database['options'].split(' '))
if dump_format != 'directory':
command.extend(('>', dump_filename))
return command
def remove_database_dumps(databases, log_prefix, location_config, dry_run): # pragma: no cover
'''
Remove all database dump files for this hook regardless of the given databases. Use the log
prefix in any log entries. Use the given location configuration dict to construct the
destination path. If this is a dry run, then don't actually remove anything.
'''
dump.remove_database_dumps(make_dump_path(location_config), 'MongoDB', log_prefix, dry_run)
def make_database_dump_pattern(
databases, log_prefix, location_config, name=None
): # pragma: no cover
'''
Given a sequence of configurations dicts, a prefix to log with, a location configuration dict,
and a database name to match, return the corresponding glob patterns to match the database dump
in an archive.
'''
return dump.make_database_dump_filename(make_dump_path(location_config), name, hostname='*')
def restore_database_dump(database_config, log_prefix, location_config, dry_run, extract_process):
'''
Restore the given MongoDB database from an extract stream. The database is supplied as a
one-element sequence containing a dict describing the database, as per the configuration schema.
Use the given log prefix in any log entries. If this is a dry run, then don't actually restore
anything. Trigger the given active extract process (an instance of subprocess.Popen) to produce
output to consume.
If the extract process is None, then restore the dump from the filesystem rather than from an
extract stream.
'''
dry_run_label = ' (dry run; not actually restoring anything)' if dry_run else ''
if len(database_config) != 1:
raise ValueError('The database configuration value is invalid')
database = database_config[0]
dump_filename = dump.make_database_dump_filename(
make_dump_path(location_config), database['name'], database.get('hostname')
)
restore_command = build_restore_command(extract_process, database, dump_filename)
logger.debug(
'{}: Restoring MongoDB database {}{}'.format(log_prefix, database['name'], dry_run_label)
)
if dry_run:
return
execute_command_with_processes(
restore_command,
[extract_process] if extract_process else [],
output_log_level=logging.DEBUG,
input_file=extract_process.stdout if extract_process else None,
borg_local_path=location_config.get('local_path', 'borg'),
)
def build_restore_command(extract_process, database, dump_filename):
'''
Return the mongorestore command from a single database configuration.
'''
command = ['mongorestore', '--archive']
if not extract_process:
command.append(dump_filename)
if database['name'] != 'all':
command.extend(('--drop', '--db', database['name']))
if 'hostname' in database:
command.extend(('--host', database['hostname']))
if 'port' in database:
command.extend(('--port', str(database['port'])))
if 'username' in database:
command.extend(('--username', database['username']))
if 'password' in database:
command.extend(('--password', database['password']))
return command

View file

@ -47,13 +47,21 @@ hooks:
hostname: mysql
username: root
password: test
mongodb_databases:
- name: test
hostname: mongodb
username: root
password: test
- name: all
hostname: mongodb
username: root
password: test
'''.format(
config_path, repository_path, borgmatic_source_directory, postgresql_dump_format
)
config_file = open(config_path, 'w')
with open(config_path, 'w') as config_file:
config_file.write(config)
config_file.close()
def test_database_dump_and_restore():

View file

@ -0,0 +1,290 @@
import logging
import pytest
from flexmock import flexmock
from borgmatic.hooks import mongodb as module
def test_dump_databases_runs_mongodump_for_each_database():
databases = [{'name': 'foo'}, {'name': 'bar'}]
processes = [flexmock(), flexmock()]
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
).and_return('databases/localhost/bar')
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
for name, process in zip(('foo', 'bar'), processes):
flexmock(module).should_receive('execute_command').with_args(
['mongodump', '--archive', '--db', name, '>', 'databases/localhost/{}'.format(name)],
shell=True,
run_to_completion=False,
).and_return(process).once()
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == processes
def test_dump_databases_with_dry_run_skips_mongodump():
databases = [{'name': 'foo'}, {'name': 'bar'}]
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
).and_return('databases/localhost/bar')
flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
flexmock(module).should_receive('execute_command').never()
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=True) == []
def test_dump_databases_runs_mongodump_with_hostname_and_port():
databases = [{'name': 'foo', 'hostname': 'database.example.org', 'port': 5433}]
process = flexmock()
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/database.example.org/foo'
)
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
flexmock(module).should_receive('execute_command').with_args(
[
'mongodump',
'--archive',
'--host',
'database.example.org',
'--port',
'5433',
'--db',
'foo',
'>',
'databases/database.example.org/foo',
],
shell=True,
run_to_completion=False,
).and_return(process).once()
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]
def test_dump_databases_runs_mongodump_with_username_and_password():
databases = [{'name': 'foo', 'username': 'mongo', 'password': 'trustsome1'}]
process = flexmock()
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
)
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
flexmock(module).should_receive('execute_command').with_args(
[
'mongodump',
'--archive',
'--username',
'mongo',
'--password',
'trustsome1',
'--db',
'foo',
'>',
'databases/localhost/foo',
],
shell=True,
run_to_completion=False,
).and_return(process).once()
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]
def test_dump_databases_runs_mongodump_with_directory_format():
databases = [{'name': 'foo', 'format': 'directory'}]
process = flexmock()
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
)
flexmock(module.dump).should_receive('create_parent_directory_for_dump')
flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
flexmock(module).should_receive('execute_command').with_args(
['mongodump', '--archive', 'databases/localhost/foo', '--db', 'foo'],
shell=True,
run_to_completion=False,
).and_return(process).once()
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]
def test_dump_databases_runs_mongodump_with_options():
databases = [{'name': 'foo', 'options': '--stuff=such'}]
process = flexmock()
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
)
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
flexmock(module).should_receive('execute_command').with_args(
['mongodump', '--archive', '--db', 'foo', '--stuff=such', '>', 'databases/localhost/foo'],
shell=True,
run_to_completion=False,
).and_return(process).once()
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]
def test_dump_databases_runs_mongodumpall_for_all_databases():
databases = [{'name': 'all'}]
process = flexmock()
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/all'
)
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
flexmock(module).should_receive('execute_command').with_args(
['mongodump', '--archive', '>', 'databases/localhost/all'],
shell=True,
run_to_completion=False,
).and_return(process).once()
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]
def test_restore_database_dump_runs_pg_restore():
database_config = [{'name': 'foo'}]
extract_process = flexmock(stdout=flexmock())
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('execute_command_with_processes').with_args(
['mongorestore', '--archive', '--drop', '--db', 'foo'],
processes=[extract_process],
output_log_level=logging.DEBUG,
input_file=extract_process.stdout,
borg_local_path='borg',
).once()
module.restore_database_dump(
database_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
)
def test_restore_database_dump_errors_on_multiple_database_config():
database_config = [{'name': 'foo'}, {'name': 'bar'}]
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('execute_command_with_processes').never()
flexmock(module).should_receive('execute_command').never()
with pytest.raises(ValueError):
module.restore_database_dump(
database_config, 'test.yaml', {}, dry_run=False, extract_process=flexmock()
)
def test_restore_database_dump_runs_pg_restore_with_hostname_and_port():
database_config = [{'name': 'foo', 'hostname': 'database.example.org', 'port': 5433}]
extract_process = flexmock(stdout=flexmock())
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('execute_command_with_processes').with_args(
[
'mongorestore',
'--archive',
'--drop',
'--db',
'foo',
'--host',
'database.example.org',
'--port',
'5433',
],
processes=[extract_process],
output_log_level=logging.DEBUG,
input_file=extract_process.stdout,
borg_local_path='borg',
).once()
module.restore_database_dump(
database_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
)
def test_restore_database_dump_runs_pg_restore_with_username_and_password():
database_config = [{'name': 'foo', 'username': 'mongo', 'password': 'trustsome1'}]
extract_process = flexmock(stdout=flexmock())
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('execute_command_with_processes').with_args(
[
'mongorestore',
'--archive',
'--drop',
'--db',
'foo',
'--username',
'mongo',
'--password',
'trustsome1',
],
processes=[extract_process],
output_log_level=logging.DEBUG,
input_file=extract_process.stdout,
borg_local_path='borg',
).once()
module.restore_database_dump(
database_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
)
def test_restore_database_dump_runs_psql_for_all_database_dump():
database_config = [{'name': 'all'}]
extract_process = flexmock(stdout=flexmock())
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('execute_command_with_processes').with_args(
['mongorestore', '--archive'],
processes=[extract_process],
output_log_level=logging.DEBUG,
input_file=extract_process.stdout,
borg_local_path='borg',
).once()
module.restore_database_dump(
database_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
)
def test_restore_database_dump_with_dry_run_skips_restore():
database_config = [{'name': 'foo'}]
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename')
flexmock(module).should_receive('execute_command_with_processes').never()
module.restore_database_dump(
database_config, 'test.yaml', {}, dry_run=True, extract_process=flexmock()
)
def test_restore_database_dump_without_extract_process_restores_from_disk():
database_config = [{'name': 'foo'}]
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename').and_return('/dump/path')
flexmock(module).should_receive('execute_command_with_processes').with_args(
['mongorestore', '--archive', '/dump/path', '--drop', '--db', 'foo'],
processes=[],
output_log_level=logging.DEBUG,
input_file=None,
borg_local_path='borg',
).once()
module.restore_database_dump(
database_config, 'test.yaml', {}, dry_run=False, extract_process=None
)