Add MongoDB database hook (#288).

Reviewed-on: https://projects.torsion.org/borgmatic-collective/borgmatic/pulls/483
This commit is contained in:
Dan Helfman 2022-01-04 23:50:25 +00:00
commit 07d7ae60d5
9 changed files with 567 additions and 16 deletions

View file

@ -12,6 +12,11 @@ services:
environment:
MYSQL_ROOT_PASSWORD: test
MYSQL_DATABASE: test
- name: mongodb
image: mongo:5.0.5
environment:
MONGO_INITDB_ROOT_USERNAME: root
MONGO_INITDB_ROOT_PASSWORD: test
clone:
skip_verify: true

View file

@ -773,6 +773,81 @@ properties:
mysqldump/mysql commands (from either MySQL or MariaDB). See
https://dev.mysql.com/doc/refman/8.0/en/mysqldump.html or
https://mariadb.com/kb/en/library/mysqldump/ for details.
mongodb_databases:
type: array
items:
type: object
required: ['name']
additionalProperties: false
properties:
name:
type: string
description: |
Database name (required if using this hook). Or
"all" to dump all databases on the host. Note
that using this database hook implicitly enables
both read_special and one_file_system (see
above) to support dump and restore streaming.
example: users
hostname:
type: string
description: |
Database hostname to connect to. Defaults to
connecting to localhost.
example: database.example.org
port:
type: integer
description: Port to connect to. Defaults to 27017.
example: 27017
username:
type: string
description: |
Username with which to connect to the database.
Skip it if no authentication is needed.
example: dbuser
password:
type: string
description: |
Password with which to connect to the database.
Skip it if no authentication is needed.
example: trustsome1
auth_db:
type: string
description: |
Authentication database where the specified
username has been created.
If no authentication database is specified,
the database provided in "name" will be used.
If "name" is "all", the "admin" database will
be used.
example: admin
format:
type: string
enum: ['archive', 'directory']
description: |
Database dump output format. One of "archive"
or "directory". Defaults to "archive". See
mongodump documentation for details. Note that
format is ignored when the database name is
"all".
example: directory
options:
type: string
description: |
Additional mongodump options to pass
directly to the dump command, without performing
any validation on them. See mongodump
documentation for details.
example: --role=someone
description: |
List of one or more MongoDB databases to dump before
creating a backup, run once per configuration file. The
database dumps are added to your source directories at
runtime, backed up, and removed afterwards. Requires
mongodump/mongorestore commands. See
https://docs.mongodb.com/database-tools/mongodump/ and
https://docs.mongodb.com/database-tools/mongorestore/ for
details.
healthchecks:
type: string
description: |

View file

@ -1,6 +1,6 @@
import logging
from borgmatic.hooks import cronhub, cronitor, healthchecks, mysql, pagerduty, postgresql
from borgmatic.hooks import cronhub, cronitor, healthchecks, mongodb, mysql, pagerduty, postgresql
logger = logging.getLogger(__name__)
@ -11,6 +11,7 @@ HOOK_NAME_TO_MODULE = {
'pagerduty': pagerduty,
'postgresql_databases': postgresql,
'mysql_databases': mysql,
'mongodb_databases': mongodb,
}

View file

@ -6,7 +6,7 @@ from borgmatic.borg.create import DEFAULT_BORGMATIC_SOURCE_DIRECTORY
logger = logging.getLogger(__name__)
DATABASE_HOOK_NAMES = ('postgresql_databases', 'mysql_databases')
DATABASE_HOOK_NAMES = ('postgresql_databases', 'mysql_databases', 'mongodb_databases')
def make_database_dump_path(borgmatic_source_directory, database_hook_name):

162
borgmatic/hooks/mongodb.py Normal file
View file

@ -0,0 +1,162 @@
import logging
from borgmatic.execute import execute_command, execute_command_with_processes
from borgmatic.hooks import dump
logger = logging.getLogger(__name__)
def make_dump_path(location_config):  # pragma: no cover
    '''
    Build this hook's dump path from the "borgmatic_source_directory" value (if any) in the given
    location configuration dict and the "mongodb_databases" hook name.
    '''
    source_directory = location_config.get('borgmatic_source_directory')

    return dump.make_database_dump_path(source_directory, 'mongodb_databases')
def dump_databases(databases, log_prefix, location_config, dry_run):
    '''
    Start a mongodump process for each of the given MongoDB databases. The databases are a
    sequence of dicts, one per database, shaped as per the configuration schema. The log prefix is
    prepended to every log entry, and the location configuration dict determines where the dumps
    are written.

    For the "directory" format, only the dump's parent directory is created up front; for any
    other format, a named pipe is created at the dump filename for the process to write into.

    Return the list of started dump processes, in the same order as the given databases. On a dry
    run, nothing is created or executed and the returned list is empty.
    '''
    label = ' (dry run; not actually dumping anything)' if dry_run else ''
    logger.info('{}: Dumping MongoDB databases{}'.format(log_prefix, label))

    started_processes = []

    for database_config in databases:
        database_name = database_config['name']
        destination = dump.make_database_dump_filename(
            make_dump_path(location_config), database_name, database_config.get('hostname')
        )
        output_format = database_config.get('format', 'archive')

        logger.debug(
            '{}: Dumping MongoDB database {} to {}{}'.format(
                log_prefix, database_name, destination, label
            )
        )

        if not dry_run:
            if output_format != 'directory':
                dump.create_named_pipe_for_dump(destination)
            else:
                dump.create_parent_directory_for_dump(destination)

            dump_command = build_dump_command(database_config, destination, output_format)
            # Don't wait for the dump to finish: the process is handed back to the caller.
            started_processes.append(
                execute_command(dump_command, shell=True, run_to_completion=False)
            )

    return started_processes
def build_dump_command(database, dump_filename, dump_format):
    '''
    Return the mongodump command, as a list of arguments, for a single database configuration
    dict, the filename to dump to, and the dump format ("archive" or "directory").

    The caller in this module executes the result with shell=True, and the non-directory form
    relies on a ">" shell redirection. So each configuration-supplied value (hostname, port,
    username, password, auth_db, name) and the dump filename are shell-quoted here; otherwise a
    value containing a space or a shell metacharacter would break or alter the command. Values
    made up only of shell-safe characters come through unchanged.

    The "options" value is intentionally passed through unquoted, split on spaces, because it is
    documented as raw additional mongodump options.
    '''
    import shlex  # Local import so this function doesn't rely on a module-level import.

    all_databases = database['name'] == 'all'
    directory_format = dump_format == 'directory'

    command = ['mongodump', '--archive']

    # NOTE(review): for the directory format, the destination directly follows --archive. The
    # mongodump documentation describes --out for directory output and --archive=<file> for
    # archive files, so confirm this invocation produces what the restore side expects.
    if directory_format:
        command.append(shlex.quote(dump_filename))

    # Connection and authentication flags, emitted in a fixed order.
    for config_key, flag in (
        ('hostname', '--host'),
        ('port', '--port'),
        ('username', '--username'),
        ('password', '--password'),
        ('auth_db', '--authenticationDatabase'),
    ):
        if config_key in database:
            command.extend((flag, shlex.quote(str(database[config_key]))))

    # Omitting --db makes mongodump dump every database.
    if not all_databases:
        command.extend(('--db', shlex.quote(database['name'])))

    if 'options' in database:
        command.extend(database['options'].split(' '))

    # Archive output goes to stdout and is redirected by the shell into the dump filename.
    if not directory_format:
        command.extend(('>', shlex.quote(dump_filename)))

    return command
def remove_database_dumps(databases, log_prefix, location_config, dry_run):  # pragma: no cover
    '''
    Remove every dump file belonging to this hook; the given databases are not consulted. The
    dump path comes from the given location configuration dict, and the log prefix is used in any
    log entries. On a dry run, nothing is actually removed.
    '''
    mongodb_dump_path = make_dump_path(location_config)

    dump.remove_database_dumps(mongodb_dump_path, 'MongoDB', log_prefix, dry_run)
def make_database_dump_pattern(
    databases, log_prefix, location_config, name=None
):  # pragma: no cover
    '''
    Return the glob pattern matching the dump of the named database within an archive, with the
    hostname portion wildcarded as "*". The dump path is derived from the given location
    configuration dict. The databases and log prefix arguments are accepted but not used here.
    '''
    dump_path = make_dump_path(location_config)

    return dump.make_database_dump_filename(dump_path, name, hostname='*')
def restore_database_dump(database_config, log_prefix, location_config, dry_run, extract_process):
    '''
    Restore a single MongoDB database with mongorestore. The database configuration is a
    one-element sequence holding a dict shaped as per the configuration schema; any other length
    raises ValueError. The log prefix is used in log entries, and the location configuration dict
    supplies the dump path and the local Borg path.

    When an extract process (a subprocess.Popen instance) is given, its stdout is fed to the
    restore command as input. When it is None, the dump is restored from the filesystem instead.

    On a dry run, the restore is logged but not executed.
    '''
    if len(database_config) != 1:
        raise ValueError('The database configuration value is invalid')

    (database,) = database_config
    label = ' (dry run; not actually restoring anything)' if dry_run else ''

    dump_filename = dump.make_database_dump_filename(
        make_dump_path(location_config), database['name'], database.get('hostname')
    )
    restore_command = build_restore_command(extract_process, database, dump_filename)

    logger.debug('{}: Restoring MongoDB database {}{}'.format(log_prefix, database['name'], label))

    if dry_run:
        return

    # Without an extract process there is nothing to wait on and no stream to read from.
    if extract_process:
        upstream_processes = [extract_process]
        dump_stream = extract_process.stdout
    else:
        upstream_processes = []
        dump_stream = None

    execute_command_with_processes(
        restore_command,
        upstream_processes,
        output_log_level=logging.DEBUG,
        input_file=dump_stream,
        borg_local_path=location_config.get('local_path', 'borg'),
    )
def build_restore_command(extract_process, database, dump_filename):
    '''
    Return the mongorestore command, as a list of arguments, for a single database configuration
    dict. The dump filename is only included when there is no extract process to stream from.
    Unless the database name is "all", the command drops and restores just the named database.
    '''
    restore_arguments = ['mongorestore', '--archive']

    if not extract_process:
        restore_arguments.append(dump_filename)

    database_name = database['name']
    if database_name != 'all':
        restore_arguments += ['--drop', '--db', database_name]

    # Optional connection and authentication flags, emitted in a fixed order.
    optional_flags = (
        ('hostname', '--host'),
        ('port', '--port'),
        ('username', '--username'),
        ('password', '--password'),
        ('auth_db', '--authenticationDatabase'),
    )
    for config_key, flag in optional_flags:
        if config_key not in database:
            continue

        value = database[config_key]
        # Only the port is converted to a string; the other values are passed as configured.
        restore_arguments += [flag, str(value) if config_key == 'port' else value]

    return restore_arguments

View file

@ -10,7 +10,7 @@
set -e
apk add --no-cache python3 py3-pip borgbackup postgresql-client mariadb-client
apk add --no-cache python3 py3-pip borgbackup postgresql-client mariadb-client mongodb-tools
# If certain dependencies of black are available in this version of Alpine, install them.
apk add --no-cache py3-typed-ast py3-regex || true
python3 -m pip install --upgrade pip==21.3.1 setuptools==58.2.0

View file

@ -10,6 +10,11 @@ services:
environment:
MYSQL_ROOT_PASSWORD: test
MYSQL_DATABASE: test
mongodb:
image: mongo:5.0.5
environment:
MONGO_INITDB_ROOT_USERNAME: root
MONGO_INITDB_ROOT_PASSWORD: test
tests:
image: alpine:3.13
volumes:

View file

@ -47,13 +47,22 @@ hooks:
hostname: mysql
username: root
password: test
mongodb_databases:
- name: test
hostname: mongodb
username: root
password: test
auth_db: admin
- name: all
hostname: mongodb
username: root
password: test
'''.format(
config_path, repository_path, borgmatic_source_directory, postgresql_dump_format
)
config_file = open(config_path, 'w')
config_file.write(config)
config_file.close()
with open(config_path, 'w') as config_file:
config_file.write(config)
def test_database_dump_and_restore():
@ -69,15 +78,15 @@ def test_database_dump_and_restore():
write_configuration(config_path, repository_path, borgmatic_source_directory)
subprocess.check_call(
'borgmatic -v 2 --config {} init --encryption repokey'.format(config_path).split(' ')
['borgmatic', '-v', '2', '--config', config_path, 'init', '--encryption', 'repokey']
)
# Run borgmatic to generate a backup archive including a database dump.
subprocess.check_call('borgmatic create --config {} -v 2'.format(config_path).split(' '))
subprocess.check_call(['borgmatic', 'create', '--config', config_path, '-v', '2'])
# Get the created archive name.
output = subprocess.check_output(
'borgmatic --config {} list --json'.format(config_path).split(' ')
['borgmatic', '--config', config_path, 'list', '--json']
).decode(sys.stdout.encoding)
parsed_output = json.loads(output)
@ -87,9 +96,7 @@ def test_database_dump_and_restore():
# Restore the database from the archive.
subprocess.check_call(
'borgmatic --config {} restore --archive {}'.format(config_path, archive_name).split(
' '
)
['borgmatic', '--config', config_path, 'restore', '--archive', archive_name]
)
finally:
os.chdir(original_working_directory)
@ -114,15 +121,15 @@ def test_database_dump_and_restore_with_directory_format():
)
subprocess.check_call(
'borgmatic -v 2 --config {} init --encryption repokey'.format(config_path).split(' ')
['borgmatic', '-v', '2', '--config', config_path, 'init', '--encryption', 'repokey']
)
# Run borgmatic to generate a backup archive including a database dump.
subprocess.check_call('borgmatic create --config {} -v 2'.format(config_path).split(' '))
subprocess.check_call(['borgmatic', 'create', '--config', config_path, '-v', '2'])
# Restore the database from the archive.
subprocess.check_call(
'borgmatic --config {} restore --archive latest'.format(config_path).split(' ')
['borgmatic', '--config', config_path, 'restore', '--archive', 'latest']
)
finally:
os.chdir(original_working_directory)
@ -142,7 +149,7 @@ def test_database_dump_with_error_causes_borgmatic_to_exit():
write_configuration(config_path, repository_path, borgmatic_source_directory)
subprocess.check_call(
'borgmatic -v 2 --config {} init --encryption repokey'.format(config_path).split(' ')
['borgmatic', '-v', '2', '--config', config_path, 'init', '--encryption', 'repokey']
)
# Run borgmatic with a config override such that the database dump fails.

View file

@ -0,0 +1,296 @@
import logging
import pytest
from flexmock import flexmock
from borgmatic.hooks import mongodb as module
def test_dump_databases_runs_mongodump_for_each_database():
    '''
    Each configured database gets its own mongodump invocation, and the started processes are
    returned in the same order.
    '''
    database_names = ('foo', 'bar')
    database_configs = [{'name': database_name} for database_name in database_names]
    dump_processes = [flexmock(), flexmock()]
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/foo'
    ).and_return('databases/localhost/bar')
    flexmock(module.dump).should_receive('create_named_pipe_for_dump')

    for database_name, dump_process in zip(database_names, dump_processes):
        expected_command = [
            'mongodump',
            '--archive',
            '--db',
            database_name,
            '>',
            'databases/localhost/{}'.format(database_name),
        ]
        flexmock(module).should_receive('execute_command').with_args(
            expected_command, shell=True, run_to_completion=False
        ).and_return(dump_process).once()

    returned_processes = module.dump_databases(database_configs, 'test.yaml', {}, dry_run=False)

    assert returned_processes == dump_processes
def test_dump_databases_with_dry_run_skips_mongodump():
    '''
    A dry run creates no named pipes, executes no commands, and returns no processes.
    '''
    database_configs = [{'name': 'foo'}, {'name': 'bar'}]
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/foo'
    ).and_return('databases/localhost/bar')
    flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
    flexmock(module).should_receive('execute_command').never()

    returned_processes = module.dump_databases(database_configs, 'test.yaml', {}, dry_run=True)

    assert returned_processes == []
def test_dump_databases_runs_mongodump_with_hostname_and_port():
    '''
    A configured hostname and port are passed to mongodump as --host and --port.
    '''
    database_configs = [{'name': 'foo', 'hostname': 'database.example.org', 'port': 5433}]
    dump_process = flexmock()
    expected_command = [
        'mongodump',
        '--archive',
        '--host',
        'database.example.org',
        '--port',
        '5433',
        '--db',
        'foo',
        '>',
        'databases/database.example.org/foo',
    ]
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/database.example.org/foo'
    )
    flexmock(module.dump).should_receive('create_named_pipe_for_dump')
    flexmock(module).should_receive('execute_command').with_args(
        expected_command, shell=True, run_to_completion=False
    ).and_return(dump_process).once()

    returned_processes = module.dump_databases(database_configs, 'test.yaml', {}, dry_run=False)

    assert returned_processes == [dump_process]
def test_dump_databases_runs_mongodump_with_username_and_password():
    '''
    Configured credentials and authentication database are passed to mongodump as --username,
    --password, and --authenticationDatabase.
    '''
    database_configs = [
        {'name': 'foo', 'username': 'mongo', 'password': 'trustsome1', 'auth_db': 'admin'}
    ]
    dump_process = flexmock()
    expected_command = [
        'mongodump',
        '--archive',
        '--username',
        'mongo',
        '--password',
        'trustsome1',
        '--authenticationDatabase',
        'admin',
        '--db',
        'foo',
        '>',
        'databases/localhost/foo',
    ]
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/foo'
    )
    flexmock(module.dump).should_receive('create_named_pipe_for_dump')
    flexmock(module).should_receive('execute_command').with_args(
        expected_command, shell=True, run_to_completion=False
    ).and_return(dump_process).once()

    returned_processes = module.dump_databases(database_configs, 'test.yaml', {}, dry_run=False)

    assert returned_processes == [dump_process]
def test_dump_databases_runs_mongodump_with_directory_format():
    '''
    With the "directory" format, the parent directory is created instead of a named pipe, and the
    dump filename is passed on the command line rather than via shell redirection.
    '''
    database_configs = [{'name': 'foo', 'format': 'directory'}]
    dump_process = flexmock()
    expected_command = ['mongodump', '--archive', 'databases/localhost/foo', '--db', 'foo']
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/foo'
    )
    flexmock(module.dump).should_receive('create_parent_directory_for_dump')
    flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
    flexmock(module).should_receive('execute_command').with_args(
        expected_command, shell=True, run_to_completion=False
    ).and_return(dump_process).once()

    returned_processes = module.dump_databases(database_configs, 'test.yaml', {}, dry_run=False)

    assert returned_processes == [dump_process]
def test_dump_databases_runs_mongodump_with_options():
    '''
    Configured extra options are appended to the mongodump command after the database name.
    '''
    database_configs = [{'name': 'foo', 'options': '--stuff=such'}]
    dump_process = flexmock()
    expected_command = [
        'mongodump',
        '--archive',
        '--db',
        'foo',
        '--stuff=such',
        '>',
        'databases/localhost/foo',
    ]
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/foo'
    )
    flexmock(module.dump).should_receive('create_named_pipe_for_dump')
    flexmock(module).should_receive('execute_command').with_args(
        expected_command, shell=True, run_to_completion=False
    ).and_return(dump_process).once()

    returned_processes = module.dump_databases(database_configs, 'test.yaml', {}, dry_run=False)

    assert returned_processes == [dump_process]
def test_dump_databases_runs_mongodumpall_for_all_databases():
    '''
    For the special database name "all", mongodump is run without a --db argument.
    '''
    database_configs = [{'name': 'all'}]
    dump_process = flexmock()
    expected_command = ['mongodump', '--archive', '>', 'databases/localhost/all']
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/all'
    )
    flexmock(module.dump).should_receive('create_named_pipe_for_dump')
    flexmock(module).should_receive('execute_command').with_args(
        expected_command, shell=True, run_to_completion=False
    ).and_return(dump_process).once()

    returned_processes = module.dump_databases(database_configs, 'test.yaml', {}, dry_run=False)

    assert returned_processes == [dump_process]
def test_restore_database_dump_runs_pg_restore():
    '''
    Despite the name, this checks that mongorestore is run for a single database, fed from the
    extract process's stdout.
    '''
    restore_config = [{'name': 'foo'}]
    extract_process = flexmock(stdout=flexmock())
    expected_command = ['mongorestore', '--archive', '--drop', '--db', 'foo']
    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').with_args(
        expected_command,
        processes=[extract_process],
        output_log_level=logging.DEBUG,
        input_file=extract_process.stdout,
        borg_local_path='borg',
    ).once()

    module.restore_database_dump(
        restore_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
    )
def test_restore_database_dump_errors_on_multiple_database_config():
    '''
    Supplying more than one database configuration raises ValueError without running anything.
    '''
    restore_config = [{'name': 'foo'}, {'name': 'bar'}]
    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').never()
    flexmock(module).should_receive('execute_command').never()

    with pytest.raises(ValueError):
        module.restore_database_dump(
            restore_config, 'test.yaml', {}, dry_run=False, extract_process=flexmock()
        )
def test_restore_database_dump_runs_pg_restore_with_hostname_and_port():
    '''
    Despite the name, this checks that a configured hostname and port are passed to mongorestore
    as --host and --port.
    '''
    restore_config = [{'name': 'foo', 'hostname': 'database.example.org', 'port': 5433}]
    extract_process = flexmock(stdout=flexmock())
    expected_command = [
        'mongorestore',
        '--archive',
        '--drop',
        '--db',
        'foo',
        '--host',
        'database.example.org',
        '--port',
        '5433',
    ]
    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').with_args(
        expected_command,
        processes=[extract_process],
        output_log_level=logging.DEBUG,
        input_file=extract_process.stdout,
        borg_local_path='borg',
    ).once()

    module.restore_database_dump(
        restore_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
    )
def test_restore_database_dump_runs_pg_restore_with_username_and_password():
    '''
    Despite the name, this checks that configured credentials and authentication database are
    passed to mongorestore as --username, --password, and --authenticationDatabase.
    '''
    restore_config = [
        {'name': 'foo', 'username': 'mongo', 'password': 'trustsome1', 'auth_db': 'admin'}
    ]
    extract_process = flexmock(stdout=flexmock())
    expected_command = [
        'mongorestore',
        '--archive',
        '--drop',
        '--db',
        'foo',
        '--username',
        'mongo',
        '--password',
        'trustsome1',
        '--authenticationDatabase',
        'admin',
    ]
    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').with_args(
        expected_command,
        processes=[extract_process],
        output_log_level=logging.DEBUG,
        input_file=extract_process.stdout,
        borg_local_path='borg',
    ).once()

    module.restore_database_dump(
        restore_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
    )
def test_restore_database_dump_runs_psql_for_all_database_dump():
    '''
    Despite the name, this checks that for the special database name "all", mongorestore is run
    without --drop or --db arguments.
    '''
    restore_config = [{'name': 'all'}]
    extract_process = flexmock(stdout=flexmock())
    expected_command = ['mongorestore', '--archive']
    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').with_args(
        expected_command,
        processes=[extract_process],
        output_log_level=logging.DEBUG,
        input_file=extract_process.stdout,
        borg_local_path='borg',
    ).once()

    module.restore_database_dump(
        restore_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
    )
def test_restore_database_dump_with_dry_run_skips_restore():
    '''
    A dry run never executes the restore command.
    '''
    restore_config = [{'name': 'foo'}]
    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').never()

    module.restore_database_dump(
        restore_config, 'test.yaml', {}, dry_run=True, extract_process=flexmock()
    )
def test_restore_database_dump_without_extract_process_restores_from_disk():
    '''
    Without an extract process, the dump filename is passed to mongorestore and no input stream
    or upstream processes are supplied.
    '''
    restore_config = [{'name': 'foo'}]
    expected_command = ['mongorestore', '--archive', '/dump/path', '--drop', '--db', 'foo']
    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return('/dump/path')
    flexmock(module).should_receive('execute_command_with_processes').with_args(
        expected_command,
        processes=[],
        output_log_level=logging.DEBUG,
        input_file=None,
        borg_local_path='borg',
    ).once()

    module.restore_database_dump(
        restore_config, 'test.yaml', {}, dry_run=False, extract_process=None
    )