From af1cc27988f16bde56b4b07487b72002a8788236 Mon Sep 17 00:00:00 2001 From: Divyansh Singh Date: Thu, 2 Mar 2023 23:55:16 +0530 Subject: [PATCH] feat: add dump-restore support for sqlite databases --- borgmatic/config/schema.yaml | 25 ++++++ borgmatic/hooks/dispatch.py | 2 + borgmatic/hooks/dump.py | 7 +- borgmatic/hooks/sqlite.py | 122 +++++++++++++++++++++++++++ docs/how-to/backup-your-databases.md | 15 +++- tests/end-to-end/test_database.py | 3 + tests/unit/hooks/test_sqlite.py | 75 ++++++++++++++++ 7 files changed, 244 insertions(+), 5 deletions(-) create mode 100644 borgmatic/hooks/sqlite.py create mode 100644 tests/unit/hooks/test_sqlite.py diff --git a/borgmatic/config/schema.yaml b/borgmatic/config/schema.yaml index d7acc1c..6f399ed 100644 --- a/borgmatic/config/schema.yaml +++ b/borgmatic/config/schema.yaml @@ -931,6 +931,31 @@ properties: mysqldump/mysql commands (from either MySQL or MariaDB). See https://dev.mysql.com/doc/refman/8.0/en/mysqldump.html or https://mariadb.com/kb/en/library/mysqldump/ for details. + sqlite_databases: + type: array + items: + type: object + required: ['path','name'] + additionalProperties: false + properties: + path: + type: string + description: | + Path to the SQLite database file to dump. If + relative, it is relative to the current working + directory. If absolute, it is relative to the + root of the filesystem. Note that using this + database hook implicitly enables both + read_special and one_file_system (see above) to + support dump and restore streaming. + example: /var/lib/sqlite/users.db + name: + type: string + description: | + This is used to tag the database dump file with + a name. It is not used to identify the database + file itself. 
+ example: users mongodb_databases: type: array items: diff --git a/borgmatic/hooks/dispatch.py b/borgmatic/hooks/dispatch.py index 41dcee0..88a99eb 100644 --- a/borgmatic/hooks/dispatch.py +++ b/borgmatic/hooks/dispatch.py @@ -9,6 +9,7 @@ from borgmatic.hooks import ( ntfy, pagerduty, postgresql, + sqlite, ) logger = logging.getLogger(__name__) @@ -22,6 +23,7 @@ HOOK_NAME_TO_MODULE = { 'ntfy': ntfy, 'pagerduty': pagerduty, 'postgresql_databases': postgresql, + 'sqlite_databases': sqlite, } diff --git a/borgmatic/hooks/dump.py b/borgmatic/hooks/dump.py index d4c8e3f..43686d3 100644 --- a/borgmatic/hooks/dump.py +++ b/borgmatic/hooks/dump.py @@ -6,7 +6,12 @@ from borgmatic.borg.state import DEFAULT_BORGMATIC_SOURCE_DIRECTORY logger = logging.getLogger(__name__) -DATABASE_HOOK_NAMES = ('postgresql_databases', 'mysql_databases', 'mongodb_databases') +DATABASE_HOOK_NAMES = ( + 'postgresql_databases', + 'mysql_databases', + 'mongodb_databases', + 'sqlite_databases', +) def make_database_dump_path(borgmatic_source_directory, database_hook_name): diff --git a/borgmatic/hooks/sqlite.py b/borgmatic/hooks/sqlite.py new file mode 100644 index 0000000..f9b83ae --- /dev/null +++ b/borgmatic/hooks/sqlite.py @@ -0,0 +1,122 @@ +import logging +import os +from subprocess import CalledProcessError + +from borgmatic.execute import execute_command, execute_command_with_processes +from borgmatic.hooks import dump + +logger = logging.getLogger(__name__) + + +def make_dump_path(location_config): # pragma: no cover + ''' + Make the dump path from the given location configuration and the name of this hook. + ''' + return dump.make_database_dump_path( + location_config.get('borgmatic_source_directory'), 'sqlite_databases' + ) + + +def dump_databases(databases, log_prefix, location_config, dry_run): # pragma: no cover + ''' + Dump the given SQLite3 databases to a file. The databases are supplied as a sequence of + configuration dicts, as per the configuration schema. 
Use the given log prefix in any log + entries. Use the given location configuration dict to construct the destination path. If this + is a dry run, then don't actually dump anything. + ''' + dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else '' + processes = [] + + logger.info('{}: Dumping SQLite databases{}'.format(log_prefix, dry_run_label)) + + for database in databases: + database_path = database['path'] + database_filename = database['name'] + dump_path = make_dump_path(location_config) + dump_filename = dump.make_database_dump_filename(dump_path, database_filename) + if os.path.exists(dump_filename): + logger.warning( + f'{log_prefix}: Skipping duplicate dump of SQLite database at {database_path} to {dump_filename}' + ) + continue + + command = ( + 'sqlite3', + database_path, + '.dump', + '>', + dump_filename, + ) + logger.debug( + f'{log_prefix}: Dumping SQLite database at {database_path} to {dump_filename}{dry_run_label}' + ) + if dry_run: + continue + + dump.create_parent_directory_for_dump(dump_filename) + processes.append(execute_command(command, shell=True, run_to_completion=False)) + + return processes + + +def remove_database_dumps(databases, log_prefix, location_config, dry_run): # pragma: no cover + ''' + Remove the given SQLite3 database dumps from the filesystem. The databases are supplied as a + sequence of configuration dicts, as per the configuration schema. Use the given log prefix in + any log entries. Use the given location configuration dict to construct the destination path. + If this is a dry run, then don't actually remove anything. + ''' + dump.remove_database_dumps(make_dump_path(location_config), 'SQLite', log_prefix, dry_run) + + +def make_database_dump_pattern( + databases, log_prefix, location_config, name=None +): # pragma: no cover + ''' + Make a pattern that matches the given SQLite3 databases. The databases are supplied as a + sequence of configuration dicts, as per the configuration schema. 
+ ''' + return dump.make_database_dump_filename(make_dump_path(location_config), name) + + +def restore_database_dump(database_config, log_prefix, location_config, dry_run, extract_process): + ''' + Restore the given SQLite3 database from an extract stream. The database is supplied as a + one-element sequence containing a dict describing the database, as per the configuration schema. + Use the given log prefix in any log entries. If this is a dry run, then don't actually restore + anything. Trigger the given active extract process (an instance of subprocess.Popen) to produce + output to consume. + ''' + dry_run_label = ' (dry run; not actually restoring anything)' if dry_run else '' + + if len(database_config) != 1: + raise ValueError('The database configuration value is invalid') + + database_path = database_config[0]['path'] + + logger.debug(f'{log_prefix}: Restoring SQLite database at {database_path}{dry_run_label}') + if dry_run: + return + + remove_command = ( + 'rm', + database_path, + ) + try: + execute_command(remove_command, shell=True) + except CalledProcessError: + logger.info(f'{log_prefix}: Database does not exist at {database_path}, skipping removal') + + restore_command = ( + 'sqlite3', + database_path, + ) + + # Don't give Borg local path so as to error on warnings, as "borg extract" only gives a warning + # if the restore paths don't exist in the archive. + execute_command_with_processes( + restore_command, + [extract_process], + output_log_level=logging.DEBUG, + input_file=extract_process.stdout, + ) diff --git a/docs/how-to/backup-your-databases.md b/docs/how-to/backup-your-databases.md index 1c7397b..5bf2002 100644 --- a/docs/how-to/backup-your-databases.md +++ b/docs/how-to/backup-your-databases.md @@ -15,8 +15,8 @@ consistent snapshot that is more suited for backups. Fortunately, borgmatic includes built-in support for creating database dumps prior to running backups. 
For example, here is everything you need to dump and -backup a couple of local PostgreSQL databases, a MySQL/MariaDB database, and a -MongoDB database: +backup a couple of local PostgreSQL databases, a MySQL/MariaDB database, a +MongoDB database and a SQLite database: ```yaml hooks: @@ -27,6 +27,9 @@ hooks: - name: posts mongodb_databases: - name: messages + sqlite_databases: + - path: /var/lib/sqlite3/mydb.sqlite + name: mydb ``` As part of each backup, borgmatic streams a database dump for each configured @@ -74,6 +77,9 @@ hooks: password: trustsome1 authentication_database: mongousers options: "--ssl" + sqlite_databases: + - path: /var/lib/sqlite3/mydb.sqlite + name: mydb ``` See your [borgmatic configuration @@ -97,7 +103,8 @@ hooks: ``` Note that you may need to use a `username` of the `postgres` superuser for -this to work with PostgreSQL. +this to work with PostgreSQL. Also, the `all` database name is not supported +for SQLite databases. New in version 1.7.6 With PostgreSQL and MySQL, you can optionally dump "all" databases to separate @@ -154,7 +161,7 @@ bring back any missing configuration files in order to restore a database. ## Supported databases -As of now, borgmatic supports PostgreSQL, MySQL/MariaDB, and MongoDB databases +As of now, borgmatic supports PostgreSQL, MySQL/MariaDB, MongoDB databases and SQLite databases directly. But see below about general-purpose preparation and cleanup hooks as a work-around with other database systems. 
Also, please [file a ticket](https://torsion.org/borgmatic/#issues) for additional database systems diff --git a/tests/end-to-end/test_database.py b/tests/end-to-end/test_database.py index f981b40..3dcb82a 100644 --- a/tests/end-to-end/test_database.py +++ b/tests/end-to-end/test_database.py @@ -73,6 +73,9 @@ hooks: hostname: mongodb username: root password: test + sqlite_databases: + - path: /tmp/sqlite_test.db + name: sqlite_test ''' with open(config_path, 'w') as config_file: diff --git a/tests/unit/hooks/test_sqlite.py b/tests/unit/hooks/test_sqlite.py new file mode 100644 index 0000000..15d5911 --- /dev/null +++ b/tests/unit/hooks/test_sqlite.py @@ -0,0 +1,75 @@ +import logging + +import pytest +from flexmock import flexmock + +from borgmatic.hooks import sqlite as module + + +def test_dump_databases_logs_and_skips_if_dump_already_exists(): + databases = [{'path': '/path/to/database', 'name': 'database'}] + + flexmock(module).should_receive('make_dump_path').and_return('/path/to/dump') + flexmock(module).should_receive('dump.make_database_dump_filename').and_return( + '/path/to/dump/database' + ) + flexmock(module.os.path).should_receive('exists').and_return(True) + flexmock(logging).should_receive('info') + flexmock(logging).should_receive('warning') + + assert module.dump_databases(databases, 'test.yaml', {}, dry_run = False) == [] + +def test_dump_databases_dumps_each_database(): + databases = [ + {'path': '/path/to/database1', 'name': 'database1'}, + {'path': '/path/to/database2', 'name': 'database2'}, + ] + + flexmock(module).should_receive('make_dump_path').and_return('/path/to/dump') + flexmock(module).should_receive('dump.make_database_dump_filename').and_return( + '/path/to/dump/database' + ) + flexmock(module.os.path).should_receive('exists').and_return(False) + flexmock(logging).should_receive('info') + flexmock(logging).should_receive('warning') + flexmock(module).should_receive('dump.create_parent_directory_for_dump') + 
flexmock(module).should_receive('execute_command').and_return('process') + + assert module.dump_databases(databases, 'test.yaml', {}, dry_run = False) == ['process', 'process'] + +def test_dump_databases_does_not_dump_if_dry_run(): + databases = [{'path': '/path/to/database', 'name': 'database'}] + + flexmock(module).should_receive('make_dump_path').and_return('/path/to/dump') + flexmock(module).should_receive('dump.make_database_dump_filename').and_return( + '/path/to/dump/database' + ) + flexmock(module.os.path).should_receive('exists').and_return(False) + flexmock(logging).should_receive('info') + flexmock(logging).should_receive('warning') + flexmock(module).should_receive('dump.create_parent_directory_for_dump') + + assert module.dump_databases(databases, 'test.yaml', {}, dry_run = True) == [] + +def test_restore_database_dump_restores_database(): + database_config = [{'path': '/path/to/database', 'name': 'database'}] + extract_process = flexmock(stdout = flexmock()) + + flexmock(module).should_receive('execute_command_with_processes').and_return('process') + + module.restore_database_dump(database_config, 'test.yaml', {}, dry_run = False, extract_process = extract_process) + +def test_restore_database_dump_does_not_restore_database_if_dry_run(): + database_config = [{'path': '/path/to/database', 'name': 'database'}] + extract_process = flexmock(stdout = flexmock()) + + flexmock(module).should_receive('execute_command_with_processes').never() + + module.restore_database_dump(database_config, 'test.yaml', {}, dry_run = True, extract_process = extract_process) + +def test_restore_database_dump_raises_error_if_database_config_is_invalid(): + database_config = [] + extract_process = flexmock(stdout = flexmock()) + + with pytest.raises(ValueError): + module.restore_database_dump(database_config, 'test.yaml', {}, dry_run = False, extract_process = extract_process) \ No newline at end of file