Add spot consistency check (#656).
Reviewed-on: https://projects.torsion.org/borgmatic-collective/borgmatic/pulls/849
This commit is contained in:
commit
f9182514d8
27 changed files with 3186 additions and 3046 deletions
6
NEWS
6
NEWS
|
@ -1,4 +1,9 @@
|
||||||
1.8.10.dev0
|
1.8.10.dev0
|
||||||
|
* #656 (beta): Add a "spot" consistency check that compares file counts and contents between your
|
||||||
|
source files and the latest archive, ensuring they fall within configured tolerances. This can
|
||||||
|
catch problems like incorrect excludes, inadvertent deletes, files changed by malware, etc. See
|
||||||
|
the documentation for more information:
|
||||||
|
https://torsion.org/borgmatic/docs/how-to/deal-with-very-large-backups/#spot-check
|
||||||
* #842: When a command hook exits with a soft failure, ping the log and finish states for any
|
* #842: When a command hook exits with a soft failure, ping the log and finish states for any
|
||||||
configured monitoring hooks.
|
configured monitoring hooks.
|
||||||
* #843: Add documentation link to Loki dashboard for borgmatic:
|
* #843: Add documentation link to Loki dashboard for borgmatic:
|
||||||
|
@ -8,6 +13,7 @@
|
||||||
* Add documentation about backing up containerized databases by configuring borgmatic to exec into
|
* Add documentation about backing up containerized databases by configuring borgmatic to exec into
|
||||||
a container to run a dump command:
|
a container to run a dump command:
|
||||||
https://torsion.org/borgmatic/docs/how-to/backup-your-databases/#containers
|
https://torsion.org/borgmatic/docs/how-to/backup-your-databases/#containers
|
||||||
|
*
|
||||||
|
|
||||||
1.8.9
|
1.8.9
|
||||||
* #311: Add custom dump/restore command options for MySQL and MariaDB.
|
* #311: Add custom dump/restore command options for MySQL and MariaDB.
|
||||||
|
|
|
@ -1,12 +1,569 @@
|
||||||
|
import datetime
|
||||||
|
import hashlib
|
||||||
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import random
|
||||||
|
|
||||||
import borgmatic.borg.check
|
import borgmatic.borg.check
|
||||||
|
import borgmatic.borg.create
|
||||||
|
import borgmatic.borg.environment
|
||||||
|
import borgmatic.borg.extract
|
||||||
|
import borgmatic.borg.list
|
||||||
|
import borgmatic.borg.rlist
|
||||||
|
import borgmatic.borg.state
|
||||||
import borgmatic.config.validate
|
import borgmatic.config.validate
|
||||||
|
import borgmatic.execute
|
||||||
import borgmatic.hooks.command
|
import borgmatic.hooks.command
|
||||||
|
|
||||||
|
DEFAULT_CHECKS = (
|
||||||
|
{'name': 'repository', 'frequency': '1 month'},
|
||||||
|
{'name': 'archives', 'frequency': '1 month'},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_checks(config, only_checks=None):
    '''
    Given a configuration dict containing a "checks" sequence of dicts, plus an optional list of
    override check names, return the lowercased names of the checks to run as a tuple.

    For example, given a config of:

        {'checks': ({'name': 'repository'}, {'name': 'archives'})}

    This will be returned as:

        ('repository', 'archives')

    Override checks, when given, win over the configuration. If the configuration has no "checks"
    option (or an empty one), the names from DEFAULT_CHECKS are used. If any of the resulting names
    is "disabled", warn about the deprecation and return an empty tuple, meaning that no checks
    should be run.
    '''
    if only_checks:
        requested_names = only_checks
    else:
        configured_checks = config.get('checks', None) or DEFAULT_CHECKS
        requested_names = tuple(entry['name'] for entry in configured_checks)

    names = tuple(name.lower() for name in requested_names)

    if 'disabled' not in names:
        return names

    logger.warning(
        'The "disabled" value for the "checks" option is deprecated and will be removed from a future release; use "skip_actions" instead'
    )

    if len(names) > 1:
        logger.warning(
            'Multiple checks are configured, but one of them is "disabled"; not running any checks'
        )

    return ()
|
||||||
|
|
||||||
|
|
||||||
|
def parse_frequency(frequency):
    '''
    Given a frequency string made up of a number, a single space, and a unit of time, return the
    equivalent datetime.timedelta instance. Return None if the frequency is empty/None or "always".

    For instance, given "3 weeks", return datetime.timedelta(weeks=3).

    The unit may be singular or plural. Months and years aren't understood by timedelta, so they get
    approximated as 30 and 365 days, respectively.

    Raise ValueError if the given frequency cannot be parsed.
    '''
    if not frequency:
        return None

    frequency = frequency.strip().lower()

    if frequency == 'always':
        return None

    try:
        count_text, unit = frequency.split(' ')
        count = int(count_text)
    except ValueError:
        raise ValueError(f"Could not parse consistency check frequency '{frequency}'")

    # Normalize a singular unit ("week") to the plural keyword timedelta expects ("weeks").
    unit = unit if unit.endswith('s') else unit + 's'

    days_per_unit = {'months': 30, 'years': 365}

    if unit in days_per_unit:
        count *= days_per_unit[unit]
        unit = 'days'

    try:
        return datetime.timedelta(**{unit: count})
    except TypeError:
        # timedelta rejects keyword arguments it doesn't know about, meaning a bogus unit.
        raise ValueError(f"Could not parse consistency check frequency '{frequency}'")
|
||||||
|
|
||||||
|
|
||||||
|
def filter_checks_on_frequency(
    config,
    borg_repository_id,
    checks,
    force,
    archives_check_id=None,
):
    '''
    Given a configuration dict with a "checks" sequence of dicts, a Borg repository ID, a sequence
    of checks, whether to force checks to run, and an ID for the archives check potentially being
    run (if any), filter down those checks based on the configured "frequency" for each check as
    compared to its check time file.

    In other words, a check whose check time file's timestamp is too new (based on the configured
    frequency) will get cut from the returned sequence of checks. Example:

        config = {
            'checks': [
                {
                    'name': 'archives',
                    'frequency': '2 weeks',
                },
            ]
        }

    When this function is called with that config and "archives" in checks, "archives" will get
    filtered out of the returned result if its check time file is newer than 2 weeks old, indicating
    that it's not yet time to run that check again.

    If checks is empty, return it as-is. If force is set, return all of the given checks as a tuple
    without consulting any frequencies.

    Raise ValueError if a frequency cannot be parsed.
    '''
    if not checks:
        return checks

    if force:
        return tuple(checks)

    filtered_checks = list(checks)

    # Mirror parse_checks(): a missing *or empty/null* "checks" option means the defaults, and check
    # names are compared in lowercase.
    for check_config in config.get('checks', None) or DEFAULT_CHECKS:
        check = check_config['name'].lower()

        # Skip checks that weren't requested, and checks already filtered out by an earlier
        # (duplicate) configuration entry.
        if check not in filtered_checks:
            continue

        frequency_delta = parse_frequency(check_config.get('frequency'))
        if not frequency_delta:
            continue

        check_time = probe_for_check_time(config, borg_repository_id, check, archives_check_id)
        if not check_time:
            continue

        # If we've not yet reached the time when the frequency dictates we're ready for another
        # check, skip this check.
        now = datetime.datetime.now()
        next_check_time = check_time + frequency_delta

        if now < next_check_time:
            remaining = next_check_time - now
            logger.info(
                f'Skipping {check} check due to configured frequency; {remaining} until next check (use --force to check anyway)'
            )
            filtered_checks.remove(check)

    return tuple(filtered_checks)
|
||||||
|
|
||||||
|
|
||||||
|
def make_archives_check_id(archive_filter_flags):
    '''
    Given a sequence of flags for filtering archives, return the SHA-256 hex digest of those flags
    joined by spaces, serving as an ID for that particular set of flags. Return None if there are
    no flags.
    '''
    if archive_filter_flags:
        joined_flags = ' '.join(archive_filter_flags)

        return hashlib.sha256(joined_flags.encode()).hexdigest()

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def make_check_time_path(config, borg_repository_id, check_type, archives_check_id=None):
    '''
    Given a configuration dict, a Borg repository ID, the name of a check type ("repository",
    "archives", etc.), and a unique hash of the archives filter flags, return the path of the file
    whose modification time records when that check last occurred.

    The path lives under the (user-expanded) borgmatic source directory in a "checks" subdirectory
    for the repository. For the "archives" and "data" check types, the path gets one more component:
    the archives check ID if given, and "all" otherwise.
    '''
    source_directory = os.path.expanduser(
        config.get(
            'borgmatic_source_directory', borgmatic.borg.state.DEFAULT_BORGMATIC_SOURCE_DIRECTORY
        )
    )
    path_components = [source_directory, 'checks', borg_repository_id, check_type]

    if check_type in ('archives', 'data'):
        path_components.append(archives_check_id or 'all')

    return os.path.join(*path_components)
|
||||||
|
|
||||||
|
|
||||||
|
def write_check_time(path):  # pragma: no cover
    '''
    Record a check time of now as the modification time of the given path, creating the file (and
    any missing parent directories) as needed.
    '''
    logger.debug(f'Writing check time at {path}')

    os.makedirs(os.path.dirname(path), mode=0o700, exist_ok=True)

    # The permissions mode belongs to touch(), not to the Path constructor, which doesn't apply it.
    pathlib.Path(path).touch(mode=0o600)
|
||||||
|
|
||||||
|
|
||||||
|
def read_check_time(path):
    '''
    Given the path of a check time file, return its modification time as a datetime.datetime
    instance. Return None if the path doesn't exist.
    '''
    logger.debug(f'Reading check time from {path}')

    try:
        modified_timestamp = os.stat(path).st_mtime
    except FileNotFoundError:
        return None

    return datetime.datetime.fromtimestamp(modified_timestamp)
|
||||||
|
|
||||||
|
|
||||||
|
def probe_for_check_time(config, borg_repository_id, check, archives_check_id):
    '''
    Given a configuration dict, a Borg repository ID, the name of a check type ("repository",
    "archives", etc.), and a unique hash of the archives filter flags, return the corresponding
    check time or None if such a check time does not exist.

    When the check type is "archives" or "data", this function probes two different paths to find
    the check time, e.g.:

      ~/.borgmatic/checks/1234567890/archives/9876543210
      ~/.borgmatic/checks/1234567890/archives/all

    ... and returns the maximum modification time of the files found (if any). The first path
    represents a more specific archives check time (a check on a subset of archives), and the second
    is a fallback to the last "all" archives check.

    For other check types, both candidate paths come out identical, so this function reads from a
    single check time path, e.g.:

      ~/.borgmatic/checks/1234567890/repository
    '''
    candidate_paths = (
        make_check_time_path(config, borg_repository_id, check, archives_check_id),
        make_check_time_path(config, borg_repository_id, check),
    )

    # groupby() collapses the two candidates into one when they're the same path, so that path
    # only gets read once.
    found_check_times = [
        check_time
        for check_time in (read_check_time(path) for (path, _) in itertools.groupby(candidate_paths))
        if check_time
    ]

    if not found_check_times:
        return None

    return max(found_check_times)
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade_check_times(config, borg_repository_id):
    '''
    Given a configuration dict and a Borg repository ID, upgrade any corresponding check times on
    disk from old-style paths to new-style paths.

    Currently, the only upgrade performed is renaming an archive or data check path that looks like:

      ~/.borgmatic/checks/1234567890/archives

    to:

      ~/.borgmatic/checks/1234567890/archives/all

    The upgrade goes through a temporary "<old path>.temp" file. If a previous upgrade attempt got
    interrupted partway (leaving that temporary file behind, with or without the new directory
    already created), this function picks up where it left off.
    '''
    for check_type in ('archives', 'data'):
        new_path = make_check_time_path(config, borg_repository_id, check_type, 'all')
        old_path = os.path.dirname(new_path)
        temporary_path = f'{old_path}.temp'
        old_path_is_file = os.path.isfile(old_path)

        if not old_path_is_file and not os.path.isfile(temporary_path):
            continue

        logger.debug(f'Upgrading {check_type} check time from {old_path} to {new_path}')

        # Only move the old path out of the way when it's still an old-style file. After an
        # interrupted upgrade it may already be the new-style directory, and renaming a directory
        # onto the temporary file would fail.
        if old_path_is_file:
            try:
                os.rename(old_path, temporary_path)
            except FileNotFoundError:
                pass

        # Tolerate the directory already existing from an interrupted upgrade.
        os.makedirs(old_path, exist_ok=True)
        os.rename(temporary_path, new_path)
|
||||||
|
|
||||||
|
|
||||||
|
def collect_spot_check_source_paths(
    repository, config, local_borg_version, global_arguments, local_path, remote_path
):
    '''
    Given a repository configuration dict, a configuration dict, the local Borg version, global
    arguments as an argparse.Namespace instance, the local Borg path, and the remote Borg path,
    collect the source paths that Borg would use in an actual create (but only include files and
    symlinks).

    This works by running a dry-run Borg create command with file listing enabled, parsing the
    listed paths out of its output, and then keeping only those paths that are files or symlinks
    on disk. Return the result as a tuple of paths.
    '''
    # The module-level imports don't include these two hook modules, so import them here rather
    # than relying on some other module happening to have loaded them first.
    import borgmatic.hooks.dispatch
    import borgmatic.hooks.dump

    stream_processes = any(
        borgmatic.hooks.dispatch.call_hooks(
            'use_streaming',
            config,
            repository['path'],
            borgmatic.hooks.dump.DATA_SOURCE_HOOK_NAMES,
        ).values()
    )

    (create_flags, create_positional_arguments, pattern_file, exclude_file) = (
        borgmatic.borg.create.make_base_create_command(
            dry_run=True,
            repository_path=repository['path'],
            config=config,
            config_paths=(),
            local_borg_version=local_borg_version,
            global_arguments=global_arguments,
            borgmatic_source_directories=(),
            local_path=local_path,
            remote_path=remote_path,
            list_files=True,
            stream_processes=stream_processes,
        )
    )
    borg_environment = borgmatic.borg.environment.make_environment(config)

    # expanduser() raises TypeError when no working directory is configured (None).
    try:
        working_directory = os.path.expanduser(config.get('working_directory'))
    except TypeError:
        working_directory = None

    paths_output = borgmatic.execute.execute_command_and_capture_output(
        create_flags + create_positional_arguments,
        capture_stderr=True,
        working_directory=working_directory,
        extra_environment=borg_environment,
        borg_local_path=local_path,
        borg_exit_codes=config.get('borg_exit_codes'),
    )

    # Only lines carrying a "- " or "+ " status prefix are treated as file listings; everything
    # after the prefix is the path.
    paths = tuple(
        path_line.split(' ', 1)[1]
        for path_line in paths_output.split('\n')
        if path_line.startswith(('- ', '+ '))
    )

    return tuple(path for path in paths if os.path.isfile(path) or os.path.islink(path))
|
||||||
|
|
||||||
|
|
||||||
|
BORG_DIRECTORY_FILE_TYPE = 'd'
|
||||||
|
|
||||||
|
|
||||||
|
def collect_spot_check_archive_paths(
    repository, archive, config, local_borg_version, global_arguments, local_path, remote_path
):
    '''
    Given a repository configuration dict, the name of the latest archive, a configuration dict, the
    local Borg version, global arguments as an argparse.Namespace instance, the local Borg path, and
    the remote Borg path, collect the paths from the given archive as a tuple.

    Directories are left out, as is anything stored underneath the (user-expanded) borgmatic source
    directory.
    '''
    source_directory = pathlib.Path(
        os.path.expanduser(
            config.get(
                'borgmatic_source_directory', borgmatic.borg.state.DEFAULT_BORGMATIC_SOURCE_DIRECTORY
            )
        )
    )
    listing_lines = borgmatic.borg.list.capture_archive_listing(
        repository['path'],
        archive,
        config,
        local_borg_version,
        global_arguments,
        path_format='{type} /{path}{NL}',  # noqa: FS003
        local_path=local_path,
        remote_path=remote_path,
    )
    archive_paths = []

    for line in listing_lines:
        # Each line is "<file type> <absolute path>", per the path format requested above.
        (file_type, path) = line.split(' ', 1)

        if file_type == BORG_DIRECTORY_FILE_TYPE:
            continue

        if source_directory in pathlib.Path(path).parents:
            continue

        archive_paths.append(path)

    return tuple(archive_paths)
|
||||||
|
|
||||||
|
|
||||||
|
def compare_spot_check_hashes(
    repository,
    archive,
    config,
    local_borg_version,
    global_arguments,
    local_path,
    remote_path,
    log_label,
    source_paths,
):
    '''
    Given a repository configuration dict, the name of the latest archive, a configuration dict, the
    local Borg version, global arguments as an argparse.Namespace instance, the local Borg path, the
    remote Borg path, a log label, and spot check source paths, compare the hashes for a sampling of
    the source paths with hashes from corresponding paths in the given archive. Return a sequence of
    the paths that fail that hash comparison.

    A sampled path fails when it no longer exists on disk, when it's missing from the archive, or
    when its source hash differs from its archive hash. If there are no source paths at all, then
    there's nothing to sample and the result is an empty tuple.

    Raise StopIteration if the configuration has no "spot" check configured.
    '''
    spot_check_config = next(check for check in config['checks'] if check['name'] == 'spot')

    # With nothing to sample, nothing can fail. (random.sample() would raise on an empty population.)
    if not source_paths:
        return ()

    # Based on the configured sample percentage, come up with a list of random sample files from the
    # source directories. Always sample at least one path.
    sample_count = max(
        int(len(source_paths) * (min(spot_check_config['data_sample_percentage'], 100) / 100)), 1
    )
    source_sample_paths = tuple(random.sample(source_paths, sample_count))
    existing_source_sample_paths = {
        source_path for source_path in source_sample_paths if os.path.exists(source_path)
    }
    logger.debug(
        f'{log_label}: Sampling {sample_count} source paths (~{spot_check_config["data_sample_percentage"]}%) for spot check'
    )

    # Hash each file in the sample paths (if it exists). Don't run the hash command at all without
    # any file arguments; presumably it would then try to hash standard input instead.
    paths_to_hash = tuple(
        path for path in source_sample_paths if path in existing_source_sample_paths
    )
    hash_output = (
        borgmatic.execute.execute_command_and_capture_output(
            (spot_check_config.get('xxh64sum_command', 'xxh64sum'),) + paths_to_hash
        )
        if paths_to_hash
        else ''
    )

    # xxh64sum separates the hash from the path with two spaces. Splitting on just one would leave
    # a leading space on every path, so no source path would ever match its archive path.
    source_hashes = {}

    for line in hash_output.splitlines():
        (source_hash, separator, path) = line.partition('  ')

        if separator:
            source_hashes[path] = source_hash

    # Sampled paths that have vanished from disk get an empty hash, which never matches.
    source_hashes.update(
        {path: '' for path in source_sample_paths if path not in existing_source_sample_paths}
    )

    archive_hashes = dict(
        reversed(line.split(' ', 1))
        for line in borgmatic.borg.list.capture_archive_listing(
            repository['path'],
            archive,
            config,
            local_borg_version,
            global_arguments,
            list_paths=source_sample_paths,
            path_format='{xxh64} /{path}{NL}',  # noqa: FS003
            local_path=local_path,
            remote_path=remote_path,
        )
        if line
    )

    # Compare the source hashes with the archive hashes to see how many match.
    failing_paths = []

    for path, source_hash in source_hashes.items():
        archive_hash = archive_hashes.get(path)

        if archive_hash is not None and archive_hash == source_hash:
            continue

        failing_paths.append(path)

    return tuple(failing_paths)
|
||||||
|
|
||||||
|
|
||||||
|
def spot_check(
    repository,
    config,
    local_borg_version,
    global_arguments,
    local_path,
    remote_path,
):
    '''
    Given a repository dict, a loaded configuration dict, the local Borg version, global arguments
    as an argparse.Namespace instance, the local Borg path, and the remote Borg path, perform a spot
    check for the latest archive in the given repository.

    A spot check compares file counts and also the hashes for a random sampling of source files on
    disk to those stored in the latest archive. If any differences are beyond configured tolerances,
    then the check fails.

    Raise ValueError if the check fails, if the configured data tolerance percentage exceeds the
    data sample percentage, or if there are no source paths to compare.
    '''
    log_label = f'{repository.get("label", repository["path"])}'
    logger.debug(f'{log_label}: Running spot check')
    spot_check_config = next(check for check in config['checks'] if check['name'] == 'spot')

    if spot_check_config['data_tolerance_percentage'] > spot_check_config['data_sample_percentage']:
        raise ValueError(
            'The data_tolerance_percentage must be less than or equal to the data_sample_percentage'
        )

    source_paths = collect_spot_check_source_paths(
        repository,
        config,
        local_borg_version,
        global_arguments,
        local_path,
        remote_path,
    )
    logger.debug(f'{log_label}: {len(source_paths)} total source paths for spot check')

    # Both percentages below are relative to the source path count, so without any source paths
    # they're undefined. Fail with a clear error instead of dividing by zero.
    if not source_paths:
        raise ValueError(
            'Spot check failed: There are no source paths to compare against the archive'
        )

    archive = borgmatic.borg.rlist.resolve_archive_name(
        repository['path'],
        'latest',
        config,
        local_borg_version,
        global_arguments,
        local_path,
        remote_path,
    )
    logger.debug(f'{log_label}: Using archive {archive} for spot check')

    archive_paths = collect_spot_check_archive_paths(
        repository,
        archive,
        config,
        local_borg_version,
        global_arguments,
        local_path,
        remote_path,
    )
    logger.debug(f'{log_label}: {len(archive_paths)} total archive paths for spot check')

    # Calculate the percentage delta between the source paths count and the archive paths count, and
    # compare that delta to the configured count tolerance percentage.
    count_delta_percentage = abs(len(source_paths) - len(archive_paths)) / len(source_paths) * 100

    if count_delta_percentage > spot_check_config['count_tolerance_percentage']:
        # Sort the differences so the log output is stable from run to run.
        logger.debug(
            f'{log_label}: Paths in source paths but not latest archive: {", ".join(sorted(set(source_paths) - set(archive_paths))) or "none"}'
        )
        logger.debug(
            f'{log_label}: Paths in latest archive but not source paths: {", ".join(sorted(set(archive_paths) - set(source_paths))) or "none"}'
        )
        raise ValueError(
            f'Spot check failed: {count_delta_percentage:.2f}% file count delta between source paths and latest archive (tolerance is {spot_check_config["count_tolerance_percentage"]}%)'
        )

    failing_paths = compare_spot_check_hashes(
        repository,
        archive,
        config,
        local_borg_version,
        global_arguments,
        local_path,
        remote_path,
        log_label,
        source_paths,
    )

    # Error if the percentage of failing hashes exceeds the configured tolerance percentage. Note
    # that this percentage is out of all source paths, not just the sampled ones.
    logger.debug(f'{log_label}: {len(failing_paths)} non-matching spot check hashes')
    data_tolerance_percentage = spot_check_config['data_tolerance_percentage']
    failing_percentage = (len(failing_paths) / len(source_paths)) * 100

    if failing_percentage > data_tolerance_percentage:
        logger.debug(
            f'{log_label}: Source paths with data not matching the latest archive: {", ".join(failing_paths)}'
        )
        raise ValueError(
            f'Spot check failed: {failing_percentage:.2f}% of source paths with data not matching the latest archive (tolerance is {data_tolerance_percentage}%)'
        )

    logger.info(
        f'{log_label}: Spot check passed with a {count_delta_percentage:.2f}% file count delta and a {failing_percentage:.2f}% file data delta'
    )
|
||||||
|
|
||||||
|
|
||||||
def run_check(
|
def run_check(
|
||||||
config_filename,
|
config_filename,
|
||||||
repository,
|
repository,
|
||||||
|
@ -20,6 +577,8 @@ def run_check(
|
||||||
):
|
):
|
||||||
'''
|
'''
|
||||||
Run the "check" action for the given repository.
|
Run the "check" action for the given repository.
|
||||||
|
|
||||||
|
Raise ValueError if the Borg repository ID cannot be determined.
|
||||||
'''
|
'''
|
||||||
if check_arguments.repository and not borgmatic.config.validate.repositories_match(
|
if check_arguments.repository and not borgmatic.config.validate.repositories_match(
|
||||||
repository, check_arguments.repository
|
repository, check_arguments.repository
|
||||||
|
@ -34,16 +593,69 @@ def run_check(
|
||||||
global_arguments.dry_run,
|
global_arguments.dry_run,
|
||||||
**hook_context,
|
**hook_context,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f'{repository.get("label", repository["path"])}: Running consistency checks')
|
logger.info(f'{repository.get("label", repository["path"])}: Running consistency checks')
|
||||||
borgmatic.borg.check.check_archives(
|
repository_id = borgmatic.borg.check.get_repository_id(
|
||||||
repository['path'],
|
repository['path'],
|
||||||
config,
|
config,
|
||||||
local_borg_version,
|
local_borg_version,
|
||||||
check_arguments,
|
|
||||||
global_arguments,
|
global_arguments,
|
||||||
local_path=local_path,
|
local_path=local_path,
|
||||||
remote_path=remote_path,
|
remote_path=remote_path,
|
||||||
)
|
)
|
||||||
|
upgrade_check_times(config, repository_id)
|
||||||
|
configured_checks = parse_checks(config, check_arguments.only_checks)
|
||||||
|
archive_filter_flags = borgmatic.borg.check.make_archive_filter_flags(
|
||||||
|
local_borg_version, config, configured_checks, check_arguments
|
||||||
|
)
|
||||||
|
archives_check_id = make_archives_check_id(archive_filter_flags)
|
||||||
|
checks = filter_checks_on_frequency(
|
||||||
|
config,
|
||||||
|
repository_id,
|
||||||
|
configured_checks,
|
||||||
|
check_arguments.force,
|
||||||
|
archives_check_id,
|
||||||
|
)
|
||||||
|
borg_specific_checks = set(checks).intersection({'repository', 'archives', 'data'})
|
||||||
|
|
||||||
|
if borg_specific_checks:
|
||||||
|
borgmatic.borg.check.check_archives(
|
||||||
|
repository['path'],
|
||||||
|
config,
|
||||||
|
local_borg_version,
|
||||||
|
check_arguments,
|
||||||
|
global_arguments,
|
||||||
|
borg_specific_checks,
|
||||||
|
archive_filter_flags,
|
||||||
|
local_path=local_path,
|
||||||
|
remote_path=remote_path,
|
||||||
|
)
|
||||||
|
for check in borg_specific_checks:
|
||||||
|
write_check_time(make_check_time_path(config, repository_id, check, archives_check_id))
|
||||||
|
|
||||||
|
if 'extract' in checks:
|
||||||
|
borgmatic.borg.extract.extract_last_archive_dry_run(
|
||||||
|
config,
|
||||||
|
local_borg_version,
|
||||||
|
global_arguments,
|
||||||
|
repository['path'],
|
||||||
|
config.get('lock_wait'),
|
||||||
|
local_path,
|
||||||
|
remote_path,
|
||||||
|
)
|
||||||
|
write_check_time(make_check_time_path(config, repository_id, 'extract'))
|
||||||
|
|
||||||
|
if 'spot' in checks:
|
||||||
|
spot_check(
|
||||||
|
repository,
|
||||||
|
config,
|
||||||
|
local_borg_version,
|
||||||
|
global_arguments,
|
||||||
|
local_path,
|
||||||
|
remote_path,
|
||||||
|
)
|
||||||
|
write_check_time(make_check_time_path(config, repository_id, 'spot'))
|
||||||
|
|
||||||
borgmatic.hooks.command.execute_hook(
|
borgmatic.hooks.command.execute_hook(
|
||||||
config.get('after_check'),
|
config.get('after_check'),
|
||||||
config.get('umask'),
|
config.get('umask'),
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
import logging
|
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
|
@ -1,172 +1,26 @@
|
||||||
import argparse
|
import argparse
|
||||||
import datetime
|
|
||||||
import hashlib
|
|
||||||
import itertools
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
|
|
||||||
from borgmatic.borg import environment, extract, feature, flags, rinfo, state
|
from borgmatic.borg import environment, feature, flags, rinfo
|
||||||
from borgmatic.execute import DO_NOT_CAPTURE, execute_command
|
from borgmatic.execute import DO_NOT_CAPTURE, execute_command
|
||||||
|
|
||||||
DEFAULT_CHECKS = (
|
|
||||||
{'name': 'repository', 'frequency': '1 month'},
|
|
||||||
{'name': 'archives', 'frequency': '1 month'},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def parse_checks(config, only_checks=None):
|
def make_archive_filter_flags(local_borg_version, config, checks, check_arguments):
|
||||||
'''
|
'''
|
||||||
Given a configuration dict with a "checks" sequence of dicts and an optional list of override
|
Given the local Borg version, a configuration dict, a parsed sequence of checks, and check
|
||||||
checks, return a tuple of named checks to run.
|
arguments as an argparse.Namespace instance, transform the checks into tuple of command-line
|
||||||
|
flags for filtering archives in a check command.
|
||||||
|
|
||||||
For example, given a config of:
|
If "check_last" is set in the configuration and "archives" is in checks, then include a "--last"
|
||||||
|
flag. And if "prefix" is set in configuration and "archives" is in checks, then include a
|
||||||
{'checks': ({'name': 'repository'}, {'name': 'archives'})}
|
"--match-archives" flag.
|
||||||
|
|
||||||
This will be returned as:
|
|
||||||
|
|
||||||
('repository', 'archives')
|
|
||||||
|
|
||||||
If no "checks" option is present in the config, return the DEFAULT_CHECKS. If a checks value
|
|
||||||
has a name of "disabled", return an empty tuple, meaning that no checks should be run.
|
|
||||||
'''
|
'''
|
||||||
checks = only_checks or tuple(
|
check_last = config.get('check_last', None)
|
||||||
check_config['name'] for check_config in (config.get('checks', None) or DEFAULT_CHECKS)
|
prefix = config.get('prefix')
|
||||||
)
|
|
||||||
checks = tuple(check.lower() for check in checks)
|
|
||||||
|
|
||||||
if 'disabled' in checks:
|
|
||||||
logger.warning(
|
|
||||||
'The "disabled" value for the "checks" option is deprecated and will be removed from a future release; use "skip_actions" instead'
|
|
||||||
)
|
|
||||||
if len(checks) > 1:
|
|
||||||
logger.warning(
|
|
||||||
'Multiple checks are configured, but one of them is "disabled"; not running any checks'
|
|
||||||
)
|
|
||||||
return ()
|
|
||||||
|
|
||||||
return checks
|
|
||||||
|
|
||||||
|
|
||||||
def parse_frequency(frequency):
|
|
||||||
'''
|
|
||||||
Given a frequency string with a number and a unit of time, return a corresponding
|
|
||||||
datetime.timedelta instance or None if the frequency is None or "always".
|
|
||||||
|
|
||||||
For instance, given "3 weeks", return datetime.timedelta(weeks=3)
|
|
||||||
|
|
||||||
Raise ValueError if the given frequency cannot be parsed.
|
|
||||||
'''
|
|
||||||
if not frequency:
|
|
||||||
return None
|
|
||||||
|
|
||||||
frequency = frequency.strip().lower()
|
|
||||||
|
|
||||||
if frequency == 'always':
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
number, time_unit = frequency.split(' ')
|
|
||||||
number = int(number)
|
|
||||||
except ValueError:
|
|
||||||
raise ValueError(f"Could not parse consistency check frequency '{frequency}'")
|
|
||||||
|
|
||||||
if not time_unit.endswith('s'):
|
|
||||||
time_unit += 's'
|
|
||||||
|
|
||||||
if time_unit == 'months':
|
|
||||||
number *= 30
|
|
||||||
time_unit = 'days'
|
|
||||||
elif time_unit == 'years':
|
|
||||||
number *= 365
|
|
||||||
time_unit = 'days'
|
|
||||||
|
|
||||||
try:
|
|
||||||
return datetime.timedelta(**{time_unit: number})
|
|
||||||
except TypeError:
|
|
||||||
raise ValueError(f"Could not parse consistency check frequency '{frequency}'")
|
|
||||||
|
|
||||||
|
|
||||||
def filter_checks_on_frequency(
|
|
||||||
config,
|
|
||||||
borg_repository_id,
|
|
||||||
checks,
|
|
||||||
force,
|
|
||||||
archives_check_id=None,
|
|
||||||
):
|
|
||||||
'''
|
|
||||||
Given a configuration dict with a "checks" sequence of dicts, a Borg repository ID, a sequence
|
|
||||||
of checks, whether to force checks to run, and an ID for the archives check potentially being
|
|
||||||
run (if any), filter down those checks based on the configured "frequency" for each check as
|
|
||||||
compared to its check time file.
|
|
||||||
|
|
||||||
In other words, a check whose check time file's timestamp is too new (based on the configured
|
|
||||||
frequency) will get cut from the returned sequence of checks. Example:
|
|
||||||
|
|
||||||
config = {
|
|
||||||
'checks': [
|
|
||||||
{
|
|
||||||
'name': 'archives',
|
|
||||||
'frequency': '2 weeks',
|
|
||||||
},
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
When this function is called with that config and "archives" in checks, "archives" will get
|
|
||||||
filtered out of the returned result if its check time file is newer than 2 weeks old, indicating
|
|
||||||
that it's not yet time to run that check again.
|
|
||||||
|
|
||||||
Raise ValueError if a frequency cannot be parsed.
|
|
||||||
'''
|
|
||||||
if not checks:
|
|
||||||
return checks
|
|
||||||
|
|
||||||
filtered_checks = list(checks)
|
|
||||||
|
|
||||||
if force:
|
|
||||||
return tuple(filtered_checks)
|
|
||||||
|
|
||||||
for check_config in config.get('checks', DEFAULT_CHECKS):
|
|
||||||
check = check_config['name']
|
|
||||||
if checks and check not in checks:
|
|
||||||
continue
|
|
||||||
|
|
||||||
frequency_delta = parse_frequency(check_config.get('frequency'))
|
|
||||||
if not frequency_delta:
|
|
||||||
continue
|
|
||||||
|
|
||||||
check_time = probe_for_check_time(config, borg_repository_id, check, archives_check_id)
|
|
||||||
if not check_time:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# If we've not yet reached the time when the frequency dictates we're ready for another
|
|
||||||
# check, skip this check.
|
|
||||||
if datetime.datetime.now() < check_time + frequency_delta:
|
|
||||||
remaining = check_time + frequency_delta - datetime.datetime.now()
|
|
||||||
logger.info(
|
|
||||||
f'Skipping {check} check due to configured frequency; {remaining} until next check (use --force to check anyway)'
|
|
||||||
)
|
|
||||||
filtered_checks.remove(check)
|
|
||||||
|
|
||||||
return tuple(filtered_checks)
|
|
||||||
|
|
||||||
|
|
||||||
def make_archive_filter_flags(
|
|
||||||
local_borg_version, config, checks, check_arguments, check_last=None, prefix=None
|
|
||||||
):
|
|
||||||
'''
|
|
||||||
Given the local Borg version, a configuration dict, a parsed sequence of checks, check arguments
|
|
||||||
as an argparse.Namespace instance, the check last value, and a consistency check prefix,
|
|
||||||
transform the checks into tuple of command-line flags for filtering archives in a check command.
|
|
||||||
|
|
||||||
If a check_last value is given and "archives" is in checks, then include a "--last" flag. And if
|
|
||||||
a prefix value is given and "archives" is in checks, then include a "--match-archives" flag.
|
|
||||||
'''
|
|
||||||
if 'archives' in checks or 'data' in checks:
|
if 'archives' in checks or 'data' in checks:
|
||||||
return (('--last', str(check_last)) if check_last else ()) + (
|
return (('--last', str(check_last)) if check_last else ()) + (
|
||||||
(
|
(
|
||||||
|
@ -196,17 +50,6 @@ def make_archive_filter_flags(
|
||||||
return ()
|
return ()
|
||||||
|
|
||||||
|
|
||||||
def make_archives_check_id(archive_filter_flags):
|
|
||||||
'''
|
|
||||||
Given a sequence of flags to filter archives, return a unique hash corresponding to those
|
|
||||||
particular flags. If there are no flags, return None.
|
|
||||||
'''
|
|
||||||
if not archive_filter_flags:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return hashlib.sha256(' '.join(archive_filter_flags).encode()).hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
def make_check_flags(checks, archive_filter_flags):
|
def make_check_flags(checks, archive_filter_flags):
|
||||||
'''
|
'''
|
||||||
Given a parsed sequence of checks and a sequence of flags to filter archives, transform the
|
Given a parsed sequence of checks and a sequence of flags to filter archives, transform the
|
||||||
|
@ -240,144 +83,17 @@ def make_check_flags(checks, archive_filter_flags):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def make_check_time_path(config, borg_repository_id, check_type, archives_check_id=None):
|
def get_repository_id(
|
||||||
'''
|
repository_path, config, local_borg_version, global_arguments, local_path, remote_path
|
||||||
Given a configuration dict, a Borg repository ID, the name of a check type ("repository",
|
|
||||||
"archives", etc.), and a unique hash of the archives filter flags, return a path for recording
|
|
||||||
that check's time (the time of that check last occurring).
|
|
||||||
'''
|
|
||||||
borgmatic_source_directory = os.path.expanduser(
|
|
||||||
config.get('borgmatic_source_directory', state.DEFAULT_BORGMATIC_SOURCE_DIRECTORY)
|
|
||||||
)
|
|
||||||
|
|
||||||
if check_type in ('archives', 'data'):
|
|
||||||
return os.path.join(
|
|
||||||
borgmatic_source_directory,
|
|
||||||
'checks',
|
|
||||||
borg_repository_id,
|
|
||||||
check_type,
|
|
||||||
archives_check_id if archives_check_id else 'all',
|
|
||||||
)
|
|
||||||
|
|
||||||
return os.path.join(
|
|
||||||
borgmatic_source_directory,
|
|
||||||
'checks',
|
|
||||||
borg_repository_id,
|
|
||||||
check_type,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def write_check_time(path): # pragma: no cover
|
|
||||||
'''
|
|
||||||
Record a check time of now as the modification time of the given path.
|
|
||||||
'''
|
|
||||||
logger.debug(f'Writing check time at {path}')
|
|
||||||
|
|
||||||
os.makedirs(os.path.dirname(path), mode=0o700, exist_ok=True)
|
|
||||||
pathlib.Path(path, mode=0o600).touch()
|
|
||||||
|
|
||||||
|
|
||||||
def read_check_time(path):
|
|
||||||
'''
|
|
||||||
Return the check time based on the modification time of the given path. Return None if the path
|
|
||||||
doesn't exist.
|
|
||||||
'''
|
|
||||||
logger.debug(f'Reading check time from {path}')
|
|
||||||
|
|
||||||
try:
|
|
||||||
return datetime.datetime.fromtimestamp(os.stat(path).st_mtime)
|
|
||||||
except FileNotFoundError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def probe_for_check_time(config, borg_repository_id, check, archives_check_id):
|
|
||||||
'''
|
|
||||||
Given a configuration dict, a Borg repository ID, the name of a check type ("repository",
|
|
||||||
"archives", etc.), and a unique hash of the archives filter flags, return a the corresponding
|
|
||||||
check time or None if such a check time does not exist.
|
|
||||||
|
|
||||||
When the check type is "archives" or "data", this function probes two different paths to find
|
|
||||||
the check time, e.g.:
|
|
||||||
|
|
||||||
~/.borgmatic/checks/1234567890/archives/9876543210
|
|
||||||
~/.borgmatic/checks/1234567890/archives/all
|
|
||||||
|
|
||||||
... and returns the maximum modification time of the files found (if any). The first path
|
|
||||||
represents a more specific archives check time (a check on a subset of archives), and the second
|
|
||||||
is a fallback to the last "all" archives check.
|
|
||||||
|
|
||||||
For other check types, this function reads from a single check time path, e.g.:
|
|
||||||
|
|
||||||
~/.borgmatic/checks/1234567890/repository
|
|
||||||
'''
|
|
||||||
check_times = (
|
|
||||||
read_check_time(group[0])
|
|
||||||
for group in itertools.groupby(
|
|
||||||
(
|
|
||||||
make_check_time_path(config, borg_repository_id, check, archives_check_id),
|
|
||||||
make_check_time_path(config, borg_repository_id, check),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
return max(check_time for check_time in check_times if check_time)
|
|
||||||
except ValueError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def upgrade_check_times(config, borg_repository_id):
|
|
||||||
'''
|
|
||||||
Given a configuration dict and a Borg repository ID, upgrade any corresponding check times on
|
|
||||||
disk from old-style paths to new-style paths.
|
|
||||||
|
|
||||||
Currently, the only upgrade performed is renaming an archive or data check path that looks like:
|
|
||||||
|
|
||||||
~/.borgmatic/checks/1234567890/archives
|
|
||||||
|
|
||||||
to:
|
|
||||||
|
|
||||||
~/.borgmatic/checks/1234567890/archives/all
|
|
||||||
'''
|
|
||||||
for check_type in ('archives', 'data'):
|
|
||||||
new_path = make_check_time_path(config, borg_repository_id, check_type, 'all')
|
|
||||||
old_path = os.path.dirname(new_path)
|
|
||||||
temporary_path = f'{old_path}.temp'
|
|
||||||
|
|
||||||
if not os.path.isfile(old_path) and not os.path.isfile(temporary_path):
|
|
||||||
continue
|
|
||||||
|
|
||||||
logger.debug(f'Upgrading archives check time from {old_path} to {new_path}')
|
|
||||||
|
|
||||||
try:
|
|
||||||
os.rename(old_path, temporary_path)
|
|
||||||
except FileNotFoundError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
os.mkdir(old_path)
|
|
||||||
os.rename(temporary_path, new_path)
|
|
||||||
|
|
||||||
|
|
||||||
def check_archives(
|
|
||||||
repository_path,
|
|
||||||
config,
|
|
||||||
local_borg_version,
|
|
||||||
check_arguments,
|
|
||||||
global_arguments,
|
|
||||||
local_path='borg',
|
|
||||||
remote_path=None,
|
|
||||||
):
|
):
|
||||||
'''
|
'''
|
||||||
Given a local or remote repository path, a configuration dict, the local Borg version, check
|
Given a local or remote repository path, a configuration dict, the local Borg version, global
|
||||||
arguments as an argparse.Namespace instance, global arguments, and local/remote commands to run,
|
arguments, and local/remote commands to run, return the corresponding Borg repository ID.
|
||||||
check the contained Borg archives for consistency.
|
|
||||||
|
|
||||||
If there are no consistency checks to run, skip running them.
|
Raise ValueError if the Borg repository ID cannot be determined.
|
||||||
|
|
||||||
Raises ValueError if the Borg repository ID cannot be determined.
|
|
||||||
'''
|
'''
|
||||||
try:
|
try:
|
||||||
borg_repository_id = json.loads(
|
return json.loads(
|
||||||
rinfo.display_repository_info(
|
rinfo.display_repository_info(
|
||||||
repository_path,
|
repository_path,
|
||||||
config,
|
config,
|
||||||
|
@ -391,82 +107,63 @@ def check_archives(
|
||||||
except (json.JSONDecodeError, KeyError):
|
except (json.JSONDecodeError, KeyError):
|
||||||
raise ValueError(f'Cannot determine Borg repository ID for {repository_path}')
|
raise ValueError(f'Cannot determine Borg repository ID for {repository_path}')
|
||||||
|
|
||||||
upgrade_check_times(config, borg_repository_id)
|
|
||||||
|
|
||||||
check_last = config.get('check_last', None)
|
def check_archives(
|
||||||
prefix = config.get('prefix')
|
repository_path,
|
||||||
configured_checks = parse_checks(config, check_arguments.only_checks)
|
config,
|
||||||
lock_wait = None
|
local_borg_version,
|
||||||
|
check_arguments,
|
||||||
|
global_arguments,
|
||||||
|
checks,
|
||||||
|
archive_filter_flags,
|
||||||
|
local_path='borg',
|
||||||
|
remote_path=None,
|
||||||
|
):
|
||||||
|
'''
|
||||||
|
Given a local or remote repository path, a configuration dict, the local Borg version, check
|
||||||
|
arguments as an argparse.Namespace instance, global arguments, a set of named Borg checks to run
|
||||||
|
(some combination "repository", "archives", and/or "data"), archive filter flags, and
|
||||||
|
local/remote commands to run, check the contained Borg archives for consistency.
|
||||||
|
'''
|
||||||
|
lock_wait = config.get('lock_wait')
|
||||||
extra_borg_options = config.get('extra_borg_options', {}).get('check', '')
|
extra_borg_options = config.get('extra_borg_options', {}).get('check', '')
|
||||||
archive_filter_flags = make_archive_filter_flags(
|
|
||||||
local_borg_version, config, configured_checks, check_arguments, check_last, prefix
|
|
||||||
)
|
|
||||||
archives_check_id = make_archives_check_id(archive_filter_flags)
|
|
||||||
|
|
||||||
checks = filter_checks_on_frequency(
|
verbosity_flags = ()
|
||||||
config,
|
if logger.isEnabledFor(logging.INFO):
|
||||||
borg_repository_id,
|
verbosity_flags = ('--info',)
|
||||||
configured_checks,
|
if logger.isEnabledFor(logging.DEBUG):
|
||||||
check_arguments.force,
|
verbosity_flags = ('--debug', '--show-rc')
|
||||||
archives_check_id,
|
|
||||||
|
full_command = (
|
||||||
|
(local_path, 'check')
|
||||||
|
+ (('--repair',) if check_arguments.repair else ())
|
||||||
|
+ make_check_flags(checks, archive_filter_flags)
|
||||||
|
+ (('--remote-path', remote_path) if remote_path else ())
|
||||||
|
+ (('--log-json',) if global_arguments.log_json else ())
|
||||||
|
+ (('--lock-wait', str(lock_wait)) if lock_wait else ())
|
||||||
|
+ verbosity_flags
|
||||||
|
+ (('--progress',) if check_arguments.progress else ())
|
||||||
|
+ (tuple(extra_borg_options.split(' ')) if extra_borg_options else ())
|
||||||
|
+ flags.make_repository_flags(repository_path, local_borg_version)
|
||||||
)
|
)
|
||||||
|
|
||||||
if set(checks).intersection({'repository', 'archives', 'data'}):
|
borg_environment = environment.make_environment(config)
|
||||||
lock_wait = config.get('lock_wait')
|
borg_exit_codes = config.get('borg_exit_codes')
|
||||||
|
|
||||||
verbosity_flags = ()
|
# The Borg repair option triggers an interactive prompt, which won't work when output is
|
||||||
if logger.isEnabledFor(logging.INFO):
|
# captured. And progress messes with the terminal directly.
|
||||||
verbosity_flags = ('--info',)
|
if check_arguments.repair or check_arguments.progress:
|
||||||
if logger.isEnabledFor(logging.DEBUG):
|
execute_command(
|
||||||
verbosity_flags = ('--debug', '--show-rc')
|
full_command,
|
||||||
|
output_file=DO_NOT_CAPTURE,
|
||||||
full_command = (
|
extra_environment=borg_environment,
|
||||||
(local_path, 'check')
|
borg_local_path=local_path,
|
||||||
+ (('--repair',) if check_arguments.repair else ())
|
borg_exit_codes=borg_exit_codes,
|
||||||
+ make_check_flags(checks, archive_filter_flags)
|
|
||||||
+ (('--remote-path', remote_path) if remote_path else ())
|
|
||||||
+ (('--log-json',) if global_arguments.log_json else ())
|
|
||||||
+ (('--lock-wait', str(lock_wait)) if lock_wait else ())
|
|
||||||
+ verbosity_flags
|
|
||||||
+ (('--progress',) if check_arguments.progress else ())
|
|
||||||
+ (tuple(extra_borg_options.split(' ')) if extra_borg_options else ())
|
|
||||||
+ flags.make_repository_flags(repository_path, local_borg_version)
|
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
borg_environment = environment.make_environment(config)
|
execute_command(
|
||||||
borg_exit_codes = config.get('borg_exit_codes')
|
full_command,
|
||||||
|
extra_environment=borg_environment,
|
||||||
# The Borg repair option triggers an interactive prompt, which won't work when output is
|
borg_local_path=local_path,
|
||||||
# captured. And progress messes with the terminal directly.
|
borg_exit_codes=borg_exit_codes,
|
||||||
if check_arguments.repair or check_arguments.progress:
|
|
||||||
execute_command(
|
|
||||||
full_command,
|
|
||||||
output_file=DO_NOT_CAPTURE,
|
|
||||||
extra_environment=borg_environment,
|
|
||||||
borg_local_path=local_path,
|
|
||||||
borg_exit_codes=borg_exit_codes,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
execute_command(
|
|
||||||
full_command,
|
|
||||||
extra_environment=borg_environment,
|
|
||||||
borg_local_path=local_path,
|
|
||||||
borg_exit_codes=borg_exit_codes,
|
|
||||||
)
|
|
||||||
|
|
||||||
for check in checks:
|
|
||||||
write_check_time(
|
|
||||||
make_check_time_path(config, borg_repository_id, check, archives_check_id)
|
|
||||||
)
|
|
||||||
|
|
||||||
if 'extract' in checks:
|
|
||||||
extract.extract_last_archive_dry_run(
|
|
||||||
config,
|
|
||||||
local_borg_version,
|
|
||||||
global_arguments,
|
|
||||||
repository_path,
|
|
||||||
lock_wait,
|
|
||||||
local_path,
|
|
||||||
remote_path,
|
|
||||||
)
|
)
|
||||||
write_check_time(make_check_time_path(config, borg_repository_id, 'extract'))
|
|
||||||
|
|
|
@ -275,11 +275,11 @@ def collect_special_file_paths(
|
||||||
create_command, config, local_path, working_directory, borg_environment, skip_directories
|
create_command, config, local_path, working_directory, borg_environment, skip_directories
|
||||||
):
|
):
|
||||||
'''
|
'''
|
||||||
Given a Borg create command as a tuple, a local Borg path, a working directory, a dict of
|
Given a Borg create command as a tuple, a configuration dict, a local Borg path, a working
|
||||||
environment variables to pass to Borg, and a sequence of parent directories to skip, collect the
|
directory, a dict of environment variables to pass to Borg, and a sequence of parent directories
|
||||||
paths for any special files (character devices, block devices, and named pipes / FIFOs) that
|
to skip, collect the paths for any special files (character devices, block devices, and named
|
||||||
Borg would encounter during a create. These are all paths that could cause Borg to hang if its
|
pipes / FIFOs) that Borg would encounter during a create. These are all paths that could cause
|
||||||
--read-special flag is used.
|
Borg to hang if its --read-special flag is used.
|
||||||
'''
|
'''
|
||||||
# Omit "--exclude-nodump" from the Borg dry run command, because that flag causes Borg to open
|
# Omit "--exclude-nodump" from the Borg dry run command, because that flag causes Borg to open
|
||||||
# files including any named pipe we've created.
|
# files including any named pipe we've created.
|
||||||
|
@ -320,35 +320,31 @@ def check_all_source_directories_exist(source_directories):
|
||||||
raise ValueError(f"Source directories do not exist: {', '.join(missing_directories)}")
|
raise ValueError(f"Source directories do not exist: {', '.join(missing_directories)}")
|
||||||
|
|
||||||
|
|
||||||
def create_archive(
|
def make_base_create_command(
|
||||||
dry_run,
|
dry_run,
|
||||||
repository_path,
|
repository_path,
|
||||||
config,
|
config,
|
||||||
config_paths,
|
config_paths,
|
||||||
local_borg_version,
|
local_borg_version,
|
||||||
global_arguments,
|
global_arguments,
|
||||||
|
borgmatic_source_directories,
|
||||||
local_path='borg',
|
local_path='borg',
|
||||||
remote_path=None,
|
remote_path=None,
|
||||||
progress=False,
|
progress=False,
|
||||||
stats=False,
|
|
||||||
json=False,
|
json=False,
|
||||||
list_files=False,
|
list_files=False,
|
||||||
stream_processes=None,
|
stream_processes=None,
|
||||||
):
|
):
|
||||||
'''
|
'''
|
||||||
Given vebosity/dry-run flags, a local or remote repository path, a configuration dict, a
|
Given vebosity/dry-run flags, a local or remote repository path, a configuration dict, a
|
||||||
sequence of loaded configuration paths, the local Borg version, and global arguments as an
|
sequence of loaded configuration paths, the local Borg version, global arguments as an
|
||||||
argparse.Namespace instance, create a Borg archive and return Borg's JSON output (if any).
|
argparse.Namespace instance, and a sequence of borgmatic source directories, return a tuple of
|
||||||
|
(base Borg create command flags, Borg create command positional arguments, open pattern file
|
||||||
If a sequence of stream processes is given (instances of subprocess.Popen), then execute the
|
handle, open exclude file handle).
|
||||||
create command while also triggering the given processes to produce output.
|
|
||||||
'''
|
'''
|
||||||
borgmatic.logger.add_custom_log_levels()
|
|
||||||
borgmatic_source_directories = expand_directories(
|
|
||||||
collect_borgmatic_source_directories(config.get('borgmatic_source_directory'))
|
|
||||||
)
|
|
||||||
if config.get('source_directories_must_exist', False):
|
if config.get('source_directories_must_exist', False):
|
||||||
check_all_source_directories_exist(config.get('source_directories'))
|
check_all_source_directories_exist(config.get('source_directories'))
|
||||||
|
|
||||||
sources = deduplicate_directories(
|
sources = deduplicate_directories(
|
||||||
map_directories_to_devices(
|
map_directories_to_devices(
|
||||||
expand_directories(
|
expand_directories(
|
||||||
|
@ -364,11 +360,6 @@ def create_archive(
|
||||||
|
|
||||||
ensure_files_readable(config.get('patterns_from'), config.get('exclude_from'))
|
ensure_files_readable(config.get('patterns_from'), config.get('exclude_from'))
|
||||||
|
|
||||||
try:
|
|
||||||
working_directory = os.path.expanduser(config.get('working_directory'))
|
|
||||||
except TypeError:
|
|
||||||
working_directory = None
|
|
||||||
|
|
||||||
pattern_file = (
|
pattern_file = (
|
||||||
write_pattern_file(config.get('patterns'), sources)
|
write_pattern_file(config.get('patterns'), sources)
|
||||||
if config.get('patterns') or config.get('patterns_from')
|
if config.get('patterns') or config.get('patterns_from')
|
||||||
|
@ -411,11 +402,6 @@ def create_archive(
|
||||||
('--remote-ratelimit', str(upload_rate_limit)) if upload_rate_limit else ()
|
('--remote-ratelimit', str(upload_rate_limit)) if upload_rate_limit else ()
|
||||||
)
|
)
|
||||||
|
|
||||||
if stream_processes and config.get('read_special') is False:
|
|
||||||
logger.warning(
|
|
||||||
f'{repository_path}: Ignoring configured "read_special" value of false, as true is needed for database hooks.'
|
|
||||||
)
|
|
||||||
|
|
||||||
create_flags = (
|
create_flags = (
|
||||||
tuple(local_path.split(' '))
|
tuple(local_path.split(' '))
|
||||||
+ ('create',)
|
+ ('create',)
|
||||||
|
@ -451,22 +437,19 @@ def create_archive(
|
||||||
repository_path, archive_name_format, local_borg_version
|
repository_path, archive_name_format, local_borg_version
|
||||||
) + (sources if not pattern_file else ())
|
) + (sources if not pattern_file else ())
|
||||||
|
|
||||||
if json:
|
|
||||||
output_log_level = None
|
|
||||||
elif list_files or (stats and not dry_run):
|
|
||||||
output_log_level = logging.ANSWER
|
|
||||||
else:
|
|
||||||
output_log_level = logging.INFO
|
|
||||||
|
|
||||||
# The progress output isn't compatible with captured and logged output, as progress messes with
|
|
||||||
# the terminal directly.
|
|
||||||
output_file = DO_NOT_CAPTURE if progress else None
|
|
||||||
|
|
||||||
borg_environment = environment.make_environment(config)
|
|
||||||
|
|
||||||
# If database hooks are enabled (as indicated by streaming processes), exclude files that might
|
# If database hooks are enabled (as indicated by streaming processes), exclude files that might
|
||||||
# cause Borg to hang. But skip this if the user has explicitly set the "read_special" to True.
|
# cause Borg to hang. But skip this if the user has explicitly set the "read_special" to True.
|
||||||
if stream_processes and not config.get('read_special'):
|
if stream_processes and not config.get('read_special'):
|
||||||
|
logger.warning(
|
||||||
|
f'{repository_path}: Ignoring configured "read_special" value of false, as true is needed for database hooks.'
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
working_directory = os.path.expanduser(config.get('working_directory'))
|
||||||
|
except TypeError:
|
||||||
|
working_directory = None
|
||||||
|
|
||||||
|
borg_environment = environment.make_environment(config)
|
||||||
|
|
||||||
logger.debug(f'{repository_path}: Collecting special file paths')
|
logger.debug(f'{repository_path}: Collecting special file paths')
|
||||||
special_file_paths = collect_special_file_paths(
|
special_file_paths = collect_special_file_paths(
|
||||||
create_flags + create_positional_arguments,
|
create_flags + create_positional_arguments,
|
||||||
|
@ -489,6 +472,73 @@ def create_archive(
|
||||||
)
|
)
|
||||||
create_flags += make_exclude_flags(config, exclude_file.name)
|
create_flags += make_exclude_flags(config, exclude_file.name)
|
||||||
|
|
||||||
|
return (create_flags, create_positional_arguments, pattern_file, exclude_file)
|
||||||
|
|
||||||
|
|
||||||
|
def create_archive(
|
||||||
|
dry_run,
|
||||||
|
repository_path,
|
||||||
|
config,
|
||||||
|
config_paths,
|
||||||
|
local_borg_version,
|
||||||
|
global_arguments,
|
||||||
|
local_path='borg',
|
||||||
|
remote_path=None,
|
||||||
|
progress=False,
|
||||||
|
stats=False,
|
||||||
|
json=False,
|
||||||
|
list_files=False,
|
||||||
|
stream_processes=None,
|
||||||
|
):
|
||||||
|
'''
|
||||||
|
Given vebosity/dry-run flags, a local or remote repository path, a configuration dict, a
|
||||||
|
sequence of loaded configuration paths, the local Borg version, and global arguments as an
|
||||||
|
argparse.Namespace instance, create a Borg archive and return Borg's JSON output (if any).
|
||||||
|
|
||||||
|
If a sequence of stream processes is given (instances of subprocess.Popen), then execute the
|
||||||
|
create command while also triggering the given processes to produce output.
|
||||||
|
'''
|
||||||
|
borgmatic.logger.add_custom_log_levels()
|
||||||
|
borgmatic_source_directories = expand_directories(
|
||||||
|
collect_borgmatic_source_directories(config.get('borgmatic_source_directory'))
|
||||||
|
)
|
||||||
|
|
||||||
|
(create_flags, create_positional_arguments, pattern_file, exclude_file) = (
|
||||||
|
make_base_create_command(
|
||||||
|
dry_run,
|
||||||
|
repository_path,
|
||||||
|
config,
|
||||||
|
config_paths,
|
||||||
|
local_borg_version,
|
||||||
|
global_arguments,
|
||||||
|
borgmatic_source_directories,
|
||||||
|
local_path,
|
||||||
|
remote_path,
|
||||||
|
progress,
|
||||||
|
json,
|
||||||
|
list_files,
|
||||||
|
stream_processes,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if json:
|
||||||
|
output_log_level = None
|
||||||
|
elif list_files or (stats and not dry_run):
|
||||||
|
output_log_level = logging.ANSWER
|
||||||
|
else:
|
||||||
|
output_log_level = logging.INFO
|
||||||
|
|
||||||
|
# The progress output isn't compatible with captured and logged output, as progress messes with
|
||||||
|
# the terminal directly.
|
||||||
|
output_file = DO_NOT_CAPTURE if progress else None
|
||||||
|
|
||||||
|
try:
|
||||||
|
working_directory = os.path.expanduser(config.get('working_directory'))
|
||||||
|
except TypeError:
|
||||||
|
working_directory = None
|
||||||
|
|
||||||
|
borg_environment = environment.make_environment(config)
|
||||||
|
|
||||||
create_flags += (
|
create_flags += (
|
||||||
(('--info',) if logger.getEffectiveLevel() == logging.INFO and not json else ())
|
(('--info',) if logger.getEffectiveLevel() == logging.INFO and not json else ())
|
||||||
+ (('--stats',) if stats and not json and not dry_run else ())
|
+ (('--stats',) if stats and not json and not dry_run else ())
|
||||||
|
|
|
@ -95,14 +95,15 @@ def capture_archive_listing(
|
||||||
local_borg_version,
|
local_borg_version,
|
||||||
global_arguments,
|
global_arguments,
|
||||||
list_paths=None,
|
list_paths=None,
|
||||||
|
path_format=None,
|
||||||
local_path='borg',
|
local_path='borg',
|
||||||
remote_path=None,
|
remote_path=None,
|
||||||
):
|
):
|
||||||
'''
|
'''
|
||||||
Given a local or remote repository path, an archive name, a configuration dict, the local Borg
|
Given a local or remote repository path, an archive name, a configuration dict, the local Borg
|
||||||
version, global arguments as an argparse.Namespace, the archive paths in which to list files, and
|
version, global arguments as an argparse.Namespace, the archive paths in which to list files,
|
||||||
local and remote Borg paths, capture the output of listing that archive and return it as a list
|
the Borg path format to use for the output, and local and remote Borg paths, capture the output
|
||||||
of file paths.
|
of listing that archive and return it as a list of file paths.
|
||||||
'''
|
'''
|
||||||
borg_environment = environment.make_environment(config)
|
borg_environment = environment.make_environment(config)
|
||||||
|
|
||||||
|
@ -118,7 +119,7 @@ def capture_archive_listing(
|
||||||
paths=[f'sh:{path}' for path in list_paths] if list_paths else None,
|
paths=[f'sh:{path}' for path in list_paths] if list_paths else None,
|
||||||
find_paths=None,
|
find_paths=None,
|
||||||
json=None,
|
json=None,
|
||||||
format='{path}{NL}', # noqa: FS003
|
format=path_format or '{path}{NL}', # noqa: FS003
|
||||||
),
|
),
|
||||||
global_arguments,
|
global_arguments,
|
||||||
local_path,
|
local_path,
|
||||||
|
|
|
@ -614,10 +614,10 @@ def make_parsers():
|
||||||
check_group.add_argument(
|
check_group.add_argument(
|
||||||
'--only',
|
'--only',
|
||||||
metavar='CHECK',
|
metavar='CHECK',
|
||||||
choices=('repository', 'archives', 'data', 'extract'),
|
choices=('repository', 'archives', 'data', 'extract', 'spot'),
|
||||||
dest='only_checks',
|
dest='only_checks',
|
||||||
action='append',
|
action='append',
|
||||||
help='Run a particular consistency check (repository, archives, data, or extract) instead of configured checks (subject to configured frequency, can specify flag multiple times)',
|
help='Run a particular consistency check (repository, archives, data, extract, or spot) instead of configured checks (subject to configured frequency, can specify flag multiple times)',
|
||||||
)
|
)
|
||||||
check_group.add_argument(
|
check_group.add_argument(
|
||||||
'--force',
|
'--force',
|
||||||
|
|
|
@ -21,6 +21,19 @@ def insert_newline_before_comment(config, field_name):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_properties(schema):
|
||||||
|
'''
|
||||||
|
Given a schema dict, return its properties. But if it's got sub-schemas with multiple different
|
||||||
|
potential properties, returned their merged properties instead.
|
||||||
|
'''
|
||||||
|
if 'oneOf' in schema:
|
||||||
|
return dict(
|
||||||
|
collections.ChainMap(*[sub_schema['properties'] for sub_schema in schema['oneOf']])
|
||||||
|
)
|
||||||
|
|
||||||
|
return schema['properties']
|
||||||
|
|
||||||
|
|
||||||
def schema_to_sample_configuration(schema, level=0, parent_is_sequence=False):
|
def schema_to_sample_configuration(schema, level=0, parent_is_sequence=False):
|
||||||
'''
|
'''
|
||||||
Given a loaded configuration schema, generate and return sample config for it. Include comments
|
Given a loaded configuration schema, generate and return sample config for it. Include comments
|
||||||
|
@ -40,7 +53,7 @@ def schema_to_sample_configuration(schema, level=0, parent_is_sequence=False):
|
||||||
config = ruamel.yaml.comments.CommentedMap(
|
config = ruamel.yaml.comments.CommentedMap(
|
||||||
[
|
[
|
||||||
(field_name, schema_to_sample_configuration(sub_schema, level + 1))
|
(field_name, schema_to_sample_configuration(sub_schema, level + 1))
|
||||||
for field_name, sub_schema in schema['properties'].items()
|
for field_name, sub_schema in get_properties(schema).items()
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
indent = (level * INDENT) + (SEQUENCE_INDENT if parent_is_sequence else 0)
|
indent = (level * INDENT) + (SEQUENCE_INDENT if parent_is_sequence else 0)
|
||||||
|
@ -151,7 +164,7 @@ def add_comments_to_configuration_sequence(config, schema, indent=0):
|
||||||
return
|
return
|
||||||
|
|
||||||
for field_name in config[0].keys():
|
for field_name in config[0].keys():
|
||||||
field_schema = schema['items']['properties'].get(field_name, {})
|
field_schema = get_properties(schema['items']).get(field_name, {})
|
||||||
description = field_schema.get('description')
|
description = field_schema.get('description')
|
||||||
|
|
||||||
# No description to use? Skip it.
|
# No description to use? Skip it.
|
||||||
|
@ -178,7 +191,7 @@ def add_comments_to_configuration_object(config, schema, indent=0, skip_first=Fa
|
||||||
if skip_first and index == 0:
|
if skip_first and index == 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
field_schema = schema['properties'].get(field_name, {})
|
field_schema = get_properties(schema).get(field_name, {})
|
||||||
description = field_schema.get('description', '').strip()
|
description = field_schema.get('description', '').strip()
|
||||||
|
|
||||||
# If this is an optional key, add an indicator to the comment flagging it to be commented
|
# If this is an optional key, add an indicator to the comment flagging it to be commented
|
||||||
|
|
|
@ -503,37 +503,120 @@ properties:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: object
|
type: object
|
||||||
required: ['name']
|
oneOf:
|
||||||
additionalProperties: false
|
- required: [name]
|
||||||
properties:
|
additionalProperties: false
|
||||||
name:
|
properties:
|
||||||
type: string
|
name:
|
||||||
enum:
|
type: string
|
||||||
- repository
|
enum:
|
||||||
- archives
|
- repository
|
||||||
- data
|
- archives
|
||||||
- extract
|
- data
|
||||||
- disabled
|
- extract
|
||||||
description: |
|
- disabled
|
||||||
Name of consistency check to run: "repository",
|
description: |
|
||||||
"archives", "data", and/or "extract". "repository"
|
Name of consistency check to run: "repository",
|
||||||
checks the consistency of the repository, "archives"
|
"archives", "data", "spot", and/or "extract".
|
||||||
checks all of the archives, "data" verifies the
|
"repository" checks the consistency of the
|
||||||
integrity of the data within the archives, and "extract"
|
repository, "archives" checks all of the
|
||||||
does an extraction dry-run of the most recent archive.
|
archives, "data" verifies the integrity of the
|
||||||
Note that "data" implies "archives". See "skip_actions"
|
data within the archives, "spot" checks that
|
||||||
for disabling checks altogether.
|
some percentage of source files are found in the
|
||||||
example: repository
|
most recent archive (with identical contents),
|
||||||
frequency:
|
and "extract" does an extraction dry-run of the
|
||||||
type: string
|
most recent archive. Note that "data" implies
|
||||||
description: |
|
"archives". See "skip_actions" for disabling
|
||||||
How frequently to run this type of consistency check (as
|
checks altogether.
|
||||||
a best effort). The value is a number followed by a unit
|
example: spot
|
||||||
of time. E.g., "2 weeks" to run this consistency check
|
frequency:
|
||||||
no more than every two weeks for a given repository or
|
type: string
|
||||||
"1 month" to run it no more than monthly. Defaults to
|
description: |
|
||||||
"always": running this check every time checks are run.
|
How frequently to run this type of consistency
|
||||||
example: 2 weeks
|
check (as a best effort). The value is a number
|
||||||
|
followed by a unit of time. E.g., "2 weeks" to
|
||||||
|
run this consistency check no more than every
|
||||||
|
two weeks for a given repository or "1 month" to
|
||||||
|
run it no more than monthly. Defaults to
|
||||||
|
"always": running this check every time checks
|
||||||
|
are run.
|
||||||
|
example: 2 weeks
|
||||||
|
- required:
|
||||||
|
- name
|
||||||
|
- count_tolerance_percentage
|
||||||
|
- data_sample_percentage
|
||||||
|
- data_tolerance_percentage
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- spot
|
||||||
|
description: |
|
||||||
|
Name of consistency check to run: "repository",
|
||||||
|
"archives", "data", "spot", and/or "extract".
|
||||||
|
"repository" checks the consistency of the
|
||||||
|
repository, "archives" checks all of the
|
||||||
|
archives, "data" verifies the integrity of the
|
||||||
|
data within the archives, "spot" checks that
|
||||||
|
some percentage of source files are found in the
|
||||||
|
most recent archive (with identical contents),
|
||||||
|
and "extract" does an extraction dry-run of the
|
||||||
|
most recent archive. Note that "data" implies
|
||||||
|
"archives". See "skip_actions" for disabling
|
||||||
|
checks altogether.
|
||||||
|
example: repository
|
||||||
|
frequency:
|
||||||
|
type: string
|
||||||
|
description: |
|
||||||
|
How frequently to run this type of consistency
|
||||||
|
check (as a best effort). The value is a number
|
||||||
|
followed by a unit of time. E.g., "2 weeks" to
|
||||||
|
run this consistency check no more than every
|
||||||
|
two weeks for a given repository or "1 month" to
|
||||||
|
run it no more than monthly. Defaults to
|
||||||
|
"always": running this check every time checks
|
||||||
|
are run.
|
||||||
|
example: 2 weeks
|
||||||
|
count_tolerance_percentage:
|
||||||
|
type: number
|
||||||
|
description: |
|
||||||
|
The percentage delta between the source
|
||||||
|
directories file count and the most recent backup
|
||||||
|
archive file count that is allowed before the
|
||||||
|
entire consistency check fails. This can catch
|
||||||
|
problems like incorrect excludes, inadvertent
|
||||||
|
deletes, etc. Only applies to the "spot" check.
|
||||||
|
example: 10
|
||||||
|
data_sample_percentage:
|
||||||
|
type: number
|
||||||
|
description: |
|
||||||
|
The percentage of total files in the source
|
||||||
|
directories to randomly sample and compare to
|
||||||
|
their corresponding files in the most recent
|
||||||
|
backup archive. Only applies to the "spot" check.
|
||||||
|
example: 1
|
||||||
|
data_tolerance_percentage:
|
||||||
|
type: number
|
||||||
|
description: |
|
||||||
|
The percentage of total files in the source
|
||||||
|
directories that can fail a spot check comparison
|
||||||
|
without failing the entire consistency check. This
|
||||||
|
can catch problems like source files that have
|
||||||
|
been bulk-changed by malware, backups that have
|
||||||
|
been tampered with, etc. The value must be lower
|
||||||
|
than or equal to the "data_sample_percentage".
|
||||||
|
Only applies to the "spot" check.
|
||||||
|
example: 0.5
|
||||||
|
xxh64sum_command:
|
||||||
|
type: string
|
||||||
|
description: |
|
||||||
|
Command to use instead of "xxh64sum" to hash
|
||||||
|
source files, usually found in an OS package named
|
||||||
|
"xxhash". Do not substitute with a different hash
|
||||||
|
type (SHA, MD5, etc.) or the check will never
|
||||||
|
succeed. Only applies to the "spot" check.
|
||||||
|
example: /usr/local/bin/xxh64sum
|
||||||
description: |
|
description: |
|
||||||
List of one or more consistency checks to run on a periodic basis
|
List of one or more consistency checks to run on a periodic basis
|
||||||
(if "frequency" is set) or every time borgmatic runs checks (if
|
(if "frequency" is set) or every time borgmatic runs checks (if
|
||||||
|
|
|
@ -4,6 +4,7 @@ import logging
|
||||||
import os
|
import os
|
||||||
import select
|
import select
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import textwrap
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -219,13 +220,22 @@ def log_outputs(processes, exclude_stdouts, output_log_level, borg_local_path, b
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
MAX_LOGGED_COMMAND_LENGTH = 1000
|
||||||
|
|
||||||
|
|
||||||
def log_command(full_command, input_file=None, output_file=None, environment=None):
|
def log_command(full_command, input_file=None, output_file=None, environment=None):
|
||||||
'''
|
'''
|
||||||
Log the given command (a sequence of command/argument strings), along with its input/output file
|
Log the given command (a sequence of command/argument strings), along with its input/output file
|
||||||
paths and extra environment variables (with omitted values in case they contain passwords).
|
paths and extra environment variables (with omitted values in case they contain passwords).
|
||||||
'''
|
'''
|
||||||
logger.debug(
|
logger.debug(
|
||||||
' '.join(tuple(f'{key}=***' for key in (environment or {}).keys()) + tuple(full_command))
|
textwrap.shorten(
|
||||||
|
' '.join(
|
||||||
|
tuple(f'{key}=***' for key in (environment or {}).keys()) + tuple(full_command)
|
||||||
|
),
|
||||||
|
width=MAX_LOGGED_COMMAND_LENGTH,
|
||||||
|
placeholder=' ...',
|
||||||
|
)
|
||||||
+ (f" < {getattr(input_file, 'name', '')}" if input_file else '')
|
+ (f" < {getattr(input_file, 'name', '')}" if input_file else '')
|
||||||
+ (f" > {getattr(output_file, 'name', '')}" if output_file else '')
|
+ (f" > {getattr(output_file, 'name', '')}" if output_file else '')
|
||||||
)
|
)
|
||||||
|
|
|
@ -115,6 +115,14 @@ def execute_dump_command(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def use_streaming(databases, config, log_prefix):
|
||||||
|
'''
|
||||||
|
Given a sequence of MariaDB database configuration dicts, a configuration dict (ignored), and a
|
||||||
|
log prefix (ignored), return whether streaming will be used during dumps.
|
||||||
|
'''
|
||||||
|
return any(databases)
|
||||||
|
|
||||||
|
|
||||||
def dump_data_sources(databases, config, log_prefix, dry_run):
|
def dump_data_sources(databases, config, log_prefix, dry_run):
|
||||||
'''
|
'''
|
||||||
Dump the given MariaDB databases to a named pipe. The databases are supplied as a sequence of
|
Dump the given MariaDB databases to a named pipe. The databases are supplied as a sequence of
|
||||||
|
|
|
@ -16,6 +16,14 @@ def make_dump_path(config): # pragma: no cover
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def use_streaming(databases, config, log_prefix):
|
||||||
|
'''
|
||||||
|
Given a sequence of MongoDB database configuration dicts, a configuration dict (ignored), and a
|
||||||
|
log prefix (ignored), return whether streaming will be used during dumps.
|
||||||
|
'''
|
||||||
|
return any(database.get('format') != 'directory' for database in databases)
|
||||||
|
|
||||||
|
|
||||||
def dump_data_sources(databases, config, log_prefix, dry_run):
|
def dump_data_sources(databases, config, log_prefix, dry_run):
|
||||||
'''
|
'''
|
||||||
Dump the given MongoDB databases to a named pipe. The databases are supplied as a sequence of
|
Dump the given MongoDB databases to a named pipe. The databases are supplied as a sequence of
|
||||||
|
|
|
@ -114,6 +114,14 @@ def execute_dump_command(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def use_streaming(databases, config, log_prefix):
|
||||||
|
'''
|
||||||
|
Given a sequence of MySQL database configuration dicts, a configuration dict (ignored), and a
|
||||||
|
log prefix (ignored), return whether streaming will be used during dumps.
|
||||||
|
'''
|
||||||
|
return any(databases)
|
||||||
|
|
||||||
|
|
||||||
def dump_data_sources(databases, config, log_prefix, dry_run):
|
def dump_data_sources(databases, config, log_prefix, dry_run):
|
||||||
'''
|
'''
|
||||||
Dump the given MySQL/MariaDB databases to a named pipe. The databases are supplied as a sequence
|
Dump the given MySQL/MariaDB databases to a named pipe. The databases are supplied as a sequence
|
||||||
|
|
|
@ -96,6 +96,14 @@ def database_names_to_dump(database, extra_environment, log_prefix, dry_run):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def use_streaming(databases, config, log_prefix):
|
||||||
|
'''
|
||||||
|
Given a sequence of PostgreSQL database configuration dicts, a configuration dict (ignored), and
|
||||||
|
a log prefix (ignored), return whether streaming will be used during dumps.
|
||||||
|
'''
|
||||||
|
return any(database.get('format') != 'directory' for database in databases)
|
||||||
|
|
||||||
|
|
||||||
def dump_data_sources(databases, config, log_prefix, dry_run):
|
def dump_data_sources(databases, config, log_prefix, dry_run):
|
||||||
'''
|
'''
|
||||||
Dump the given PostgreSQL databases to a named pipe. The databases are supplied as a sequence of
|
Dump the given PostgreSQL databases to a named pipe. The databases are supplied as a sequence of
|
||||||
|
|
|
@ -17,9 +17,17 @@ def make_dump_path(config): # pragma: no cover
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def use_streaming(databases, config, log_prefix):
|
||||||
|
'''
|
||||||
|
Given a sequence of SQLite database configuration dicts, a configuration dict (ignored), and a
|
||||||
|
log prefix (ignored), return whether streaming will be used during dumps.
|
||||||
|
'''
|
||||||
|
return any(databases)
|
||||||
|
|
||||||
|
|
||||||
def dump_data_sources(databases, config, log_prefix, dry_run):
|
def dump_data_sources(databases, config, log_prefix, dry_run):
|
||||||
'''
|
'''
|
||||||
Dump the given SQLite3 databases to a named pipe. The databases are supplied as a sequence of
|
Dump the given SQLite databases to a named pipe. The databases are supplied as a sequence of
|
||||||
configuration dicts, as per the configuration schema. Use the given configuration dict to
|
configuration dicts, as per the configuration schema. Use the given configuration dict to
|
||||||
construct the destination path and the given log prefix in any log entries.
|
construct the destination path and the given log prefix in any log entries.
|
||||||
|
|
||||||
|
@ -71,7 +79,7 @@ def dump_data_sources(databases, config, log_prefix, dry_run):
|
||||||
|
|
||||||
def remove_data_source_dumps(databases, config, log_prefix, dry_run): # pragma: no cover
|
def remove_data_source_dumps(databases, config, log_prefix, dry_run): # pragma: no cover
|
||||||
'''
|
'''
|
||||||
Remove the given SQLite3 database dumps from the filesystem. The databases are supplied as a
|
Remove the given SQLite database dumps from the filesystem. The databases are supplied as a
|
||||||
sequence of configuration dicts, as per the configuration schema. Use the given configuration
|
sequence of configuration dicts, as per the configuration schema. Use the given configuration
|
||||||
dict to construct the destination path and the given log prefix in any log entries. If this is a
|
dict to construct the destination path and the given log prefix in any log entries. If this is a
|
||||||
dry run, then don't actually remove anything.
|
dry run, then don't actually remove anything.
|
||||||
|
@ -81,8 +89,8 @@ def remove_data_source_dumps(databases, config, log_prefix, dry_run): # pragma:
|
||||||
|
|
||||||
def make_data_source_dump_pattern(databases, config, log_prefix, name=None): # pragma: no cover
|
def make_data_source_dump_pattern(databases, config, log_prefix, name=None): # pragma: no cover
|
||||||
'''
|
'''
|
||||||
Make a pattern that matches the given SQLite3 databases. The databases are supplied as a
|
Make a pattern that matches the given SQLite databases. The databases are supplied as a sequence
|
||||||
sequence of configuration dicts, as per the configuration schema.
|
of configuration dicts, as per the configuration schema.
|
||||||
'''
|
'''
|
||||||
return dump.make_data_source_dump_filename(make_dump_path(config), name)
|
return dump.make_data_source_dump_filename(make_dump_path(config), name)
|
||||||
|
|
||||||
|
|
|
@ -91,8 +91,9 @@ Here are the available checks from fastest to slowest:
|
||||||
|
|
||||||
* `repository`: Checks the consistency of the repository itself.
|
* `repository`: Checks the consistency of the repository itself.
|
||||||
* `archives`: Checks all of the archives in the repository.
|
* `archives`: Checks all of the archives in the repository.
|
||||||
* `extract`: Performs an extraction dry-run of the most recent archive.
|
* `extract`: Performs an extraction dry-run of the latest archive.
|
||||||
* `data`: Verifies the data integrity of all archives contents, decrypting and decompressing all data.
|
* `data`: Verifies the data integrity of all archives contents, decrypting and decompressing all data.
|
||||||
|
* `spot`: Compares file counts and contents between your source files and the latest archive.
|
||||||
|
|
||||||
Note that the `data` check is a more thorough version of the `archives` check,
|
Note that the `data` check is a more thorough version of the `archives` check,
|
||||||
so enabling the `data` check implicitly enables the `archives` check as well.
|
so enabling the `data` check implicitly enables the `archives` check as well.
|
||||||
|
@ -102,6 +103,84 @@ documentation](https://borgbackup.readthedocs.io/en/stable/usage/check.html)
|
||||||
for more information.
|
for more information.
|
||||||
|
|
||||||
|
|
||||||
|
### Spot check
|
||||||
|
|
||||||
|
The various consistency checks all have trade-offs around speed and
|
||||||
|
thoroughness, but most of them don't even look at your original source
|
||||||
|
files—arguably one important way to ensure your backups contain the files
|
||||||
|
you'll want to restore in the case of catastrophe (or just an accidentally
|
||||||
|
deleted file). Because if something goes wrong with your source files, most
|
||||||
|
consistency checks will still pass with flying colors and you won't discover
|
||||||
|
there's a problem until you go to restore.
|
||||||
|
|
||||||
|
<span class="minilink minilink-addedin">New in version 1.8.10</span> <span
|
||||||
|
class="minilink minilink-addedin">Beta feature</span> That's where the spot
|
||||||
|
check comes in. This check actually compares your source file counts and data
|
||||||
|
against those in the latest archive, potentially catching problems like
|
||||||
|
incorrect excludes, inadvertent deletes, files changed by malware, etc.
|
||||||
|
|
||||||
|
However, because an exhaustive comparison of all source files against the
|
||||||
|
latest archive might be too slow, the spot check supports *sampling* a
|
||||||
|
percentage of your source files for the comparison, ensuring it falls within
|
||||||
|
configured tolerances.
|
||||||
|
|
||||||
|
Here's how it works. Start by installing the `xxhash` OS package if you don't
|
||||||
|
already have it, so the spot check can run the `xxh64sum` command and
|
||||||
|
efficiently hash files for comparison. Then add something like the following
|
||||||
|
to your borgmatic configuration:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
checks:
|
||||||
|
- name: spot
|
||||||
|
count_tolerance_percentage: 10
|
||||||
|
data_sample_percentage: 1
|
||||||
|
data_tolerance_percentage: 0.5
|
||||||
|
```
|
||||||
|
|
||||||
|
The `count_tolerance_percentage` is the percentage delta between the source
|
||||||
|
directories file count and the latest backup archive file count that is
|
||||||
|
allowed before the entire consistency check fails. For instance, if the spot
|
||||||
|
check runs and finds 100 source files on disk and 105 files in the latest
|
||||||
|
archive, that would be within the configured 10% count tolerance and the check
|
||||||
|
would succeed. But if there were 100 source files and 200 archive files, the
|
||||||
|
check would fail. (100 source files and only 50 archive files would also
|
||||||
|
fail.)
|
||||||
|
|
||||||
|
The `data_sample_percentage` is the percentage of total files in the source
|
||||||
|
directories to randomly sample and compare to their corresponding files in the
|
||||||
|
latest backup archive. A higher value allows a more accurate check—and a
|
||||||
|
slower one. The comparison is performed by hashing the selected files in each
|
||||||
|
of the source paths and counting hashes that don't match the latest archive.
|
||||||
|
For instance, if you have 1,000 source files and your sample percentage is 1%,
|
||||||
|
then only 10 source files will be compared against the latest archive. These
|
||||||
|
sampled files are selected randomly each time, so in effect the spot check is
|
||||||
|
probabilistic.
|
||||||
|
|
||||||
|
The `data_tolerance_percentage` is the percentage of total files in the source
|
||||||
|
directories that can fail a spot check data comparison without failing the
|
||||||
|
entire consistency check. The value must be lower than or equal to the
|
||||||
|
`data_sample_percentage`.
|
||||||
|
|
||||||
|
All three options are required when using the spot check. And because the
|
||||||
|
check relies on these configured tolerances, it may not be a
|
||||||
|
set-it-and-forget-it type of consistency check, at least until you get the
|
||||||
|
tolerances dialed in so there are minimal false positives or negatives. It is
|
||||||
|
recommended you run `borgmatic check` several times after configuring the spot
|
||||||
|
check, tweaking your tolerances as needed. For certain workloads where your
|
||||||
|
source files experience wild swings of file contents or counts, the spot check
|
||||||
|
may not be suitable at all.
|
||||||
|
|
||||||
|
What if you add, delete, or change a bunch of your source files and you don't
|
||||||
|
want the spot check to fail the next time it's run? Run `borgmatic create` to
|
||||||
|
create a new backup, thereby allowing the next spot check to run against an
|
||||||
|
archive that contains your recent changes.
|
||||||
|
|
||||||
|
As long as the spot check feature is in beta, it may be subject to breaking
|
||||||
|
changes. But feel free to use it in production if you're okay with that
|
||||||
|
caveat, and please [provide any
|
||||||
|
feedback](https://torsion.org/borgmatic/#issues) you have on this feature.
|
||||||
|
|
||||||
|
|
||||||
### Check frequency
|
### Check frequency
|
||||||
|
|
||||||
<span class="minilink minilink-addedin">New in version 1.6.2</span> You can
|
<span class="minilink minilink-addedin">New in version 1.6.2</span> You can
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,5 +1,4 @@
|
||||||
import pytest
|
import pytest
|
||||||
from flexmock import flexmock
|
|
||||||
|
|
||||||
from borgmatic.actions import json as module
|
from borgmatic.actions import json as module
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -6,9 +6,48 @@ from flexmock import flexmock
|
||||||
from borgmatic.config import generate as module
|
from borgmatic.config import generate as module
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_properties_with_simple_object():
|
||||||
|
schema = {
|
||||||
|
'type': 'object',
|
||||||
|
'properties': OrderedDict(
|
||||||
|
[
|
||||||
|
('field1', {'example': 'Example'}),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
assert module.get_properties(schema) == schema['properties']
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_properties_merges_one_of_list_properties():
|
||||||
|
schema = {
|
||||||
|
'type': 'object',
|
||||||
|
'oneOf': [
|
||||||
|
{
|
||||||
|
'properties': OrderedDict(
|
||||||
|
[
|
||||||
|
('field1', {'example': 'Example 1'}),
|
||||||
|
('field2', {'example': 'Example 2'}),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'properties': OrderedDict(
|
||||||
|
[
|
||||||
|
('field2', {'example': 'Example 2'}),
|
||||||
|
('field3', {'example': 'Example 3'}),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
assert module.get_properties(schema) == dict(
|
||||||
|
schema['oneOf'][0]['properties'], **schema['oneOf'][1]['properties']
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_schema_to_sample_configuration_generates_config_map_with_examples():
|
def test_schema_to_sample_configuration_generates_config_map_with_examples():
|
||||||
flexmock(module.ruamel.yaml.comments).should_receive('CommentedMap').replace_with(OrderedDict)
|
|
||||||
flexmock(module).should_receive('add_comments_to_configuration_object')
|
|
||||||
schema = {
|
schema = {
|
||||||
'type': 'object',
|
'type': 'object',
|
||||||
'properties': OrderedDict(
|
'properties': OrderedDict(
|
||||||
|
@ -19,6 +58,9 @@ def test_schema_to_sample_configuration_generates_config_map_with_examples():
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
flexmock(module).should_receive('get_properties').and_return(schema['properties'])
|
||||||
|
flexmock(module.ruamel.yaml.comments).should_receive('CommentedMap').replace_with(OrderedDict)
|
||||||
|
flexmock(module).should_receive('add_comments_to_configuration_object')
|
||||||
|
|
||||||
config = module.schema_to_sample_configuration(schema)
|
config = module.schema_to_sample_configuration(schema)
|
||||||
|
|
||||||
|
@ -42,9 +84,6 @@ def test_schema_to_sample_configuration_generates_config_sequence_of_strings_wit
|
||||||
|
|
||||||
|
|
||||||
def test_schema_to_sample_configuration_generates_config_sequence_of_maps_with_examples():
|
def test_schema_to_sample_configuration_generates_config_sequence_of_maps_with_examples():
|
||||||
flexmock(module.ruamel.yaml.comments).should_receive('CommentedSeq').replace_with(list)
|
|
||||||
flexmock(module).should_receive('add_comments_to_configuration_sequence')
|
|
||||||
flexmock(module).should_receive('add_comments_to_configuration_object')
|
|
||||||
schema = {
|
schema = {
|
||||||
'type': 'array',
|
'type': 'array',
|
||||||
'items': {
|
'items': {
|
||||||
|
@ -54,6 +93,10 @@ def test_schema_to_sample_configuration_generates_config_sequence_of_maps_with_e
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
flexmock(module).should_receive('get_properties').and_return(schema['items']['properties'])
|
||||||
|
flexmock(module.ruamel.yaml.comments).should_receive('CommentedSeq').replace_with(list)
|
||||||
|
flexmock(module).should_receive('add_comments_to_configuration_sequence')
|
||||||
|
flexmock(module).should_receive('add_comments_to_configuration_object')
|
||||||
|
|
||||||
config = module.schema_to_sample_configuration(schema)
|
config = module.schema_to_sample_configuration(schema)
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,16 @@ def test_database_names_to_dump_queries_mariadb_for_database_names():
|
||||||
assert names == ('foo', 'bar')
|
assert names == ('foo', 'bar')
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_true_for_any_databases():
|
||||||
|
assert module.use_streaming(
|
||||||
|
databases=[flexmock(), flexmock()], config=flexmock(), log_prefix=flexmock()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_false_for_no_databases():
|
||||||
|
assert not module.use_streaming(databases=[], config=flexmock(), log_prefix=flexmock())
|
||||||
|
|
||||||
|
|
||||||
def test_dump_data_sources_dumps_each_database():
|
def test_dump_data_sources_dumps_each_database():
|
||||||
databases = [{'name': 'foo'}, {'name': 'bar'}]
|
databases = [{'name': 'foo'}, {'name': 'bar'}]
|
||||||
processes = [flexmock(), flexmock()]
|
processes = [flexmock(), flexmock()]
|
||||||
|
|
|
@ -5,6 +5,26 @@ from flexmock import flexmock
|
||||||
from borgmatic.hooks import mongodb as module
|
from borgmatic.hooks import mongodb as module
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_true_for_any_non_directory_format_databases():
|
||||||
|
assert module.use_streaming(
|
||||||
|
databases=[{'format': 'stuff'}, {'format': 'directory'}, {}],
|
||||||
|
config=flexmock(),
|
||||||
|
log_prefix=flexmock(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_false_for_all_directory_format_databases():
|
||||||
|
assert not module.use_streaming(
|
||||||
|
databases=[{'format': 'directory'}, {'format': 'directory'}],
|
||||||
|
config=flexmock(),
|
||||||
|
log_prefix=flexmock(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_false_for_no_databases():
|
||||||
|
assert not module.use_streaming(databases=[], config=flexmock(), log_prefix=flexmock())
|
||||||
|
|
||||||
|
|
||||||
def test_dump_data_sources_runs_mongodump_for_each_database():
|
def test_dump_data_sources_runs_mongodump_for_each_database():
|
||||||
databases = [{'name': 'foo'}, {'name': 'bar'}]
|
databases = [{'name': 'foo'}, {'name': 'bar'}]
|
||||||
processes = [flexmock(), flexmock()]
|
processes = [flexmock(), flexmock()]
|
||||||
|
|
|
@ -44,6 +44,16 @@ def test_database_names_to_dump_queries_mysql_for_database_names():
|
||||||
assert names == ('foo', 'bar')
|
assert names == ('foo', 'bar')
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_true_for_any_databases():
|
||||||
|
assert module.use_streaming(
|
||||||
|
databases=[flexmock(), flexmock()], config=flexmock(), log_prefix=flexmock()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_false_for_no_databases():
|
||||||
|
assert not module.use_streaming(databases=[], config=flexmock(), log_prefix=flexmock())
|
||||||
|
|
||||||
|
|
||||||
def test_dump_data_sources_dumps_each_database():
|
def test_dump_data_sources_dumps_each_database():
|
||||||
databases = [{'name': 'foo'}, {'name': 'bar'}]
|
databases = [{'name': 'foo'}, {'name': 'bar'}]
|
||||||
processes = [flexmock(), flexmock()]
|
processes = [flexmock(), flexmock()]
|
||||||
|
|
|
@ -199,6 +199,26 @@ def test_database_names_to_dump_with_all_and_psql_command_uses_custom_command():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_true_for_any_non_directory_format_databases():
|
||||||
|
assert module.use_streaming(
|
||||||
|
databases=[{'format': 'stuff'}, {'format': 'directory'}, {}],
|
||||||
|
config=flexmock(),
|
||||||
|
log_prefix=flexmock(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_false_for_all_directory_format_databases():
|
||||||
|
assert not module.use_streaming(
|
||||||
|
databases=[{'format': 'directory'}, {'format': 'directory'}],
|
||||||
|
config=flexmock(),
|
||||||
|
log_prefix=flexmock(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_false_for_no_databases():
|
||||||
|
assert not module.use_streaming(databases=[], config=flexmock(), log_prefix=flexmock())
|
||||||
|
|
||||||
|
|
||||||
def test_dump_data_sources_runs_pg_dump_for_each_database():
|
def test_dump_data_sources_runs_pg_dump_for_each_database():
|
||||||
databases = [{'name': 'foo'}, {'name': 'bar'}]
|
databases = [{'name': 'foo'}, {'name': 'bar'}]
|
||||||
processes = [flexmock(), flexmock()]
|
processes = [flexmock(), flexmock()]
|
||||||
|
|
|
@ -5,6 +5,16 @@ from flexmock import flexmock
|
||||||
from borgmatic.hooks import sqlite as module
|
from borgmatic.hooks import sqlite as module
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_true_for_any_databases():
|
||||||
|
assert module.use_streaming(
|
||||||
|
databases=[flexmock(), flexmock()], config=flexmock(), log_prefix=flexmock()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_use_streaming_false_for_no_databases():
|
||||||
|
assert not module.use_streaming(databases=[], config=flexmock(), log_prefix=flexmock())
|
||||||
|
|
||||||
|
|
||||||
def test_dump_data_sources_logs_and_skips_if_dump_already_exists():
|
def test_dump_data_sources_logs_and_skips_if_dump_already_exists():
|
||||||
databases = [{'path': '/path/to/database', 'name': 'database'}]
|
databases = [{'path': '/path/to/database', 'name': 'database'}]
|
||||||
|
|
||||||
|
|
|
@ -123,6 +123,13 @@ def test_append_last_lines_with_output_log_level_none_appends_captured_output():
|
||||||
(('foo', 'bar'), None, None, None, 'foo bar'),
|
(('foo', 'bar'), None, None, None, 'foo bar'),
|
||||||
(('foo', 'bar'), flexmock(name='input'), None, None, 'foo bar < input'),
|
(('foo', 'bar'), flexmock(name='input'), None, None, 'foo bar < input'),
|
||||||
(('foo', 'bar'), None, flexmock(name='output'), None, 'foo bar > output'),
|
(('foo', 'bar'), None, flexmock(name='output'), None, 'foo bar > output'),
|
||||||
|
(
|
||||||
|
('A',) * module.MAX_LOGGED_COMMAND_LENGTH,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
'A ' * (module.MAX_LOGGED_COMMAND_LENGTH // 2 - 2) + '...',
|
||||||
|
),
|
||||||
(
|
(
|
||||||
('foo', 'bar'),
|
('foo', 'bar'),
|
||||||
flexmock(name='input'),
|
flexmock(name='input'),
|
||||||
|
|
Loading…
Reference in a new issue