296 lines
11 KiB
Python
296 lines
11 KiB
Python
import glob
|
|
import itertools
|
|
import logging
|
|
import os
|
|
import pathlib
|
|
import tempfile
|
|
|
|
from borgmatic.borg import feature
|
|
from borgmatic.execute import DO_NOT_CAPTURE, execute_command, execute_command_with_processes
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _expand_directory(directory):
    '''
    Resolve a single directory path: first replace a leading tilde with the user's home directory,
    then apply glob matching to the result. Return the list of matching paths, or a one-element
    list containing the tilde-expanded path itself when the glob matches nothing.
    '''
    home_expanded = os.path.expanduser(directory)
    matches = glob.glob(home_expanded)

    if matches:
        return matches

    # Nothing matched: hand the (possibly non-existent) path back unchanged instead of dropping it.
    return [home_expanded]
|
|
|
|
|
|
def _expand_directories(directories):
    '''
    Expand tildes and globs in every path of the given sequence via _expand_directory() and
    return all of the results, in order, as one flat tuple. A value of None yields an empty tuple.
    '''
    if directories is None:
        return ()

    flattened = []

    for path in directories:
        flattened.extend(_expand_directory(path))

    return tuple(flattened)
|
|
|
|
|
|
def _expand_home_directories(directories):
    '''
    Expand a leading tilde in each path of the given sequence and return the results as a tuple.
    Unlike _expand_directories(), glob characters are left alone. A value of None yields an empty
    tuple.
    '''
    if directories is None:
        return ()

    return tuple(map(os.path.expanduser, directories))
|
|
|
|
|
|
def map_directories_to_devices(directories):
    '''
    Given a sequence of directories, return a map from directory to an identifier for the device on
    which that directory resides or None if the path doesn't exist (or otherwise can't be stat'ed).

    This is handy for determining whether two different directories are on the same filesystem (have
    the same device identifier).
    '''
    directory_devices = {}

    for directory in directories:
        # Stat once and treat failure as "no device" rather than checking for existence first: a
        # separate exists-then-stat sequence stats every path twice and raises if the path
        # disappears between the two calls.
        try:
            directory_devices[directory] = os.stat(directory).st_dev
        except (OSError, ValueError):
            # OSError: missing path, dangling symlink, permission problem, etc.
            # ValueError: path that the OS layer rejects outright, e.g. an embedded null byte.
            directory_devices[directory] = None

    return directory_devices
|
|
|
|
|
|
def deduplicate_directories(directory_devices):
    '''
    Given a map from directory to the identifier of the device that directory lives on, return the
    directories as a sorted tuple, minus every directory that is nested (at any depth) below
    another directory in the map. For instance, ('/foo', '/foo/bar') collapses to ('/foo',).

    A nested directory is only dropped when it and its ancestor report the same device identifier
    and that identifier is not None. Directories on different filesystems are both kept, since
    both may need to be passed to Borg explicitly (e.g. when the location.one_file_system option
    is true).

    The reasoning: Borg already descends into a parent directory, so listing its children as well
    is redundant. Worse, having Borg encounter the same file twice can cause duplicate reads and
    even hangs, e.g. when a database hook streams a dump to Borg through a named pipe.
    '''
    ordered = sorted(directory_devices)
    as_paths = {directory: pathlib.PurePath(directory) for directory in ordered}
    survivors = []

    for candidate in ordered:
        device = directory_devices[candidate]
        ancestors = tuple(as_paths[candidate].parents)

        # The candidate is redundant if any listed directory is one of its ancestors and sits on
        # the same (known) device.
        covered = device is not None and any(
            as_paths[other] in ancestors and directory_devices[other] == device
            for other in ordered
        )

        if not covered:
            survivors.append(candidate)

    # "ordered" is sorted and free of duplicates, so the survivors already are too.
    return tuple(survivors)
|
|
|
|
|
|
def _write_pattern_file(patterns=None):
    '''
    Write the given sequence of patterns, one per line, to a named temporary file and return the
    still-open file object. Return None when there are no patterns to write.

    The returned object is a tempfile.NamedTemporaryFile; the caller reads its "name" attribute
    and is responsible for keeping the object around for as long as the file is needed.
    '''
    if patterns:
        temporary_file = tempfile.NamedTemporaryFile(mode='w')
        temporary_file.write('\n'.join(patterns))
        # Push the contents to disk now so another process can read them via the file's name.
        temporary_file.flush()

        return temporary_file

    return None
|
|
|
|
|
|
def _make_pattern_flags(location_config, pattern_filename=None):
    '''
    Build the Borg "--patterns-from" flags as a tuple. The filenames come from the location config
    dict's optional patterns_from list, followed by the given additional pattern filename (if
    any).
    '''
    filenames = list(location_config.get('patterns_from') or ())

    if pattern_filename:
        filenames.append(pattern_filename)

    flags = []

    for filename in filenames:
        flags.extend(('--patterns-from', filename))

    return tuple(flags)
|
|
|
|
|
|
def _make_exclude_flags(location_config, exclude_filename=None):
    '''
    Given a location config dict with various exclude options, and a filename containing any exclude
    patterns, return the corresponding Borg flags as a tuple.

    The flags are emitted in this order: one "--exclude-from" pair per configured exclude_from file
    followed by the given exclude filename (if any), "--exclude-caches", one "--exclude-if-present"
    pair per configured exclude_if_present entry, "--keep-exclude-tags", and "--exclude-nodump".

    Both list-valued options (exclude_from and exclude_if_present) may be missing or explicitly set
    to None, in which case they contribute no flags.
    '''
    exclude_filenames = tuple(location_config.get('exclude_from') or ())

    if exclude_filename:
        exclude_filenames += (exclude_filename,)

    flags = []

    for filename in exclude_filenames:
        flags.extend(('--exclude-from', filename))

    if location_config.get('exclude_caches'):
        flags.append('--exclude-caches')

    # Use "or ()" rather than a .get() default so that an explicit None value is treated the same
    # way as for exclude_from above instead of raising a TypeError on iteration.
    for if_present in location_config.get('exclude_if_present') or ():
        flags.extend(('--exclude-if-present', if_present))

    if location_config.get('keep_exclude_tags'):
        flags.append('--keep-exclude-tags')

    if location_config.get('exclude_nodump'):
        flags.append('--exclude-nodump')

    return tuple(flags)
|
|
|
|
|
|
# Directory that borgmatic_source_directories() falls back to when no borgmatic source directory is
# configured. It is stored with an unexpanded tilde; users of the value expand it with
# os.path.expanduser().
DEFAULT_BORGMATIC_SOURCE_DIRECTORY = '~/.borgmatic'
|
|
|
|
|
|
def borgmatic_source_directories(borgmatic_source_directory):
    '''
    Return a list of borgmatic-specific source directories used for state like database backups.

    The list holds the given directory (or DEFAULT_BORGMATIC_SOURCE_DIRECTORY when none is given),
    exactly as provided and without tilde expansion, if that directory exists after tilde
    expansion. Otherwise the list is empty.
    '''
    source_directory = borgmatic_source_directory or DEFAULT_BORGMATIC_SOURCE_DIRECTORY

    if os.path.exists(os.path.expanduser(source_directory)):
        return [source_directory]

    return []
|
|
|
|
|
|
# Archive name template that create_archive() uses when the storage config has no
# archive_name_format option. This module does not fill in the {hostname} and {now:...}
# placeholders itself; they are passed through verbatim as part of the "repository::archive"
# argument (presumably for Borg to expand; confirm against Borg's placeholder documentation).
DEFAULT_ARCHIVE_NAME_FORMAT = '{hostname}-{now:%Y-%m-%dT%H:%M:%S.%f}'
|
|
|
|
|
|
def create_archive(
    dry_run,
    repository,
    location_config,
    storage_config,
    local_borg_version,
    local_path='borg',
    remote_path=None,
    progress=False,
    stats=False,
    json=False,
    files=False,
    stream_processes=None,
):
    '''
    Given verbosity/dry-run flags, a local or remote repository path, a location config dict, a
    storage config dict, and the local Borg version, assemble and run a "borg create" command.
    Return the result of executing it (Borg's JSON output, if any).

    If a sequence of stream processes is given (instances of subprocess.Popen), then execute the
    create command while also triggering the given processes to produce output. In that case
    "--one-file-system" and "--read-special" are always passed, regardless of configuration.
    '''
    configured_sources = location_config['source_directories'] + borgmatic_source_directories(
        location_config.get('borgmatic_source_directory')
    )
    sources = deduplicate_directories(
        map_directories_to_devices(_expand_directories(configured_sources))
    )

    # Keep both file objects referenced until this function returns: only their names are passed
    # on the command line, so the files need to outlive the Borg invocation.
    pattern_file = _write_pattern_file(location_config.get('patterns'))
    exclude_file = _write_pattern_file(
        _expand_home_directories(location_config.get('exclude_patterns'))
    )

    checkpoint_interval = storage_config.get('checkpoint_interval')
    chunker_params = storage_config.get('chunker_params')
    compression = storage_config.get('compression')
    remote_rate_limit = storage_config.get('remote_rate_limit')
    umask = storage_config.get('umask')
    lock_wait = storage_config.get('lock_wait')
    files_cache = location_config.get('files_cache')
    archive_name_format = storage_config.get('archive_name_format', DEFAULT_ARCHIVE_NAME_FORMAT)
    extra_borg_options = storage_config.get('extra_borg_options', {}).get('create', '')

    # Which atime flag applies depends on whether the local Borg version has the ATIME feature:
    # with it, "--atime" is passed only when atime is explicitly true; without it, "--noatime" is
    # passed only when atime is explicitly false.
    if feature.available(feature.Feature.ATIME, local_borg_version):
        atime_flags = ('--atime',) if location_config.get('atime') is True else ()
    else:
        atime_flags = ('--noatime',) if location_config.get('atime') is False else ()

    # Build the command incrementally. The order of the pieces below is significant and ends with
    # the positional arguments: the archive specifier followed by the source directories.
    command = local_path.split(' ')
    command.append('create')
    command.extend(
        _make_pattern_flags(location_config, pattern_file.name if pattern_file else None)
    )
    command.extend(
        _make_exclude_flags(location_config, exclude_file.name if exclude_file else None)
    )

    if checkpoint_interval:
        command.extend(('--checkpoint-interval', str(checkpoint_interval)))
    if chunker_params:
        command.extend(('--chunker-params', chunker_params))
    if compression:
        command.extend(('--compression', compression))
    if remote_rate_limit:
        command.extend(('--remote-ratelimit', str(remote_rate_limit)))
    if location_config.get('one_file_system') or stream_processes:
        command.append('--one-file-system')
    if location_config.get('numeric_owner'):
        command.append('--numeric-owner')

    command.extend(atime_flags)

    if location_config.get('ctime') is False:
        command.append('--noctime')
    if location_config.get('birthtime') is False:
        command.append('--nobirthtime')
    if location_config.get('read_special') or stream_processes:
        command.append('--read-special')
    if location_config.get('bsd_flags') is False:
        command.append('--nobsdflags')
    if files_cache:
        command.extend(('--files-cache', files_cache))
    if remote_path:
        command.extend(('--remote-path', remote_path))
    if umask:
        command.extend(('--umask', str(umask)))
    if lock_wait:
        command.extend(('--lock-wait', str(lock_wait)))

    # File listing, logging, and stats flags are all suppressed when JSON output is requested.
    if files and not json and not progress:
        command.extend(('--list', '--filter', 'AME-'))
    if logger.getEffectiveLevel() == logging.INFO and not json:
        command.append('--info')
    if stats and not json and not dry_run:
        command.append('--stats')
    if logger.isEnabledFor(logging.DEBUG) and not json:
        command.extend(('--debug', '--show-rc'))

    if dry_run:
        command.append('--dry-run')
    if progress:
        command.append('--progress')
    if json:
        command.append('--json')
    if extra_borg_options:
        command.extend(extra_borg_options.split(' '))

    command.append(
        '{repository}::{archive_name_format}'.format(
            repository=repository, archive_name_format=archive_name_format
        )
    )
    command.extend(sources)
    full_command = tuple(command)

    if json:
        output_log_level = None
    elif (stats or files) and logger.getEffectiveLevel() == logging.WARNING:
        output_log_level = logging.WARNING
    else:
        output_log_level = logging.INFO

    # The progress output isn't compatible with captured and logged output, as progress messes with
    # the terminal directly.
    output_file = DO_NOT_CAPTURE if progress else None

    if stream_processes:
        return execute_command_with_processes(
            full_command,
            stream_processes,
            output_log_level,
            output_file,
            borg_local_path=local_path,
        )

    return execute_command(full_command, output_log_level, output_file, borg_local_path=local_path)
|