Add "borgmatic list --find" flag for searching for files across multiple archives (#541).

This commit is contained in:
Dan Helfman 2022-06-03 15:12:14 -07:00
parent b6893f6455
commit d14f22e121
5 changed files with 322 additions and 112 deletions

4
NEWS
View file

@ -1,4 +1,8 @@
1.6.3.dev0 1.6.3.dev0
* #541: Add "borgmatic list --find" flag for searching for files across multiple archives, useful
for hunting down that file you accidentally deleted so you can extract it. See the documentation
for more information:
https://torsion.org/borgmatic/docs/how-to/inspect-your-backups/#searching-for-a-file
* Deprecate "borgmatic list --successful" flag, as listing only non-checkpoint (successful) * Deprecate "borgmatic list --successful" flag, as listing only non-checkpoint (successful)
archives is now the default in newer versions of Borg. archives is now the default in newer versions of Borg.

View file

@ -1,4 +1,6 @@
import copy
import logging import logging
import re
from borgmatic.borg.flags import make_flags, make_flags_from_arguments from borgmatic.borg.flags import make_flags, make_flags_from_arguments
from borgmatic.execute import execute_command from borgmatic.execute import execute_command
@ -40,15 +42,20 @@ def resolve_archive_name(repository, archive, storage_config, local_path='borg',
return latest_archive return latest_archive
def list_archives(repository, storage_config, list_arguments, local_path='borg', remote_path=None): MAKE_FLAGS_EXCLUDES = ('repository', 'archive', 'successful', 'paths', 'find_paths')
def make_list_command(
repository, storage_config, list_arguments, local_path='borg', remote_path=None
):
''' '''
Given a local or remote repository path, a storage config dict, and the arguments to the list Given a local or remote repository path, a storage config dict, the arguments to the list
action, display the output of listing Borg archives in the repository or return JSON output. Or, action, and local and remote Borg paths, return a command as a tuple to list archives or paths
if an archive name is given, listing the files in that archive. within an archive.
''' '''
lock_wait = storage_config.get('lock_wait', None) lock_wait = storage_config.get('lock_wait', None)
full_command = ( return (
(local_path, 'list') (local_path, 'list')
+ ( + (
('--info',) ('--info',)
@ -62,19 +69,90 @@ def list_archives(repository, storage_config, list_arguments, local_path='borg',
) )
+ make_flags('remote-path', remote_path) + make_flags('remote-path', remote_path)
+ make_flags('lock-wait', lock_wait) + make_flags('lock-wait', lock_wait)
+ make_flags_from_arguments( + make_flags_from_arguments(list_arguments, excludes=MAKE_FLAGS_EXCLUDES,)
list_arguments, excludes=('repository', 'archive', 'successful', 'paths')
)
+ ( + (
'::'.join((repository, list_arguments.archive)) ('::'.join((repository, list_arguments.archive)),)
if list_arguments.archive if list_arguments.archive
else repository, else (repository,)
) )
+ (tuple(list_arguments.paths) if list_arguments.paths else ()) + (tuple(list_arguments.paths) if list_arguments.paths else ())
) )
return execute_command(
full_command, def make_find_paths(find_paths):
output_log_level=None if list_arguments.json else logging.WARNING, '''
borg_local_path=local_path, Given a sequence of path fragments or patterns as passed to `--find`, transform all path
fragments into glob patterns. Pass through existing patterns untouched.
For example, given find_paths of:
['foo.txt', 'pp:root/somedir']
... transform that into:
['sh:**/*foo.txt*/**', 'pp:root/somedir']
'''
return tuple(
find_path
if re.compile(r'([-!+RrPp] )|(\w\w:)').match(find_path)
else f'sh:**/*{find_path}*/**'
for find_path in find_paths
) )
def list_archives(repository, storage_config, list_arguments, local_path='borg', remote_path=None):
    '''
    Given a local or remote repository path, a storage config dict, the arguments to the list
    action, and local and remote Borg paths, display the output of listing Borg archives in the
    repository or return JSON output. Or, if an archive name is given, list the files in that
    archive. Or, if list_arguments.find_paths are given, list the files by searching across multiple
    archives.

    Return the captured Borg output when list_arguments.json is set. Otherwise, return None.
    '''
    # If there are any paths to find (and there's not a single archive already selected), start by
    # getting a list of archives to search.
    if list_arguments.find_paths and not list_arguments.archive:
        # Work on a copy so that the caller's arguments are left untouched.
        repository_arguments = copy.copy(list_arguments)
        repository_arguments.archive = None
        # The archive listing is parsed line by line below, so ask for neither JSON nor a custom
        # format.
        repository_arguments.json = False
        repository_arguments.format = None

        # Ask Borg to list archives. Capture its output for use below.
        archive_lines = tuple(
            execute_command(
                make_list_command(
                    repository, storage_config, repository_arguments, local_path, remote_path
                ),
                output_log_level=None,
                borg_local_path=local_path,
            )
            .strip('\n')
            .split('\n')
        )
    else:
        archive_lines = (list_arguments.archive,)

    # find_paths is None when "--find" isn't given, so only build patterns when there are paths to
    # transform. The result doesn't vary per archive, so compute it once up front.
    find_paths = make_find_paths(list_arguments.find_paths) if list_arguments.find_paths else ()

    # For each archive listed by Borg, run list on the contents of that archive.
    for archive_line in archive_lines:
        # The archive name is the first whitespace-delimited token of the line. A None line (no
        # archive selected) or a blank line (nothing listed) means: list the repository itself.
        try:
            archive = archive_line.split()[0]
        except (AttributeError, IndexError):
            archive = None

        if archive:
            logger.warning(archive_line)

        archive_arguments = copy.copy(list_arguments)
        archive_arguments.archive = archive
        main_command = (
            make_list_command(repository, storage_config, archive_arguments, local_path, remote_path)
            + find_paths
        )

        output = execute_command(
            main_command,
            output_log_level=None if list_arguments.json else logging.WARNING,
            borg_local_path=local_path,
        )

        # NOTE(review): returning here means that with JSON output and several archives to search,
        # only the first archive's output is returned. Confirm that this is intended.
        if list_arguments.json:
            return output

View file

@ -554,7 +554,14 @@ def make_parsers():
metavar='PATH', metavar='PATH',
nargs='+', nargs='+',
dest='paths', dest='paths',
help='Paths to list from archive, defaults to the entire archive', help='Paths or patterns to list from a single selected archive (via "--archive"), defaults to listing the entire archive',
)
list_group.add_argument(
'--find',
metavar='PATH',
nargs='+',
dest='find_paths',
help='Partial paths or patterns to search for and list across multiple archives',
) )
list_group.add_argument( list_group.add_argument(
'--short', default=False, action='store_true', help='Output only archive or path names' '--short', default=False, action='store_true', help='Output only archive or path names'
@ -571,7 +578,7 @@ def make_parsers():
) )
list_group.add_argument( list_group.add_argument(
'--successful', '--successful',
default=False, default=True,
action='store_true', action='store_true',
help='Deprecated in favor of listing successful (non-checkpoint) backups by default in newer versions of Borg', help='Deprecated in favor of listing successful (non-checkpoint) backups by default in newer versions of Borg',
) )

View file

@ -51,6 +51,30 @@ borgmatic info
`--info`. Or upgrade borgmatic!) `--info`. Or upgrade borgmatic!)
### Searching for a file
Let's say you've accidentally deleted a file and want to find the backup
archive(s) containing it. `borgmatic list` provides a `--find` flag for
exactly this purpose. For instance, if you're looking for a `foo.txt`:
```bash
borgmatic list --find foo.txt
```
This will list your archives and indicate those with files matching
`*foo.txt*` anywhere in the archive. The `--find` parameter can alternatively
be a [Borg
pattern](https://borgbackup.readthedocs.io/en/stable/usage/help.html#borg-patterns).
To limit the archives searched, use the standard `list` parameters for
filtering archives such as `--last`, `--archive`, `--glob-archives`, etc. For
example, to search only the last five archives:
```bash
borgmatic list --find foo.txt --last 5
```
## Logging ## Logging
By default, borgmatic logs to a local syslog-compatible daemon if one is By default, borgmatic logs to a local syslog-compatible daemon if one is

View file

@ -1,3 +1,4 @@
import argparse
import logging import logging
import pytest import pytest
@ -106,156 +107,125 @@ def test_resolve_archive_name_with_lock_wait_calls_borg_with_lock_wait_parameter
) )
def test_list_archives_calls_borg_with_parameters(): def test_make_list_command_includes_log_info():
flexmock(module).should_receive('execute_command').with_args( insert_logging_mock(logging.INFO)
('borg', 'list', 'repo'), output_log_level=logging.WARNING, borg_local_path='borg'
)
module.list_archives( command = module.make_list_command(
repository='repo', repository='repo',
storage_config={}, storage_config={},
list_arguments=flexmock(archive=None, paths=None, json=False), list_arguments=flexmock(archive=None, paths=None, json=False),
) )
assert command == ('borg', 'list', '--info', 'repo')
def test_list_archives_with_log_info_calls_borg_with_info_parameter():
flexmock(module).should_receive('execute_command').with_args( def test_make_list_command_includes_json_but_not_info():
('borg', 'list', '--info', 'repo'), output_log_level=logging.WARNING, borg_local_path='borg'
)
insert_logging_mock(logging.INFO) insert_logging_mock(logging.INFO)
module.list_archives( command = module.make_list_command(
repository='repo',
storage_config={},
list_arguments=flexmock(archive=None, paths=None, json=False),
)
def test_list_archives_with_log_info_and_json_suppresses_most_borg_output():
flexmock(module).should_receive('execute_command').with_args(
('borg', 'list', '--json', 'repo'), output_log_level=None, borg_local_path='borg'
)
insert_logging_mock(logging.INFO)
module.list_archives(
repository='repo', repository='repo',
storage_config={}, storage_config={},
list_arguments=flexmock(archive=None, paths=None, json=True), list_arguments=flexmock(archive=None, paths=None, json=True),
) )
assert command == ('borg', 'list', '--json', 'repo')
def test_list_archives_with_log_debug_calls_borg_with_debug_parameter():
flexmock(module).should_receive('execute_command').with_args( def test_make_list_command_includes_log_debug():
('borg', 'list', '--debug', '--show-rc', 'repo'),
output_log_level=logging.WARNING,
borg_local_path='borg',
)
insert_logging_mock(logging.DEBUG) insert_logging_mock(logging.DEBUG)
module.list_archives( command = module.make_list_command(
repository='repo', repository='repo',
storage_config={}, storage_config={},
list_arguments=flexmock(archive=None, paths=None, json=False), list_arguments=flexmock(archive=None, paths=None, json=False),
) )
assert command == ('borg', 'list', '--debug', '--show-rc', 'repo')
def test_list_archives_with_log_debug_and_json_suppresses_most_borg_output():
flexmock(module).should_receive('execute_command').with_args( def test_make_list_command_includes_json_but_not_debug():
('borg', 'list', '--json', 'repo'), output_log_level=None, borg_local_path='borg'
)
insert_logging_mock(logging.DEBUG) insert_logging_mock(logging.DEBUG)
module.list_archives( command = module.make_list_command(
repository='repo', repository='repo',
storage_config={}, storage_config={},
list_arguments=flexmock(archive=None, paths=None, json=True), list_arguments=flexmock(archive=None, paths=None, json=True),
) )
assert command == ('borg', 'list', '--json', 'repo')
def test_list_archives_with_lock_wait_calls_borg_with_lock_wait_parameters():
storage_config = {'lock_wait': 5} def test_make_list_command_includes_json():
flexmock(module).should_receive('execute_command').with_args( command = module.make_list_command(
('borg', 'list', '--lock-wait', '5', 'repo'), repository='repo',
output_log_level=logging.WARNING, storage_config={},
borg_local_path='borg', list_arguments=flexmock(archive=None, paths=None, json=True),
) )
module.list_archives( assert command == ('borg', 'list', '--json', 'repo')
def test_make_list_command_includes_lock_wait():
command = module.make_list_command(
repository='repo', repository='repo',
storage_config=storage_config, storage_config={'lock_wait': 5},
list_arguments=flexmock(archive=None, paths=None, json=False), list_arguments=flexmock(archive=None, paths=None, json=False),
) )
assert command == ('borg', 'list', '--lock-wait', '5', 'repo')
def test_list_archives_with_archive_calls_borg_with_archive_parameter():
storage_config = {}
flexmock(module).should_receive('execute_command').with_args(
('borg', 'list', 'repo::archive'), output_log_level=logging.WARNING, borg_local_path='borg'
)
module.list_archives( def test_make_list_command_includes_archive():
command = module.make_list_command(
repository='repo', repository='repo',
storage_config=storage_config, storage_config={},
list_arguments=flexmock(archive='archive', paths=None, json=False), list_arguments=flexmock(archive='archive', paths=None, json=False),
) )
assert command == ('borg', 'list', 'repo::archive')
def test_list_archives_with_path_calls_borg_with_path_parameter():
storage_config = {}
flexmock(module).should_receive('execute_command').with_args(
('borg', 'list', 'repo::archive', 'var/lib'),
output_log_level=logging.WARNING,
borg_local_path='borg',
)
module.list_archives( def test_make_list_command_includes_archive_and_path():
command = module.make_list_command(
repository='repo', repository='repo',
storage_config=storage_config, storage_config={},
list_arguments=flexmock(archive='archive', paths=['var/lib'], json=False), list_arguments=flexmock(archive='archive', paths=['var/lib'], json=False),
) )
assert command == ('borg', 'list', 'repo::archive', 'var/lib')
def test_list_archives_with_local_path_calls_borg_via_local_path():
flexmock(module).should_receive('execute_command').with_args(
('borg1', 'list', 'repo'), output_log_level=logging.WARNING, borg_local_path='borg1'
)
module.list_archives( def test_make_list_command_includes_local_path():
command = module.make_list_command(
repository='repo', repository='repo',
storage_config={}, storage_config={},
list_arguments=flexmock(archive=None, paths=None, json=False), list_arguments=flexmock(archive=None, paths=None, json=False),
local_path='borg1', local_path='borg2',
) )
assert command == ('borg2', 'list', 'repo')
def test_list_archives_with_remote_path_calls_borg_with_remote_path_parameters():
flexmock(module).should_receive('execute_command').with_args(
('borg', 'list', '--remote-path', 'borg1', 'repo'),
output_log_level=logging.WARNING,
borg_local_path='borg',
)
module.list_archives( def test_make_list_command_includes_remote_path():
command = module.make_list_command(
repository='repo', repository='repo',
storage_config={}, storage_config={},
list_arguments=flexmock(archive=None, paths=None, json=False), list_arguments=flexmock(archive=None, paths=None, json=False),
remote_path='borg1', remote_path='borg2',
) )
assert command == ('borg', 'list', '--remote-path', 'borg2', 'repo')
def test_list_archives_with_short_calls_borg_with_short_parameter():
flexmock(module).should_receive('execute_command').with_args(
('borg', 'list', '--short', 'repo'),
output_log_level=logging.WARNING,
borg_local_path='borg',
).and_return('[]')
module.list_archives( def test_make_list_command_includes_short():
command = module.make_list_command(
repository='repo', repository='repo',
storage_config={}, storage_config={},
list_arguments=flexmock(archive=None, paths=None, json=False, short=True), list_arguments=flexmock(archive=None, paths=None, json=False, short=True),
) )
assert command == ('borg', 'list', '--short', 'repo')
@pytest.mark.parametrize( @pytest.mark.parametrize(
'argument_name', 'argument_name',
@ -271,29 +241,156 @@ def test_list_archives_with_short_calls_borg_with_short_parameter():
'patterns_from', 'patterns_from',
), ),
) )
def test_list_archives_passes_through_arguments_to_borg(argument_name): def test_make_list_command_includes_additional_flags(argument_name):
command = module.make_list_command(
repository='repo',
storage_config={},
list_arguments=flexmock(
archive=None,
paths=None,
json=False,
find_paths=None,
format=None,
**{argument_name: 'value'}
),
)
assert command == ('borg', 'list', '--' + argument_name.replace('_', '-'), 'value', 'repo')
def test_make_find_paths_passes_through_empty_paths():
    # No find paths in means no patterns out.
    empty_paths = ()

    assert module.make_find_paths(empty_paths) == ()
def test_make_find_paths_passes_through_patterns():
    # Every entry already uses pattern syntax: either a two-character style selector followed by a
    # colon, or a single-character prefix followed by a space.
    style_patterns = ('fm:*', 'sh:**/*.txt', 're:^.*$', 'pp:root/somedir', 'pf:root/foo.txt')
    prefixed_patterns = ('R /', 'r /', 'p /', 'P /', '+ /', '- /', '! /')
    patterns = style_patterns + prefixed_patterns

    assert module.make_find_paths(patterns) == patterns
def test_make_find_paths_adds_globs_to_path_fragments():
    # A bare path fragment gets wrapped into a shell-style glob pattern.
    globbed_paths = module.make_find_paths(('foo.txt',))

    assert globbed_paths == ('sh:**/*foo.txt*/**',)
def test_list_archives_calls_borg_with_parameters():
list_arguments = argparse.Namespace(archive=None, paths=None, json=False, find_paths=None)
flexmock(module).should_receive('make_list_command').with_args(
repository='repo',
storage_config={},
list_arguments=list_arguments,
local_path='borg',
remote_path=None,
).and_return(('borg', 'list', 'repo'))
flexmock(module).should_receive('make_find_paths').and_return(())
flexmock(module).should_receive('execute_command').with_args( flexmock(module).should_receive('execute_command').with_args(
('borg', 'list', '--' + argument_name.replace('_', '-'), 'value', 'repo'), ('borg', 'list', 'repo'), output_log_level=logging.WARNING, borg_local_path='borg'
output_log_level=logging.WARNING, ).once()
borg_local_path='borg',
).and_return('[]')
module.list_archives( module.list_archives(
repository='repo', repository='repo', storage_config={}, list_arguments=list_arguments,
storage_config={},
list_arguments=flexmock(archive=None, paths=None, json=False, **{argument_name: 'value'}),
) )
def test_list_archives_with_json_calls_borg_with_json_parameter(): def test_list_archives_with_json_suppresses_most_borg_output():
list_arguments = argparse.Namespace(archive=None, paths=None, json=True, find_paths=None)
flexmock(module).should_receive('make_list_command').with_args(
repository='repo',
storage_config={},
list_arguments=list_arguments,
local_path='borg',
remote_path=None,
).and_return(('borg', 'list', 'repo'))
flexmock(module).should_receive('make_find_paths').and_return(())
flexmock(module).should_receive('execute_command').with_args( flexmock(module).should_receive('execute_command').with_args(
('borg', 'list', '--json', 'repo'), output_log_level=None, borg_local_path='borg' ('borg', 'list', 'repo'), output_log_level=None, borg_local_path='borg'
).and_return('[]') ).once()
json_output = module.list_archives( module.list_archives(
repository='repo', repository='repo', storage_config={}, list_arguments=list_arguments,
storage_config={},
list_arguments=flexmock(archive=None, paths=None, json=True),
) )
assert json_output == '[]'
def test_list_archives_calls_borg_with_local_path():
    # With no archive and no find paths, expect a single listing run via the custom Borg binary.
    arguments = argparse.Namespace(archive=None, paths=None, json=False, find_paths=None)
    expected_command = ('borg2', 'list', 'repo')

    flexmock(module).should_receive('make_list_command').with_args(
        repository='repo',
        storage_config={},
        list_arguments=arguments,
        local_path='borg2',
        remote_path=None,
    ).and_return(expected_command)
    flexmock(module).should_receive('make_find_paths').and_return(())
    flexmock(module).should_receive('execute_command').with_args(
        expected_command, output_log_level=logging.WARNING, borg_local_path='borg2'
    ).once()

    module.list_archives(
        repository='repo', storage_config={}, list_arguments=arguments, local_path='borg2'
    )
def test_list_archives_calls_borg_multiple_times_with_find_paths():
    find_patterns = ('**/*foo.txt*/**',)
    arguments = argparse.Namespace(
        archive=None, paths=None, json=False, find_paths=['foo.txt'], format=None
    )
    repository_command = ('borg', 'list', 'repo')
    archive_commands = (('borg', 'list', 'repo::archive1'), ('borg', 'list', 'repo::archive2'))

    # The first command built lists the repository; the following ones list each archive in turn.
    flexmock(module).should_receive('make_list_command').and_return(repository_command).and_return(
        archive_commands[0]
    ).and_return(archive_commands[1])
    flexmock(module).should_receive('make_find_paths').and_return(find_patterns)

    # The repository listing is captured rather than logged, and yields two archive lines.
    flexmock(module).should_receive('execute_command').with_args(
        repository_command, output_log_level=None, borg_local_path='borg'
    ).and_return(
        'archive1 Sun, 2022-05-29 15:27:04 [abc]\narchive2 Mon, 2022-05-30 19:47:15 [xyz]'
    ).once()

    # Each archive then gets exactly one listing with the find patterns appended.
    for archive_command in archive_commands:
        flexmock(module).should_receive('execute_command').with_args(
            archive_command + find_patterns,
            output_log_level=logging.WARNING,
            borg_local_path='borg',
        ).once()

    module.list_archives(repository='repo', storage_config={}, list_arguments=arguments)
def test_list_archives_calls_borg_with_archive():
    # With an archive already selected, expect exactly one listing of that archive's contents.
    arguments = argparse.Namespace(archive='archive', paths=None, json=False, find_paths=None)
    expected_command = ('borg', 'list', 'repo::archive')

    flexmock(module).should_receive('make_list_command').with_args(
        repository='repo',
        storage_config={},
        list_arguments=arguments,
        local_path='borg',
        remote_path=None,
    ).and_return(expected_command)
    flexmock(module).should_receive('make_find_paths').and_return(())
    flexmock(module).should_receive('execute_command').with_args(
        expected_command, output_log_level=logging.WARNING, borg_local_path='borg'
    ).once()

    module.list_archives(repository='repo', storage_config={}, list_arguments=arguments)