Spot check documentation and edge case tweaks (#656).

Dan Helfman 2024-04-15 14:18:42 -07:00
parent d243a8c836
commit 75bdbe6087
5 changed files with 73 additions and 33 deletions

NEWS

@@ -1,8 +1,8 @@
 1.8.10.dev0
-* #656: Add a "spot" consistency check that compares file counts and contents between your source
-  files and the latest archive, ensuring they fall within configured tolerances. This can catch
-  problems like incorrect excludes, inadvertent deletes, files changed by malware, etc. See the
-  documentation for more information:
+* #656 (beta): Add a "spot" consistency check that compares file counts and contents between your
+  source files and the latest archive, ensuring they fall within configured tolerances. This can
+  catch problems like incorrect excludes, inadvertent deletes, files changed by malware, etc. See
+  the documentation for more information:
   https://torsion.org/borgmatic/docs/how-to/deal-with-very-large-backups/#spot-check
 * #842: When a command hook exits with a soft failure, ping the log and finish states for any
   configured monitoring hooks.


@@ -410,7 +410,7 @@ def compare_spot_check_hashes(
     # source directories.
     spot_check_config = next(check for check in config['checks'] if check['name'] == 'spot')
     sample_count = max(
-        int(len(source_paths) * (spot_check_config['data_sample_percentage'] / 100)), 1
+        int(len(source_paths) * (min(spot_check_config['data_sample_percentage'], 100) / 100)), 1
     )
     source_sample_paths = tuple(random.sample(source_paths, sample_count))
     existing_source_sample_paths = {
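The `min(..., 100)` clamp added above guards against `data_sample_percentage` values over 100, which would otherwise produce a sample count larger than the number of source paths and make `random.sample` raise `ValueError`. Here's a minimal sketch of that failure mode and the clamped arithmetic; `sample_count_for` is a hypothetical helper for illustration, not part of borgmatic:

```python
import random

def sample_count_for(source_paths, data_sample_percentage):
    # Hypothetical stand-in for the clamped expression above: percentages
    # over 100 are treated as 100, so the sample can never exceed the
    # number of available source paths.
    return max(
        int(len(source_paths) * (min(data_sample_percentage, 100) / 100)), 1
    )

source_paths = ['/foo', '/bar']

# Clamped: a 1000% sample just means "sample everything".
count = sample_count_for(source_paths, 1000)
print(count, random.sample(source_paths, count))  # 2 ['/bar', '/foo'] (order varies)

# Unclamped, the same percentage asks for 20 samples from 2 paths and blows up.
try:
    random.sample(source_paths, max(int(len(source_paths) * (1000 / 100)), 1))
except ValueError as error:
    print(f'Unclamped: {error}')  # Sample larger than population or is negative
```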


@@ -108,23 +108,23 @@ for more information.
 The various consistency checks all have trade-offs around speed and
 thoroughness, but most of them don't even look at your original source
 files—arguably one important way to ensure your backups contain the files
-you'll ultimately want to restore in the case of catastrophe (or just an
-accidentally deleted file). Because if something goes wrong with your source
-files, most consistency checks will still pass with flying colors and you
-won't discover there's a problem until you go to restore.
+you'll want to restore in the case of catastrophe (or just an accidentally
+deleted file). Because if something goes wrong with your source files, most
+consistency checks will still pass with flying colors and you won't discover
+there's a problem until you go to restore.
 
 <span class="minilink minilink-addedin">New in version 1.8.10</span> <span
 class="minilink minilink-addedin">Beta feature</span> That's where the spot
-check comes in. This check actually compares your source files counts and data
+check comes in. This check actually compares your source file counts and data
 against those in the latest archive, potentially catching problems like
 incorrect excludes, inadvertent deletes, files changed by malware, etc.
 
 However, because an exhaustive comparison of all source files against the
-latest archive might be too slow, the spot check supports sampling a
+latest archive might be too slow, the spot check supports *sampling* a
 percentage of your source files for the comparison, ensuring it falls within
 configured tolerances.
 
-Here's how to use it. Start by installing the `xxhash` OS package if you don't
+Here's how it works. Start by installing the `xxhash` OS package if you don't
 already have it, so the spot check can run the `xxh64sum` command and
 efficiently hash files for comparison. Then add something like the following
 to your borgmatic configuration:
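The documentation above notes that the spot check shells out to `xxh64sum` to hash files, and the new unit test later in this commit mocks that call with output like `hash1 /foo`. As a rough sketch of hashing a batch of files and mapping paths to digests, assuming an md5sum-style `<digest>  <path>` output format and using a hypothetical `hash_files` helper rather than borgmatic's actual code:

```python
import subprocess

def hash_files(paths):
    # Hypothetical sketch: run xxh64sum once over a batch of paths and
    # return {path: digest}. Each output line is assumed to look like
    # "<digest>  <path>", as with md5sum/sha256sum.
    output = subprocess.run(
        ('xxh64sum', *paths), capture_output=True, text=True, check=True
    ).stdout

    return {
        path: digest
        for digest, path in (
            line.split(maxsplit=1) for line in output.splitlines() if line.strip()
        )
    }

# Example usage (the paths are placeholders): hash the same files on disk and
# as extracted from an archive, then compare the two mappings.
print(hash_files(('/etc/hostname', '/etc/os-release')))
```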
@@ -140,40 +140,44 @@ checks:
 The `count_tolerance_percentage` is the percentage delta between the source
 directories file count and the latest backup archive file count that is
 allowed before the entire consistency check fails. For instance, if the spot
-check runs and finds 100 source files and 105 files in the latest archive,
-that would be within a 10% count tolerance and the check would succeed. But if
-there were 100 source files and 200 archive files, the check would fail. (100
-source files and only 50 archive files would also fail.)
+check runs and finds 100 source files on disk and 105 files in the latest
+archive, that would be within the configured 10% count tolerance and the check
+would succeed. But if there were 100 source files and 200 archive files, the
+check would fail. (100 source files and only 50 archive files would also
+fail.)
 
 The `data_sample_percentage` is the percentage of total files in the source
 directories to randomly sample and compare to their corresponding files in the
-latest backup archive. The comparison is performed by hashing the selected
-files in each of the source paths and the backup archive and counting hashes
-that don't match. For instance, if you have 1,000 source files and your sample
-percentage is 1%, then only 10 source files will be compared against the
-latest archive. These sampled files are selected randomly each time, so in
-effect the spot check is a probabilistic check.
+latest backup archive. A higher value allows a more accurate check—and a
+slower one. The comparison is performed by hashing the selected files in each
+of the source paths and counting hashes that don't match the latest archive.
+For instance, if you have 1,000 source files and your sample percentage is 1%,
+then only 10 source files will be compared against the latest archive. These
+sampled files are selected randomly each time, so in effect the spot check is
+probabilistic.
 
 The `data_tolerance_percentage` is the percentage of total files in the source
 directories that can fail a spot check data comparison without failing the
 entire consistency check. The value must be lower than or equal to the
 `data_sample_percentage`.
 
-All three options are required when using the spot check. And because the spot
+All three options are required when using the spot check. And because the
 check relies on these configured tolerances, it may not be a
 set-it-and-forget-it type of consistency check, at least until you get the
-tolerances dialed in so there are minimal false positives or negatives. For
-certain workloads where your source files experience wild swings of changed
-data or file counts, the spot check may not suitable at all.
+tolerances dialed in so there are minimal false positives or negatives. It is
+recommended you run `borgmatic check` several times after configuring the spot
+check, tweaking your tolerances as needed. For certain workloads where your
+source files experience wild swings of file contents or counts, the spot check
+may not be suitable at all.
 
-What if you change or add or delete a bunch of your source files and you don't
+What if you add, delete, or change a bunch of your source files and you don't
 want the spot check to fail the next time it's run? Run `borgmatic create` to
-create a new backup, thereby allowing the spot check to run against an archive
-that contains your source file changes.
+create a new backup, thereby allowing the next spot check to run against an
+archive that contains your recent changes.
 
-While the spot check feature is currently in beta, it may be subject to
-breaking changes. But feel free to use it in production if you're okay with
-that caveat, and please [provide any
+As long as the spot check feature is in beta, it may be subject to breaking
+changes. But feel free to use it in production if you're okay with that
+caveat, and please [provide any
 feedback](https://torsion.org/borgmatic/#issues) you have on this feature.
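Putting the documented knobs together, here is a rough, self-contained illustration of how the three tolerances interact, reusing the numbers from the examples above (100 vs. 105 files passing a 10% count tolerance, a 1% sample of 1,000 files, failures measured against the total file count). `spot_check_passes` and its parameters are hypothetical names for illustration only; this is not borgmatic's implementation:

```python
import random

def spot_check_passes(
    source_paths,                # paths found on disk
    archive_paths,               # paths listed in the latest archive
    source_hashes,               # {path: digest} for files on disk
    archive_hashes,              # {path: digest} for files in the archive
    count_tolerance_percentage,
    data_sample_percentage,
    data_tolerance_percentage,
):
    # Hypothetical sketch of the documented tolerances, not borgmatic's code.

    # Count tolerance: e.g. 100 source files vs. 105 archive files is a 5%
    # delta, within a 10% tolerance; 100 vs. 200 (or vs. 50) is not.
    count_delta_percentage = (
        abs(len(source_paths) - len(archive_paths)) / len(source_paths) * 100
    )
    if count_delta_percentage > count_tolerance_percentage:
        return False

    # Data sample: randomly pick a percentage of source files to hash, e.g.
    # 1% of 1,000 files means only 10 files get compared on a given run.
    sample_count = max(
        int(len(source_paths) * (min(data_sample_percentage, 100) / 100)), 1
    )
    sample_paths = random.sample(source_paths, sample_count)

    # Data tolerance: fail only if more than the allowed percentage of all
    # source files have digests that don't match the archive.
    failing_count = sum(
        1 for path in sample_paths if source_hashes.get(path) != archive_hashes.get(path)
    )

    return failing_count / len(source_paths) * 100 <= data_tolerance_percentage


# 3 files on disk vs. 4 in the archive is a ~33% count delta (within the 50%
# tolerance), and every sampled digest matches, so the check passes.
print(
    spot_check_passes(
        source_paths=['/foo', '/bar', '/baz'],
        archive_paths=['/foo', '/bar', '/baz', '/extra'],
        source_hashes={'/foo': 'h1', '/bar': 'h2', '/baz': 'h3'},
        archive_hashes={'/foo': 'h1', '/bar': 'h2', '/baz': 'h3'},
        count_tolerance_percentage=50,
        data_sample_percentage=100,
        data_tolerance_percentage=0,
    )
)  # True
```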


@@ -13,7 +13,7 @@ flake8-quotes==3.3.2
 flake8-use-fstring==1.4
 flake8-variables-names==0.0.5
 flexmock==0.11.3
-idna==3.4
+idna==3.7
 isort==5.12.0
 jsonschema==4.17.3
 Markdown==3.4.1


@@ -635,6 +635,42 @@ def test_compare_spot_check_hashes_returns_paths_having_failing_hashes():
     ) == ('/bar',)
 
 
+def test_compare_spot_check_hashes_handles_data_sample_percentage_above_100():
+    flexmock(module.random).should_receive('sample').replace_with(
+        lambda population, count: population[:count]
+    )
+    flexmock(module.os.path).should_receive('exists').and_return(True)
+    flexmock(module.borgmatic.execute).should_receive(
+        'execute_command_and_capture_output'
+    ).with_args(('xxh64sum', '/foo', '/bar')).and_return('hash1 /foo\nhash2 /bar')
+    flexmock(module.borgmatic.borg.list).should_receive('capture_archive_listing').and_return(
+        ['nothash1 /foo', 'nothash2 /bar']
+    )
+
+    assert module.compare_spot_check_hashes(
+        repository={'path': 'repo'},
+        archive='archive',
+        config={
+            'checks': [
+                {
+                    'name': 'archives',
+                    'frequency': '2 weeks',
+                },
+                {
+                    'name': 'spot',
+                    'data_sample_percentage': 1000,
+                },
+            ]
+        },
+        local_borg_version=flexmock(),
+        global_arguments=flexmock(),
+        local_path=flexmock(),
+        remote_path=flexmock(),
+        log_label='repo',
+        source_paths=('/foo', '/bar'),
+    ) == ('/foo', '/bar')
+
+
 def test_compare_spot_check_hashes_uses_xxh64sum_command_option():
     flexmock(module.random).should_receive('sample').replace_with(
         lambda population, count: population[:count]