Allow before_backup and similar hooks to exit with a soft failure without altering the monitoring status (#292).

This commit is contained in:
Dan Helfman 2020-06-02 14:33:41 -07:00
parent 6db232d4ac
commit 398665be9e
7 changed files with 82 additions and 44 deletions

4
NEWS
View file

@ -1,4 +1,8 @@
1.5.6.dev0
* #292: Allow before_backup and similar hooks to exit with a soft failure without altering the
monitoring status on Healthchecks or other providers. Support this by waiting to ping monitoring
services with a "start" status until after before_* hooks finish. Failures in before_* hooks
still trigger a monitoring "fail" status.
* #316: Fix hang when a stale database dump named pipe from an aborted borgmatic run remains on
disk.
* Tweak comment indentation in generated configuration file for clarity.

View file

@ -59,11 +59,10 @@ def run_configuration(config_filename, config, arguments):
try:
if prune_create_or_check:
dispatch.call_hooks(
'ping_monitor',
'initialize_monitor',
hooks,
config_filename,
monitor.MONITOR_HOOK_NAMES,
monitor.State.START,
monitoring_log_level,
global_arguments.dry_run,
)
@ -91,6 +90,16 @@ def run_configuration(config_filename, config, arguments):
'pre-check',
global_arguments.dry_run,
)
if prune_create_or_check:
dispatch.call_hooks(
'ping_monitor',
hooks,
config_filename,
monitor.MONITOR_HOOK_NAMES,
monitor.State.START,
monitoring_log_level,
global_arguments.dry_run,
)
except (OSError, CalledProcessError) as error:
if command.considered_soft_failure(config_filename, error):
return
@ -123,6 +132,16 @@ def run_configuration(config_filename, config, arguments):
if not encountered_error:
try:
if prune_create_or_check:
dispatch.call_hooks(
'ping_monitor',
hooks,
config_filename,
monitor.MONITOR_HOOK_NAMES,
monitor.State.FINISH,
monitoring_log_level,
global_arguments.dry_run,
)
if 'prune' in arguments:
command.execute_hook(
hooks.get('after_prune'),
@ -155,16 +174,6 @@ def run_configuration(config_filename, config, arguments):
'post-check',
global_arguments.dry_run,
)
if {'prune', 'create', 'check'}.intersection(arguments):
dispatch.call_hooks(
'ping_monitor',
hooks,
config_filename,
monitor.MONITOR_HOOK_NAMES,
monitor.State.FINISH,
monitoring_log_level,
global_arguments.dry_run,
)
except (OSError, CalledProcessError) as error:
if command.considered_soft_failure(config_filename, error):
return
@ -176,6 +185,15 @@ def run_configuration(config_filename, config, arguments):
if encountered_error and prune_create_or_check:
try:
dispatch.call_hooks(
'ping_monitor',
hooks,
config_filename,
monitor.MONITOR_HOOK_NAMES,
monitor.State.FAIL,
monitoring_log_level,
global_arguments.dry_run,
)
command.execute_hook(
hooks.get('on_error'),
hooks.get('umask'),
@ -186,15 +204,6 @@ def run_configuration(config_filename, config, arguments):
error=encountered_error,
output=getattr(encountered_error, 'output', ''),
)
dispatch.call_hooks(
'ping_monitor',
hooks,
config_filename,
monitor.MONITOR_HOOK_NAMES,
monitor.State.FAIL,
monitoring_log_level,
global_arguments.dry_run,
)
except (OSError, CalledProcessError) as error:
if command.considered_soft_failure(config_filename, error):
return

View file

@ -13,6 +13,13 @@ MONITOR_STATE_TO_CRONHUB = {
}
def initialize_monitor(ping_url, config_filename, monitoring_log_level, dry_run):
    '''
    Do nothing: the Cronhub monitor needs no setup before it is pinged. All of the given
    arguments (ping URL, configuration filename, monitoring log level, and dry run flag) are
    accepted and ignored, and nothing is returned.
    '''
def ping_monitor(ping_url, config_filename, state, monitoring_log_level, dry_run):
'''
Ping the given Cronhub URL, modified with the monitor.State. Use the given configuration

View file

@ -13,6 +13,13 @@ MONITOR_STATE_TO_CRONITOR = {
}
def initialize_monitor(ping_url, config_filename, monitoring_log_level, dry_run):
    '''
    Do nothing: the Cronitor monitor needs no setup before it is pinged. All of the given
    arguments (ping URL, configuration filename, monitoring log level, and dry run flag) are
    accepted and ignored, and nothing is returned.
    '''
def ping_monitor(ping_url, config_filename, state, monitoring_log_level, dry_run):
'''
Ping the given Cronitor URL, modified with the monitor.State. Use the given configuration

View file

@ -65,20 +65,22 @@ def format_buffered_logs_for_payload():
return payload
def initialize_monitor(ping_url_or_uuid, config_filename, monitoring_log_level, dry_run):
    '''
    Attach an in-memory buffering log handler to the root logger. The handler is a
    Forgetful_buffering_handler built with PAYLOAD_LIMIT_BYTES and the given monitoring log level,
    so the most recent log output is retained and can be sent to Healthchecks along with a later
    finish or failure ping.

    The ping URL or UUID, configuration filename, and dry run flag are not used here; the handler
    is added unconditionally.
    '''
    root_logger = logging.getLogger()
    buffering_handler = Forgetful_buffering_handler(PAYLOAD_LIMIT_BYTES, monitoring_log_level)
    root_logger.addHandler(buffering_handler)
def ping_monitor(ping_url_or_uuid, config_filename, state, monitoring_log_level, dry_run):
'''
Ping the given Healthchecks URL or UUID, modified with the monitor.State. Use the given
configuration filename in any log entries, and log to Healthchecks with the given log level.
If this is a dry run, then don't actually ping anything.
'''
if state is monitor.State.START:
# Add a handler to the root logger that stores in memory the most recent logs emitted. That
# way, we can send them all to Healthchecks upon a finish or failure state.
logging.getLogger().addHandler(
Forgetful_buffering_handler(PAYLOAD_LIMIT_BYTES, monitoring_log_level)
)
payload = ''
ping_url = (
ping_url_or_uuid
if ping_url_or_uuid.startswith('http')
@ -97,6 +99,8 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, monitoring_log_level,
if state in (monitor.State.FINISH, monitor.State.FAIL):
payload = format_buffered_logs_for_payload()
else:
payload = ''
if not dry_run:
logging.getLogger('urllib3').setLevel(logging.ERROR)

View file

@ -12,6 +12,13 @@ logger = logging.getLogger(__name__)
EVENTS_API_URL = 'https://events.pagerduty.com/v2/enqueue'
def initialize_monitor(integration_key, config_filename, monitoring_log_level, dry_run):
    '''
    Do nothing: the PagerDuty monitor needs no setup before events are sent. All of the given
    arguments (integration key, configuration filename, monitoring log level, and dry run flag)
    are accepted and ignored, and nothing is returned.
    '''
def ping_monitor(integration_key, config_filename, state, monitoring_log_level, dry_run):
'''
If this is an error state, create a PagerDuty event with the given integration key. Use the

View file

@ -117,21 +117,21 @@ hooks:
```
With this hook in place, borgmatic pings your Healthchecks project when a
backup begins, ends, or errors. Specifically, before the <a
backup begins, ends, or errors. Specifically, after the <a
href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
hooks</a> run, borgmatic lets Healthchecks know that it has started if any of
the `prune`, `create`, or `check` actions are run.
Then, if the actions complete successfully, borgmatic notifies Healthchecks of
the success after the `after_backup` hooks run, and includes borgmatic logs in
the success before the `after_backup` hooks run, and includes borgmatic logs in
the payload data sent to Healthchecks. This means that borgmatic logs show up
in the Healthchecks UI, although be aware that Healthchecks currently has a
10-kilobyte limit for the logs in each ping.
If an error occurs during any action, borgmatic notifies Healthchecks after
the `on_error` hooks run, also tacking on logs including the error itself. But
the logs are only included for errors that occur when a `prune`, `create`, or
`check` action is run.
If an error occurs during any action or hook, borgmatic notifies Healthchecks
before the `on_error` hooks run, also tacking on logs including the error
itself. But the logs are only included for errors that occur when a `prune`,
`create`, or `check` action is run.
You can customize the verbosity of the logs that are sent to Healthchecks with
borgmatic's `--monitoring-verbosity` flag. The `--files` and `--stats` flags
@ -157,13 +157,13 @@ hooks:
```
With this hook in place, borgmatic pings your Cronitor monitor when a backup
begins, ends, or errors. Specifically, before the <a
begins, ends, or errors. Specifically, after the <a
href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
hooks</a> run, borgmatic lets Cronitor know that it has started if any of the
`prune`, `create`, or `check` actions are run. Then, if the actions complete
successfully, borgmatic notifies Cronitor of the success after the
`after_backup` hooks run. And if an error occurs during any action, borgmatic
notifies Cronitor after the `on_error` hooks run.
successfully, borgmatic notifies Cronitor of the success before the
`after_backup` hooks run. And if an error occurs during any action or hook,
borgmatic notifies Cronitor before the `on_error` hooks run.
You can configure Cronitor to notify you by a [variety of
mechanisms](https://cronitor.io/docs/cron-job-notifications) when backups fail
@ -185,13 +185,13 @@ hooks:
```
With this hook in place, borgmatic pings your Cronhub monitor when a backup
begins, ends, or errors. Specifically, before the <a
begins, ends, or errors. Specifically, after the <a
href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
hooks</a> run, borgmatic lets Cronhub know that it has started if any of the
`prune`, `create`, or `check` actions are run. Then, if the actions complete
successfully, borgmatic notifies Cronhub of the success after the
`after_backup` hooks run. And if an error occurs during any action, borgmatic
notifies Cronhub after the `on_error` hooks run.
successfully, borgmatic notifies Cronhub of the success before the
`after_backup` hooks run. And if an error occurs during any action or hook,
borgmatic notifies Cronhub before the `on_error` hooks run.
Note that even though you configure borgmatic with the "start" variant of the
ping URL, borgmatic substitutes the correct state into the URL when pinging
@ -228,7 +228,7 @@ hooks:
With this hook in place, borgmatic creates a PagerDuty event for your service
whenever backups fail. Specifically, if an error occurs during a `create`,
`prune`, or `check` action, borgmatic sends an event to PagerDuty after the
`prune`, or `check` action, borgmatic sends an event to PagerDuty before the
`on_error` hooks run. Note that borgmatic does not contact PagerDuty when a
backup starts or ends without error.