Change connection failures for monitoring hooks to be warnings instead of errors (#439).

This commit is contained in:
Dan Helfman 2022-05-24 15:50:04 -07:00
parent 157e59ac88
commit 54933ebef5
11 changed files with 86 additions and 6 deletions

3
NEWS
View file

@ -3,6 +3,9 @@
logs to send to the Healthchecks server. logs to send to the Healthchecks server.
* #402: Remove the error when "archive_name_format" is specified but a retention prefix isn't. * #402: Remove the error when "archive_name_format" is specified but a retention prefix isn't.
* #420: Warn when an unsupported variable is used in a hook command. * #420: Warn when an unsupported variable is used in a hook command.
* #439: Change connection failures for monitoring hooks (Healthchecks, Cronitor, PagerDuty, and
Cronhub) to be warnings instead of errors. This way, the monitoring system failing does not block
backups.
* #460: Add Healthchecks monitoring hook "send_logs" option to enable/disable sending borgmatic * #460: Add Healthchecks monitoring hook "send_logs" option to enable/disable sending borgmatic
logs to the Healthchecks server. logs to the Healthchecks server.
* #525: Add Healthchecks monitoring hook "states" option to only enable pinging for particular * #525: Add Healthchecks monitoring hook "states" option to only enable pinging for particular

View file

@ -19,7 +19,7 @@ def interpolate_context(config_filename, hook_description, command, context):
command = command.replace('{%s}' % name, str(value)) command = command.replace('{%s}' % name, str(value))
for unsupported_variable in re.findall(r'{\w+}', command): for unsupported_variable in re.findall(r'{\w+}', command):
logger.warn( logger.warning(
f"{config_filename}: Variable '{unsupported_variable}' is not supported in {hook_description} hook" f"{config_filename}: Variable '{unsupported_variable}' is not supported in {hook_description} hook"
) )

View file

@ -42,7 +42,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_
if not dry_run: if not dry_run:
logging.getLogger('urllib3').setLevel(logging.ERROR) logging.getLogger('urllib3').setLevel(logging.ERROR)
requests.get(ping_url) try:
requests.get(ping_url)
except requests.exceptions.RequestException as error:
logger.warning(f'{config_filename}: Cronhub error: {error}')
def destroy_monitor( def destroy_monitor(

View file

@ -37,7 +37,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_
if not dry_run: if not dry_run:
logging.getLogger('urllib3').setLevel(logging.ERROR) logging.getLogger('urllib3').setLevel(logging.ERROR)
requests.get(ping_url) try:
requests.get(ping_url)
except requests.exceptions.RequestException as error:
logger.warning(f'{config_filename}: Cronitor error: {error}')
def destroy_monitor( def destroy_monitor(

View file

@ -124,7 +124,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_
if not dry_run: if not dry_run:
logging.getLogger('urllib3').setLevel(logging.ERROR) logging.getLogger('urllib3').setLevel(logging.ERROR)
requests.post(ping_url, data=payload.encode('utf-8')) try:
requests.post(ping_url, data=payload.encode('utf-8'))
except requests.exceptions.RequestException as error:
logger.warning(f'{config_filename}: Healthchecks error: {error}')
def destroy_monitor(hook_config, config_filename, monitoring_log_level, dry_run): def destroy_monitor(hook_config, config_filename, monitoring_log_level, dry_run):

View file

@ -68,7 +68,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_
logger.debug('{}: Using PagerDuty payload: {}'.format(config_filename, payload)) logger.debug('{}: Using PagerDuty payload: {}'.format(config_filename, payload))
logging.getLogger('urllib3').setLevel(logging.ERROR) logging.getLogger('urllib3').setLevel(logging.ERROR)
requests.post(EVENTS_API_URL, data=payload.encode('utf-8')) try:
requests.post(EVENTS_API_URL, data=payload.encode('utf-8'))
except requests.exceptions.RequestException as error:
logger.warning(f'{config_filename}: PagerDuty error: {error}')
def destroy_monitor( def destroy_monitor(

View file

@ -159,7 +159,10 @@ itself. But the logs are only included for errors that occur when a `prune`,
You can customize the verbosity of the logs that are sent to Healthchecks with You can customize the verbosity of the logs that are sent to Healthchecks with
borgmatic's `--monitoring-verbosity` flag. The `--files` and `--stats` flags borgmatic's `--monitoring-verbosity` flag. The `--files` and `--stats` flags
may also be of use. See `borgmatic --help` for more information. may also be of use. See `borgmatic --help` for more information. Additionally,
see the [borgmatic configuration
file](https://torsion.org/borgmatic/docs/reference/configuration/) for
other Healthchecks options.
You can configure Healthchecks to notify you by a [variety of You can configure Healthchecks to notify you by a [variety of
mechanisms](https://healthchecks.io/#welcome-integrations) when backups fail mechanisms](https://healthchecks.io/#welcome-integrations) when backups fail

View file

@ -58,3 +58,18 @@ def test_ping_monitor_dry_run_does_not_hit_ping_url():
module.ping_monitor( module.ping_monitor(
hook_config, 'config.yaml', module.monitor.State.START, monitoring_log_level=1, dry_run=True hook_config, 'config.yaml', module.monitor.State.START, monitoring_log_level=1, dry_run=True
) )
def test_ping_monitor_with_connection_error_does_not_raise():
    '''
    When the request to the ping URL fails with a connection error, ping_monitor() returns
    normally instead of propagating the exception.
    '''
    hook_config = {'ping_url': 'https://example.com/start/abcdef'}
    flexmock(module.requests).should_receive('get').and_raise(
        module.requests.exceptions.ConnectionError
    )
    # Stub out the warning that gets logged on request failure, so that it doesn't reach the real
    # logging handlers while the test runs.
    flexmock(module.logger).should_receive('warning')

    module.ping_monitor(
        hook_config,
        'config.yaml',
        module.monitor.State.START,
        monitoring_log_level=1,
        dry_run=False,
    )

View file

@ -45,3 +45,18 @@ def test_ping_monitor_dry_run_does_not_hit_ping_url():
module.ping_monitor( module.ping_monitor(
hook_config, 'config.yaml', module.monitor.State.START, monitoring_log_level=1, dry_run=True hook_config, 'config.yaml', module.monitor.State.START, monitoring_log_level=1, dry_run=True
) )
def test_ping_monitor_with_connection_error_does_not_raise():
    '''
    When the request to the ping URL fails with a connection error, ping_monitor() returns
    normally instead of propagating the exception.
    '''
    hook_config = {'ping_url': 'https://example.com'}
    flexmock(module.requests).should_receive('get').and_raise(
        module.requests.exceptions.ConnectionError
    )
    # Stub out the warning that gets logged on request failure, so that it doesn't reach the real
    # logging handlers while the test runs.
    flexmock(module.logger).should_receive('warning')

    module.ping_monitor(
        hook_config,
        'config.yaml',
        module.monitor.State.START,
        monitoring_log_level=1,
        dry_run=False,
    )

View file

@ -231,3 +231,20 @@ def test_ping_monitor_hits_ping_url_when_states_matching():
monitoring_log_level=1, monitoring_log_level=1,
dry_run=False, dry_run=False,
) )
def test_ping_monitor_with_connection_error_does_not_raise():
    '''
    When the POST to the start URL fails with a connection error, ping_monitor() returns normally
    instead of propagating the exception.
    '''
    connection_error = module.requests.exceptions.ConnectionError
    empty_payload = ''.encode('utf-8')

    flexmock(module).should_receive('Forgetful_buffering_handler')
    flexmock(module.logger).should_receive('warning')
    # The request is expected to go to the "/start" endpoint with an empty payload, and it fails.
    flexmock(module.requests).should_receive('post').with_args(
        'https://example.com/start', data=empty_payload
    ).and_raise(connection_error)

    module.ping_monitor(
        {'ping_url': 'https://example.com'},
        'config.yaml',
        state=module.monitor.State.START,
        monitoring_log_level=1,
        dry_run=False,
    )

View file

@ -49,3 +49,18 @@ def test_ping_monitor_dry_run_does_not_call_api():
monitoring_log_level=1, monitoring_log_level=1,
dry_run=True, dry_run=True,
) )
def test_ping_monitor_with_connection_error_does_not_raise():
    '''
    When the POST request fails with a connection error for a FAIL state ping, ping_monitor()
    returns normally instead of propagating the exception.
    '''
    hook_config = {'integration_key': 'abc123'}
    connection_error = module.requests.exceptions.ConnectionError

    flexmock(module.logger).should_receive('warning')
    flexmock(module.requests).should_receive('post').and_raise(connection_error)

    module.ping_monitor(
        hook_config,
        'config.yaml',
        module.monitor.State.FAIL,
        monitoring_log_level=1,
        dry_run=False,
    )