When using the Healthchecks monitoring hook, include borgmatic logs in the payloads for completion and failure pings (#241).
This commit is contained in:
parent
26a89de790
commit
50f62d73b7
7 changed files with 198 additions and 36 deletions
7
NEWS
7
NEWS
|
@ -1,3 +1,10 @@
|
|||
1.4.11
|
||||
* #241: When using the Healthchecks monitoring hook, include borgmatic logs in the payloads for
|
||||
completion and failure pings.
|
||||
* With --verbosity level 1 or 2, show error logs both inline when they occur and in the summary
|
||||
logs at the bottom. With lower verbosity levels, suppress the summary and show error logs when
|
||||
they occur.
|
||||
|
||||
1.4.10
|
||||
* #246: Fix for "borgmatic restore" showing success and incorrectly extracting archive files, even
|
||||
when no databases are configured to restore. As this can overwrite files from the archive and
|
||||
|
|
|
@ -365,33 +365,36 @@ def load_configurations(config_filenames):
|
|||
return (configs, logs)
|
||||
|
||||
|
||||
def log_record(**kwargs):
|
||||
'''
|
||||
Create a log record based on the given makeLogRecord() arguments, one of which must be
|
||||
named "levelno". Log the record and return it.
|
||||
'''
|
||||
record = logging.makeLogRecord(kwargs)
|
||||
logger.handle(record)
|
||||
|
||||
return record
|
||||
|
||||
|
||||
def make_error_log_records(message, error=None):
|
||||
'''
|
||||
Given error message text and an optional exception object, yield a series of logging.LogRecord
|
||||
instances with error summary information.
|
||||
instances with error summary information. As a side effect, log each record.
|
||||
'''
|
||||
if not error:
|
||||
yield logging.makeLogRecord(
|
||||
dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
|
||||
)
|
||||
yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
|
||||
return
|
||||
|
||||
try:
|
||||
raise error
|
||||
except CalledProcessError as error:
|
||||
yield logging.makeLogRecord(
|
||||
dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
|
||||
)
|
||||
yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
|
||||
if error.output:
|
||||
yield logging.makeLogRecord(
|
||||
dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error.output)
|
||||
)
|
||||
yield logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error))
|
||||
yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error.output)
|
||||
yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error)
|
||||
except (ValueError, OSError) as error:
|
||||
yield logging.makeLogRecord(
|
||||
dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
|
||||
)
|
||||
yield logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error))
|
||||
yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
|
||||
yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error)
|
||||
except: # noqa: E722
|
||||
# Raising above only as a means of determining the error type. Swallow the exception here
|
||||
# because we don't want the exception to propagate out of this function.
|
||||
|
@ -535,13 +538,14 @@ def main(): # pragma: no cover
|
|||
|
||||
summary_logs = list(collect_configuration_run_summary_logs(configs, arguments))
|
||||
|
||||
logger.info('')
|
||||
logger.info('summary:')
|
||||
[
|
||||
logger.handle(log)
|
||||
for log in parse_logs + summary_logs
|
||||
if log.levelno >= logger.getEffectiveLevel()
|
||||
]
|
||||
if logger.getEffectiveLevel() < logging.WARNING:
|
||||
logger.info('')
|
||||
logger.info('summary:')
|
||||
[
|
||||
logger.handle(log)
|
||||
for log in parse_logs + summary_logs
|
||||
if log.levelno >= logger.getEffectiveLevel()
|
||||
]
|
||||
|
||||
if any(log.levelno == logging.CRITICAL for log in summary_logs):
|
||||
exit_with_help_link()
|
||||
|
|
|
@ -12,6 +12,58 @@ MONITOR_STATE_TO_HEALTHCHECKS = {
|
|||
monitor.State.FAIL: 'fail',
|
||||
}
|
||||
|
||||
PAYLOAD_TRUNCATION_INDICATOR = '...\n'
|
||||
PAYLOAD_LIMIT_BYTES = 10 * 1024 - len(PAYLOAD_TRUNCATION_INDICATOR)
|
||||
|
||||
|
||||
class Forgetful_buffering_handler(logging.Handler):
|
||||
'''
|
||||
A buffering log handler that stores log messages in memory, and throws away messages (oldest
|
||||
first) once a particular capacity in bytes is reached.
|
||||
'''
|
||||
|
||||
def __init__(self, byte_capacity):
|
||||
super().__init__()
|
||||
|
||||
self.byte_capacity = byte_capacity
|
||||
self.byte_count = 0
|
||||
self.buffer = []
|
||||
self.forgot = False
|
||||
|
||||
def emit(self, record):
|
||||
message = record.getMessage() + '\n'
|
||||
self.byte_count += len(message)
|
||||
self.buffer.append(message)
|
||||
|
||||
while self.byte_count > self.byte_capacity and self.buffer:
|
||||
self.byte_count -= len(self.buffer[0])
|
||||
self.buffer.pop(0)
|
||||
self.forgot = True
|
||||
|
||||
|
||||
def format_buffered_logs_for_payload():
|
||||
'''
|
||||
Get the handler previously added to the root logger, and slurp buffered logs out of it to
|
||||
send to Healthchecks.
|
||||
'''
|
||||
try:
|
||||
buffering_handler = next(
|
||||
handler
|
||||
for handler in logging.getLogger().handlers
|
||||
if isinstance(handler, Forgetful_buffering_handler)
|
||||
)
|
||||
except StopIteration:
|
||||
payload = 'Cannot find the log handler for sending logs to Healthchecks'
|
||||
logger.warning(payload)
|
||||
return payload
|
||||
|
||||
payload = ''.join(message for message in buffering_handler.buffer)
|
||||
|
||||
if buffering_handler.forgot:
|
||||
return PAYLOAD_TRUNCATION_INDICATOR + payload
|
||||
|
||||
return payload
|
||||
|
||||
|
||||
def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
|
||||
'''
|
||||
|
@ -19,6 +71,12 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
|
|||
configuration filename in any log entries. If this is a dry run, then don't actually ping
|
||||
anything.
|
||||
'''
|
||||
if state is monitor.State.START:
|
||||
# Add a handler to the root logger that stores in memory the most recent logs emitted. That
|
||||
# way, we can send them all to Healthchecks upon a finish or failure state.
|
||||
logging.getLogger().addHandler(Forgetful_buffering_handler(PAYLOAD_LIMIT_BYTES))
|
||||
payload = ''
|
||||
|
||||
ping_url = (
|
||||
ping_url_or_uuid
|
||||
if ping_url_or_uuid.startswith('http')
|
||||
|
@ -35,6 +93,9 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
|
|||
)
|
||||
logger.debug('{}: Using Healthchecks ping URL {}'.format(config_filename, ping_url))
|
||||
|
||||
if state in (monitor.State.FINISH, monitor.State.FAIL):
|
||||
payload = format_buffered_logs_for_payload()
|
||||
|
||||
if not dry_run:
|
||||
logging.getLogger('urllib3').setLevel(logging.ERROR)
|
||||
requests.get(ping_url)
|
||||
requests.post(ping_url, data=payload)
|
||||
|
|
|
@ -116,9 +116,19 @@ With this hook in place, borgmatic pings your Healthchecks project when a
|
|||
backup begins, ends, or errors. Specifically, before the <a
|
||||
href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
|
||||
hooks</a> run, borgmatic lets Healthchecks know that a backup has started.
|
||||
|
||||
Then, if the backup completes successfully, borgmatic notifies Healthchecks of
|
||||
the success after the `after_backup` hooks run. And if an error occurs during
|
||||
the backup, borgmatic notifies Healthchecks after the `on_error` hooks run.
|
||||
the success after the `after_backup` hooks run, and includes borgmatic logs in
|
||||
the payload data sent to Healthchecks. This means that borgmatic logs show up
|
||||
in the Healthchecks UI, although be aware that Healthchecks currently has a
|
||||
10-kilobyte limit for the logs in each ping.
|
||||
|
||||
If an error occurs during the backup, borgmatic notifies Healthchecks after
|
||||
the `on_error` hooks run, also tacking on logs including the error itself.
|
||||
|
||||
Note that borgmatic sends logs to Healthchecks by applying the maximum of any
|
||||
other borgmatic verbosity level (`--verbosity`, `--syslog-verbosity`, etc.),
|
||||
as there is not currently a dedicated Healthchecks verbosity setting.
|
||||
|
||||
You can configure Healthchecks to notify you by a [variety of
|
||||
mechanisms](https://healthchecks.io/#welcome-integrations) when backups fail
|
||||
|
|
2
setup.py
2
setup.py
|
@ -1,6 +1,6 @@
|
|||
from setuptools import find_packages, setup
|
||||
|
||||
VERSION = '1.4.10'
|
||||
VERSION = '1.4.11'
|
||||
|
||||
|
||||
setup(
|
||||
|
|
|
@ -115,36 +115,50 @@ def test_load_configurations_logs_critical_for_parse_error():
|
|||
assert {log.levelno for log in logs} == {logging.CRITICAL}
|
||||
|
||||
|
||||
def test_log_record_does_not_raise():
|
||||
module.log_record(levelno=1, foo='bar', baz='quux')
|
||||
|
||||
|
||||
def test_make_error_log_records_generates_output_logs_for_message_only():
|
||||
flexmock(module).should_receive('log_record').replace_with(dict)
|
||||
|
||||
logs = tuple(module.make_error_log_records('Error'))
|
||||
|
||||
assert {log.levelno for log in logs} == {logging.CRITICAL}
|
||||
assert {log['levelno'] for log in logs} == {logging.CRITICAL}
|
||||
|
||||
|
||||
def test_make_error_log_records_generates_output_logs_for_called_process_error():
|
||||
flexmock(module).should_receive('log_record').replace_with(dict)
|
||||
|
||||
logs = tuple(
|
||||
module.make_error_log_records(
|
||||
'Error', subprocess.CalledProcessError(1, 'ls', 'error output')
|
||||
)
|
||||
)
|
||||
|
||||
assert {log.levelno for log in logs} == {logging.CRITICAL}
|
||||
assert {log['levelno'] for log in logs} == {logging.CRITICAL}
|
||||
assert any(log for log in logs if 'error output' in str(log))
|
||||
|
||||
|
||||
def test_make_error_log_records_generates_logs_for_value_error():
|
||||
flexmock(module).should_receive('log_record').replace_with(dict)
|
||||
|
||||
logs = tuple(module.make_error_log_records('Error', ValueError()))
|
||||
|
||||
assert {log.levelno for log in logs} == {logging.CRITICAL}
|
||||
assert {log['levelno'] for log in logs} == {logging.CRITICAL}
|
||||
|
||||
|
||||
def test_make_error_log_records_generates_logs_for_os_error():
|
||||
flexmock(module).should_receive('log_record').replace_with(dict)
|
||||
|
||||
logs = tuple(module.make_error_log_records('Error', OSError()))
|
||||
|
||||
assert {log.levelno for log in logs} == {logging.CRITICAL}
|
||||
assert {log['levelno'] for log in logs} == {logging.CRITICAL}
|
||||
|
||||
|
||||
def test_make_error_log_records_generates_nothing_for_other_error():
|
||||
flexmock(module).should_receive('log_record').replace_with(dict)
|
||||
|
||||
logs = tuple(module.make_error_log_records('Error', KeyError()))
|
||||
|
||||
assert logs == ()
|
||||
|
@ -268,6 +282,7 @@ def test_collect_configuration_run_summary_logs_run_configuration_error():
|
|||
flexmock(module).should_receive('run_configuration').and_return(
|
||||
[logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg='Error'))]
|
||||
)
|
||||
flexmock(module).should_receive('make_error_log_records').and_return([])
|
||||
arguments = {}
|
||||
|
||||
logs = tuple(
|
||||
|
|
|
@ -3,38 +3,103 @@ from flexmock import flexmock
|
|||
from borgmatic.hooks import healthchecks as module
|
||||
|
||||
|
||||
def test_forgetful_buffering_handler_emit_collects_log_records():
|
||||
handler = module.Forgetful_buffering_handler(byte_capacity=100)
|
||||
handler.emit(flexmock(getMessage=lambda: 'foo'))
|
||||
handler.emit(flexmock(getMessage=lambda: 'bar'))
|
||||
|
||||
assert handler.buffer == ['foo\n', 'bar\n']
|
||||
assert not handler.forgot
|
||||
|
||||
|
||||
def test_forgetful_buffering_handler_emit_forgets_log_records_when_capacity_reached():
|
||||
handler = module.Forgetful_buffering_handler(byte_capacity=len('foo\nbar\n'))
|
||||
handler.emit(flexmock(getMessage=lambda: 'foo'))
|
||||
assert handler.buffer == ['foo\n']
|
||||
handler.emit(flexmock(getMessage=lambda: 'bar'))
|
||||
assert handler.buffer == ['foo\n', 'bar\n']
|
||||
handler.emit(flexmock(getMessage=lambda: 'baz'))
|
||||
assert handler.buffer == ['bar\n', 'baz\n']
|
||||
handler.emit(flexmock(getMessage=lambda: 'quux'))
|
||||
assert handler.buffer == ['quux\n']
|
||||
assert handler.forgot
|
||||
|
||||
|
||||
def test_format_buffered_logs_for_payload_flattens_log_buffer():
|
||||
handler = module.Forgetful_buffering_handler(byte_capacity=100)
|
||||
handler.buffer = ['foo\n', 'bar\n']
|
||||
flexmock(module.logging).should_receive('getLogger').and_return(flexmock(handlers=[handler]))
|
||||
|
||||
payload = module.format_buffered_logs_for_payload()
|
||||
|
||||
assert payload == 'foo\nbar\n'
|
||||
|
||||
|
||||
def test_format_buffered_logs_for_payload_inserts_truncation_indicator_when_logs_forgotten():
|
||||
handler = module.Forgetful_buffering_handler(byte_capacity=100)
|
||||
handler.buffer = ['foo\n', 'bar\n']
|
||||
handler.forgot = True
|
||||
flexmock(module.logging).should_receive('getLogger').and_return(flexmock(handlers=[handler]))
|
||||
|
||||
payload = module.format_buffered_logs_for_payload()
|
||||
|
||||
assert payload == '...\nfoo\nbar\n'
|
||||
|
||||
|
||||
def test_format_buffered_logs_for_payload_without_handler_produces_payload_anyway():
|
||||
flexmock(module.logging).should_receive('getLogger').and_return(
|
||||
flexmock(handlers=[module.logging.Handler()])
|
||||
)
|
||||
|
||||
payload = module.format_buffered_logs_for_payload()
|
||||
|
||||
assert payload
|
||||
|
||||
|
||||
def test_ping_monitor_hits_ping_url_for_start_state():
|
||||
flexmock(module).should_receive('Forgetful_buffering_handler')
|
||||
ping_url = 'https://example.com'
|
||||
flexmock(module.requests).should_receive('get').with_args('{}/{}'.format(ping_url, 'start'))
|
||||
flexmock(module.requests).should_receive('post').with_args(
|
||||
'{}/{}'.format(ping_url, 'start'), data=''
|
||||
)
|
||||
|
||||
module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=False)
|
||||
|
||||
|
||||
def test_ping_monitor_hits_ping_url_for_finish_state():
|
||||
ping_url = 'https://example.com'
|
||||
flexmock(module.requests).should_receive('get').with_args(ping_url)
|
||||
payload = flexmock()
|
||||
flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
|
||||
flexmock(module.requests).should_receive('post').with_args(ping_url, data=payload)
|
||||
|
||||
module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)
|
||||
|
||||
|
||||
def test_ping_monitor_hits_ping_url_for_fail_state():
|
||||
ping_url = 'https://example.com'
|
||||
flexmock(module.requests).should_receive('get').with_args('{}/{}'.format(ping_url, 'fail'))
|
||||
payload = flexmock()
|
||||
flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
|
||||
flexmock(module.requests).should_receive('post').with_args(
|
||||
'{}/{}'.format(ping_url, 'fail'), data=payload
|
||||
)
|
||||
|
||||
module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FAIL, dry_run=False)
|
||||
|
||||
|
||||
def test_ping_monitor_with_ping_uuid_hits_corresponding_url():
|
||||
ping_uuid = 'abcd-efgh-ijkl-mnop'
|
||||
flexmock(module.requests).should_receive('get').with_args(
|
||||
'https://hc-ping.com/{}'.format(ping_uuid)
|
||||
payload = flexmock()
|
||||
flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
|
||||
flexmock(module.requests).should_receive('post').with_args(
|
||||
'https://hc-ping.com/{}'.format(ping_uuid), data=payload
|
||||
)
|
||||
|
||||
module.ping_monitor(ping_uuid, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)
|
||||
|
||||
|
||||
def test_ping_monitor_dry_run_does_not_hit_ping_url():
|
||||
flexmock(module).should_receive('Forgetful_buffering_handler')
|
||||
ping_url = 'https://example.com'
|
||||
flexmock(module.requests).should_receive('get').never()
|
||||
flexmock(module.requests).should_receive('post').never()
|
||||
|
||||
module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=True)
|
||||
|
|
Loading…
Reference in a new issue