When using the Healthchecks monitoring hook, include borgmatic logs in the payloads for completion and failure pings (#241).

This commit is contained in:
Dan Helfman 2019-11-17 16:54:27 -08:00
parent 26a89de790
commit 50f62d73b7
7 changed files with 198 additions and 36 deletions

7
NEWS
View file

@ -1,3 +1,10 @@
1.4.11
* #241: When using the Healthchecks monitoring hook, include borgmatic logs in the payloads for
completion and failure pings.
* With --verbosity level 1 or 2, show error logs both inline when they occur and in the summary
logs at the bottom. With lower verbosity levels, suppress the summary and show error logs when
they occur.
1.4.10 1.4.10
* #246: Fix for "borgmatic restore" showing success and incorrectly extracting archive files, even * #246: Fix for "borgmatic restore" showing success and incorrectly extracting archive files, even
when no databases are configured to restore. As this can overwrite files from the archive and when no databases are configured to restore. As this can overwrite files from the archive and

View file

@ -365,33 +365,36 @@ def load_configurations(config_filenames):
return (configs, logs) return (configs, logs)
def log_record(**kwargs):
'''
Create a log record based on the given makeLogRecord() arguments, one of which must be
named "levelno". Log the record and return it.
'''
record = logging.makeLogRecord(kwargs)
logger.handle(record)
return record
def make_error_log_records(message, error=None): def make_error_log_records(message, error=None):
''' '''
Given error message text and an optional exception object, yield a series of logging.LogRecord Given error message text and an optional exception object, yield a series of logging.LogRecord
instances with error summary information. instances with error summary information. As a side effect, log each record.
''' '''
if not error: if not error:
yield logging.makeLogRecord( yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
)
return return
try: try:
raise error raise error
except CalledProcessError as error: except CalledProcessError as error:
yield logging.makeLogRecord( yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
)
if error.output: if error.output:
yield logging.makeLogRecord( yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error.output)
dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error.output) yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error)
)
yield logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error))
except (ValueError, OSError) as error: except (ValueError, OSError) as error:
yield logging.makeLogRecord( yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message) yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error)
)
yield logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error))
except: # noqa: E722 except: # noqa: E722
# Raising above only as a means of determining the error type. Swallow the exception here # Raising above only as a means of determining the error type. Swallow the exception here
# because we don't want the exception to propagate out of this function. # because we don't want the exception to propagate out of this function.
@ -535,13 +538,14 @@ def main(): # pragma: no cover
summary_logs = list(collect_configuration_run_summary_logs(configs, arguments)) summary_logs = list(collect_configuration_run_summary_logs(configs, arguments))
logger.info('') if logger.getEffectiveLevel() < logging.WARNING:
logger.info('summary:') logger.info('')
[ logger.info('summary:')
logger.handle(log) [
for log in parse_logs + summary_logs logger.handle(log)
if log.levelno >= logger.getEffectiveLevel() for log in parse_logs + summary_logs
] if log.levelno >= logger.getEffectiveLevel()
]
if any(log.levelno == logging.CRITICAL for log in summary_logs): if any(log.levelno == logging.CRITICAL for log in summary_logs):
exit_with_help_link() exit_with_help_link()

View file

@ -12,6 +12,58 @@ MONITOR_STATE_TO_HEALTHCHECKS = {
monitor.State.FAIL: 'fail', monitor.State.FAIL: 'fail',
} }
PAYLOAD_TRUNCATION_INDICATOR = '...\n'
PAYLOAD_LIMIT_BYTES = 10 * 1024 - len(PAYLOAD_TRUNCATION_INDICATOR)
class Forgetful_buffering_handler(logging.Handler):
'''
A buffering log handler that stores log messages in memory, and throws away messages (oldest
first) once a particular capacity in bytes is reached.
'''
def __init__(self, byte_capacity):
super().__init__()
self.byte_capacity = byte_capacity
self.byte_count = 0
self.buffer = []
self.forgot = False
def emit(self, record):
message = record.getMessage() + '\n'
self.byte_count += len(message)
self.buffer.append(message)
while self.byte_count > self.byte_capacity and self.buffer:
self.byte_count -= len(self.buffer[0])
self.buffer.pop(0)
self.forgot = True
def format_buffered_logs_for_payload():
'''
Get the handler previously added to the root logger, and slurp buffered logs out of it to
send to Healthchecks.
'''
try:
buffering_handler = next(
handler
for handler in logging.getLogger().handlers
if isinstance(handler, Forgetful_buffering_handler)
)
except StopIteration:
payload = 'Cannot find the log handler for sending logs to Healthchecks'
logger.warning(payload)
return payload
payload = ''.join(message for message in buffering_handler.buffer)
if buffering_handler.forgot:
return PAYLOAD_TRUNCATION_INDICATOR + payload
return payload
def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run): def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
''' '''
@ -19,6 +71,12 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
configuration filename in any log entries. If this is a dry run, then don't actually ping configuration filename in any log entries. If this is a dry run, then don't actually ping
anything. anything.
''' '''
if state is monitor.State.START:
# Add a handler to the root logger that stores in memory the most recent logs emitted. That
# way, we can send them all to Healthchecks upon a finish or failure state.
logging.getLogger().addHandler(Forgetful_buffering_handler(PAYLOAD_LIMIT_BYTES))
payload = ''
ping_url = ( ping_url = (
ping_url_or_uuid ping_url_or_uuid
if ping_url_or_uuid.startswith('http') if ping_url_or_uuid.startswith('http')
@ -35,6 +93,9 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
) )
logger.debug('{}: Using Healthchecks ping URL {}'.format(config_filename, ping_url)) logger.debug('{}: Using Healthchecks ping URL {}'.format(config_filename, ping_url))
if state in (monitor.State.FINISH, monitor.State.FAIL):
payload = format_buffered_logs_for_payload()
if not dry_run: if not dry_run:
logging.getLogger('urllib3').setLevel(logging.ERROR) logging.getLogger('urllib3').setLevel(logging.ERROR)
requests.get(ping_url) requests.post(ping_url, data=payload)

View file

@ -116,9 +116,19 @@ With this hook in place, borgmatic pings your Healthchecks project when a
backup begins, ends, or errors. Specifically, before the <a backup begins, ends, or errors. Specifically, before the <a
href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup` href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
hooks</a> run, borgmatic lets Healthchecks know that a backup has started. hooks</a> run, borgmatic lets Healthchecks know that a backup has started.
Then, if the backup completes successfully, borgmatic notifies Healthchecks of Then, if the backup completes successfully, borgmatic notifies Healthchecks of
the success after the `after_backup` hooks run. And if an error occurs during the success after the `after_backup` hooks run, and includes borgmatic logs in
the backup, borgmatic notifies Healthchecks after the `on_error` hooks run. the payload data sent to Healthchecks. This means that borgmatic logs show up
in the Healthchecks UI, although be aware that Healthchecks currently has a
10-kilobyte limit for the logs in each ping.
If an error occurs during the backup, borgmatic notifies Healthchecks after
the `on_error` hooks run, also tacking on logs including the error itself.
Note that borgmatic sends logs to Healthchecks by applying the maximum of any
other borgmatic verbosity level (`--verbosity`, `--syslog-verbosity`, etc.),
as there is not currently a dedicated Healthchecks verbosity setting.
You can configure Healthchecks to notify you by a [variety of You can configure Healthchecks to notify you by a [variety of
mechanisms](https://healthchecks.io/#welcome-integrations) when backups fail mechanisms](https://healthchecks.io/#welcome-integrations) when backups fail

View file

@ -1,6 +1,6 @@
from setuptools import find_packages, setup from setuptools import find_packages, setup
VERSION = '1.4.10' VERSION = '1.4.11'
setup( setup(

View file

@ -115,36 +115,50 @@ def test_load_configurations_logs_critical_for_parse_error():
assert {log.levelno for log in logs} == {logging.CRITICAL} assert {log.levelno for log in logs} == {logging.CRITICAL}
def test_log_record_does_not_raise():
module.log_record(levelno=1, foo='bar', baz='quux')
def test_make_error_log_records_generates_output_logs_for_message_only(): def test_make_error_log_records_generates_output_logs_for_message_only():
flexmock(module).should_receive('log_record').replace_with(dict)
logs = tuple(module.make_error_log_records('Error')) logs = tuple(module.make_error_log_records('Error'))
assert {log.levelno for log in logs} == {logging.CRITICAL} assert {log['levelno'] for log in logs} == {logging.CRITICAL}
def test_make_error_log_records_generates_output_logs_for_called_process_error(): def test_make_error_log_records_generates_output_logs_for_called_process_error():
flexmock(module).should_receive('log_record').replace_with(dict)
logs = tuple( logs = tuple(
module.make_error_log_records( module.make_error_log_records(
'Error', subprocess.CalledProcessError(1, 'ls', 'error output') 'Error', subprocess.CalledProcessError(1, 'ls', 'error output')
) )
) )
assert {log.levelno for log in logs} == {logging.CRITICAL} assert {log['levelno'] for log in logs} == {logging.CRITICAL}
assert any(log for log in logs if 'error output' in str(log)) assert any(log for log in logs if 'error output' in str(log))
def test_make_error_log_records_generates_logs_for_value_error(): def test_make_error_log_records_generates_logs_for_value_error():
flexmock(module).should_receive('log_record').replace_with(dict)
logs = tuple(module.make_error_log_records('Error', ValueError())) logs = tuple(module.make_error_log_records('Error', ValueError()))
assert {log.levelno for log in logs} == {logging.CRITICAL} assert {log['levelno'] for log in logs} == {logging.CRITICAL}
def test_make_error_log_records_generates_logs_for_os_error(): def test_make_error_log_records_generates_logs_for_os_error():
flexmock(module).should_receive('log_record').replace_with(dict)
logs = tuple(module.make_error_log_records('Error', OSError())) logs = tuple(module.make_error_log_records('Error', OSError()))
assert {log.levelno for log in logs} == {logging.CRITICAL} assert {log['levelno'] for log in logs} == {logging.CRITICAL}
def test_make_error_log_records_generates_nothing_for_other_error(): def test_make_error_log_records_generates_nothing_for_other_error():
flexmock(module).should_receive('log_record').replace_with(dict)
logs = tuple(module.make_error_log_records('Error', KeyError())) logs = tuple(module.make_error_log_records('Error', KeyError()))
assert logs == () assert logs == ()
@ -268,6 +282,7 @@ def test_collect_configuration_run_summary_logs_run_configuration_error():
flexmock(module).should_receive('run_configuration').and_return( flexmock(module).should_receive('run_configuration').and_return(
[logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg='Error'))] [logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg='Error'))]
) )
flexmock(module).should_receive('make_error_log_records').and_return([])
arguments = {} arguments = {}
logs = tuple( logs = tuple(

View file

@ -3,38 +3,103 @@ from flexmock import flexmock
from borgmatic.hooks import healthchecks as module from borgmatic.hooks import healthchecks as module
def test_forgetful_buffering_handler_emit_collects_log_records():
handler = module.Forgetful_buffering_handler(byte_capacity=100)
handler.emit(flexmock(getMessage=lambda: 'foo'))
handler.emit(flexmock(getMessage=lambda: 'bar'))
assert handler.buffer == ['foo\n', 'bar\n']
assert not handler.forgot
def test_forgetful_buffering_handler_emit_forgets_log_records_when_capacity_reached():
handler = module.Forgetful_buffering_handler(byte_capacity=len('foo\nbar\n'))
handler.emit(flexmock(getMessage=lambda: 'foo'))
assert handler.buffer == ['foo\n']
handler.emit(flexmock(getMessage=lambda: 'bar'))
assert handler.buffer == ['foo\n', 'bar\n']
handler.emit(flexmock(getMessage=lambda: 'baz'))
assert handler.buffer == ['bar\n', 'baz\n']
handler.emit(flexmock(getMessage=lambda: 'quux'))
assert handler.buffer == ['quux\n']
assert handler.forgot
def test_format_buffered_logs_for_payload_flattens_log_buffer():
handler = module.Forgetful_buffering_handler(byte_capacity=100)
handler.buffer = ['foo\n', 'bar\n']
flexmock(module.logging).should_receive('getLogger').and_return(flexmock(handlers=[handler]))
payload = module.format_buffered_logs_for_payload()
assert payload == 'foo\nbar\n'
def test_format_buffered_logs_for_payload_inserts_truncation_indicator_when_logs_forgotten():
handler = module.Forgetful_buffering_handler(byte_capacity=100)
handler.buffer = ['foo\n', 'bar\n']
handler.forgot = True
flexmock(module.logging).should_receive('getLogger').and_return(flexmock(handlers=[handler]))
payload = module.format_buffered_logs_for_payload()
assert payload == '...\nfoo\nbar\n'
def test_format_buffered_logs_for_payload_without_handler_produces_payload_anyway():
flexmock(module.logging).should_receive('getLogger').and_return(
flexmock(handlers=[module.logging.Handler()])
)
payload = module.format_buffered_logs_for_payload()
assert payload
def test_ping_monitor_hits_ping_url_for_start_state(): def test_ping_monitor_hits_ping_url_for_start_state():
flexmock(module).should_receive('Forgetful_buffering_handler')
ping_url = 'https://example.com' ping_url = 'https://example.com'
flexmock(module.requests).should_receive('get').with_args('{}/{}'.format(ping_url, 'start')) flexmock(module.requests).should_receive('post').with_args(
'{}/{}'.format(ping_url, 'start'), data=''
)
module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=False) module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=False)
def test_ping_monitor_hits_ping_url_for_finish_state(): def test_ping_monitor_hits_ping_url_for_finish_state():
ping_url = 'https://example.com' ping_url = 'https://example.com'
flexmock(module.requests).should_receive('get').with_args(ping_url) payload = flexmock()
flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
flexmock(module.requests).should_receive('post').with_args(ping_url, data=payload)
module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False) module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)
def test_ping_monitor_hits_ping_url_for_fail_state(): def test_ping_monitor_hits_ping_url_for_fail_state():
ping_url = 'https://example.com' ping_url = 'https://example.com'
flexmock(module.requests).should_receive('get').with_args('{}/{}'.format(ping_url, 'fail')) payload = flexmock()
flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
flexmock(module.requests).should_receive('post').with_args(
'{}/{}'.format(ping_url, 'fail'), data=payload
)
module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FAIL, dry_run=False) module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FAIL, dry_run=False)
def test_ping_monitor_with_ping_uuid_hits_corresponding_url(): def test_ping_monitor_with_ping_uuid_hits_corresponding_url():
ping_uuid = 'abcd-efgh-ijkl-mnop' ping_uuid = 'abcd-efgh-ijkl-mnop'
flexmock(module.requests).should_receive('get').with_args( payload = flexmock()
'https://hc-ping.com/{}'.format(ping_uuid) flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
flexmock(module.requests).should_receive('post').with_args(
'https://hc-ping.com/{}'.format(ping_uuid), data=payload
) )
module.ping_monitor(ping_uuid, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False) module.ping_monitor(ping_uuid, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)
def test_ping_monitor_dry_run_does_not_hit_ping_url(): def test_ping_monitor_dry_run_does_not_hit_ping_url():
flexmock(module).should_receive('Forgetful_buffering_handler')
ping_url = 'https://example.com' ping_url = 'https://example.com'
flexmock(module.requests).should_receive('get').never() flexmock(module.requests).should_receive('post').never()
module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=True) module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=True)