When using the Healthchecks monitoring hook, include borgmatic logs in the payloads for completion and failure pings (#241).

2019-11-17 16:54:27 -08:00 · 2019-11-17 16:54:27 -08:00 · 50f62d73b7
commit 50f62d73b7
parent 26a89de790
7 changed files with 198 additions and 36 deletions
--- a/7
+++ b/7
@ -1,3 +1,10 @@
+1.4.11
+ * #241: When using the Healthchecks monitoring hook, include borgmatic logs in the payloads for
+   completion and failure pings.
+ * With --verbosity level 1 or 2, show error logs both inline when they occur and in the summary
+   logs at the bottom. With lower verbosity levels, suppress the summary and show error logs when
+   they occur.
+
 1.4.10
 * #246: Fix for "borgmatic restore" showing success and incorrectly extracting archive files, even
   when no databases are configured to restore. As this can overwrite files from the archive and
--- a/borgmatic/commands/borgmatic.py
+++ b/borgmatic/commands/borgmatic.py
@ -365,33 +365,36 @@ def load_configurations(config_filenames):
    return (configs, logs)


+def log_record(**kwargs):
+    '''
+    Create a log record based on the given makeLogRecord() arguments, one of which must be
+    named "levelno". Log the record and return it.
+    '''
+    record = logging.makeLogRecord(kwargs)
+    logger.handle(record)
+
+    return record
+
+
 def make_error_log_records(message, error=None):
    '''
    Given error message text and an optional exception object, yield a series of logging.LogRecord
-    instances with error summary information.
+    instances with error summary information. As a side effect, log each record.
    '''
    if not error:
-        yield logging.makeLogRecord(
-            dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
-        )
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
        return

    try:
        raise error
    except CalledProcessError as error:
-        yield logging.makeLogRecord(
-            dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
-        )
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
        if error.output:
-            yield logging.makeLogRecord(
-                dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error.output)
-            )
-        yield logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error))
+            yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error.output)
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error)
    except (ValueError, OSError) as error:
-        yield logging.makeLogRecord(
-            dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
-        )
-        yield logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error))
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error)
    except:  # noqa: E722
        # Raising above only as a means of determining the error type. Swallow the exception here
        # because we don't want the exception to propagate out of this function.
@ -535,13 +538,14 @@ def main():  # pragma: no cover

    summary_logs = list(collect_configuration_run_summary_logs(configs, arguments))

-    logger.info('')
-    logger.info('summary:')
-    [
-        logger.handle(log)
-        for log in parse_logs + summary_logs
-        if log.levelno >= logger.getEffectiveLevel()
-    ]
+    if logger.getEffectiveLevel() < logging.WARNING:
+        logger.info('')
+        logger.info('summary:')
+        [
+            logger.handle(log)
+            for log in parse_logs + summary_logs
+            if log.levelno >= logger.getEffectiveLevel()
+        ]

    if any(log.levelno == logging.CRITICAL for log in summary_logs):
        exit_with_help_link()
--- a/borgmatic/hooks/healthchecks.py
+++ b/borgmatic/hooks/healthchecks.py
@ -12,6 +12,58 @@ MONITOR_STATE_TO_HEALTHCHECKS = {
    monitor.State.FAIL: 'fail',
 }

+PAYLOAD_TRUNCATION_INDICATOR = '...\n'
+PAYLOAD_LIMIT_BYTES = 10 * 1024 - len(PAYLOAD_TRUNCATION_INDICATOR)
+
+
+class Forgetful_buffering_handler(logging.Handler):
+    '''
+    A buffering log handler that stores log messages in memory, and throws away messages (oldest
+    first) once a particular capacity in bytes is reached.
+    '''
+
+    def __init__(self, byte_capacity):
+        super().__init__()
+
+        self.byte_capacity = byte_capacity
+        self.byte_count = 0
+        self.buffer = []
+        self.forgot = False
+
+    def emit(self, record):
+        message = record.getMessage() + '\n'
+        self.byte_count += len(message)
+        self.buffer.append(message)
+
+        while self.byte_count > self.byte_capacity and self.buffer:
+            self.byte_count -= len(self.buffer[0])
+            self.buffer.pop(0)
+            self.forgot = True
+
+
+def format_buffered_logs_for_payload():
+    '''
+    Get the handler previously added to the root logger, and slurp buffered logs out of it to
+    send to Healthchecks.
+    '''
+    try:
+        buffering_handler = next(
+            handler
+            for handler in logging.getLogger().handlers
+            if isinstance(handler, Forgetful_buffering_handler)
+        )
+    except StopIteration:
+        payload = 'Cannot find the log handler for sending logs to Healthchecks'
+        logger.warning(payload)
+        return payload
+
+    payload = ''.join(message for message in buffering_handler.buffer)
+
+    if buffering_handler.forgot:
+        return PAYLOAD_TRUNCATION_INDICATOR + payload
+
+    return payload
+

 def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
    '''
@ -19,6 +71,12 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
    configuration filename in any log entries. If this is a dry run, then don't actually ping
    anything.
    '''
+    if state is monitor.State.START:
+        # Add a handler to the root logger that stores in memory the most recent logs emitted. That
+        # way, we can send them all to Healthchecks upon a finish or failure state.
+        logging.getLogger().addHandler(Forgetful_buffering_handler(PAYLOAD_LIMIT_BYTES))
+        payload = ''
+
    ping_url = (
        ping_url_or_uuid
        if ping_url_or_uuid.startswith('http')
@ -35,6 +93,9 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
    )
    logger.debug('{}: Using Healthchecks ping URL {}'.format(config_filename, ping_url))

+    if state in (monitor.State.FINISH, monitor.State.FAIL):
+        payload = format_buffered_logs_for_payload()
+
    if not dry_run:
        logging.getLogger('urllib3').setLevel(logging.ERROR)
-        requests.get(ping_url)
+        requests.post(ping_url, data=payload)
--- a/docs/how-to/monitor-your-backups.md
+++ b/docs/how-to/monitor-your-backups.md
@ -116,9 +116,19 @@ With this hook in place, borgmatic pings your Healthchecks project when a
 backup begins, ends, or errors. Specifically, before the <a
 href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
 hooks</a> run, borgmatic lets Healthchecks know that a backup has started.
+
 Then, if the backup completes successfully, borgmatic notifies Healthchecks of
-the success after the `after_backup` hooks run. And if an error occurs during
-the backup, borgmatic notifies Healthchecks after the `on_error` hooks run.
+the success after the `after_backup` hooks run, and includes borgmatic logs in
+the payload data sent to Healthchecks. This means that borgmatic logs show up
+in the Healthchecks UI, although be aware that Healthchecks currently has a
+10-kilobyte limit for the logs in each ping.
+
+If an error occurs during the backup, borgmatic notifies Healthchecks after
+the `on_error` hooks run, also tacking on logs including the error itself.
+
+Note that borgmatic sends logs to Healthchecks by applying the maximum of any
+other borgmatic verbosity level (`--verbosity`, `--syslog-verbosity`, etc.),
+as there is not currently a dedicated Healthchecks verbosity setting.

 You can configure Healthchecks to notify you by a [variety of
 mechanisms](https://healthchecks.io/#welcome-integrations) when backups fail
--- a/setup.py
+++ b/setup.py
@ -1,6 +1,6 @@
 from setuptools import find_packages, setup

-VERSION = '1.4.10'
+VERSION = '1.4.11'


 setup(
--- a/tests/unit/commands/test_borgmatic.py
+++ b/tests/unit/commands/test_borgmatic.py
@ -115,36 +115,50 @@ def test_load_configurations_logs_critical_for_parse_error():
    assert {log.levelno for log in logs} == {logging.CRITICAL}


+def test_log_record_does_not_raise():
+    module.log_record(levelno=1, foo='bar', baz='quux')
+
+
 def test_make_error_log_records_generates_output_logs_for_message_only():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
    logs = tuple(module.make_error_log_records('Error'))

-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}


 def test_make_error_log_records_generates_output_logs_for_called_process_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
    logs = tuple(
        module.make_error_log_records(
            'Error', subprocess.CalledProcessError(1, 'ls', 'error output')
        )
    )

-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}
    assert any(log for log in logs if 'error output' in str(log))


 def test_make_error_log_records_generates_logs_for_value_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
    logs = tuple(module.make_error_log_records('Error', ValueError()))

-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}


 def test_make_error_log_records_generates_logs_for_os_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
    logs = tuple(module.make_error_log_records('Error', OSError()))

-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}


 def test_make_error_log_records_generates_nothing_for_other_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
    logs = tuple(module.make_error_log_records('Error', KeyError()))

    assert logs == ()
@ -268,6 +282,7 @@ def test_collect_configuration_run_summary_logs_run_configuration_error():
    flexmock(module).should_receive('run_configuration').and_return(
        [logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg='Error'))]
    )
+    flexmock(module).should_receive('make_error_log_records').and_return([])
    arguments = {}

    logs = tuple(
--- a/tests/unit/hooks/test_healthchecks.py
+++ b/tests/unit/hooks/test_healthchecks.py
@ -3,38 +3,103 @@ from flexmock import flexmock
 from borgmatic.hooks import healthchecks as module


+def test_forgetful_buffering_handler_emit_collects_log_records():
+    handler = module.Forgetful_buffering_handler(byte_capacity=100)
+    handler.emit(flexmock(getMessage=lambda: 'foo'))
+    handler.emit(flexmock(getMessage=lambda: 'bar'))
+
+    assert handler.buffer == ['foo\n', 'bar\n']
+    assert not handler.forgot
+
+
+def test_forgetful_buffering_handler_emit_forgets_log_records_when_capacity_reached():
+    handler = module.Forgetful_buffering_handler(byte_capacity=len('foo\nbar\n'))
+    handler.emit(flexmock(getMessage=lambda: 'foo'))
+    assert handler.buffer == ['foo\n']
+    handler.emit(flexmock(getMessage=lambda: 'bar'))
+    assert handler.buffer == ['foo\n', 'bar\n']
+    handler.emit(flexmock(getMessage=lambda: 'baz'))
+    assert handler.buffer == ['bar\n', 'baz\n']
+    handler.emit(flexmock(getMessage=lambda: 'quux'))
+    assert handler.buffer == ['quux\n']
+    assert handler.forgot
+
+
+def test_format_buffered_logs_for_payload_flattens_log_buffer():
+    handler = module.Forgetful_buffering_handler(byte_capacity=100)
+    handler.buffer = ['foo\n', 'bar\n']
+    flexmock(module.logging).should_receive('getLogger').and_return(flexmock(handlers=[handler]))
+
+    payload = module.format_buffered_logs_for_payload()
+
+    assert payload == 'foo\nbar\n'
+
+
+def test_format_buffered_logs_for_payload_inserts_truncation_indicator_when_logs_forgotten():
+    handler = module.Forgetful_buffering_handler(byte_capacity=100)
+    handler.buffer = ['foo\n', 'bar\n']
+    handler.forgot = True
+    flexmock(module.logging).should_receive('getLogger').and_return(flexmock(handlers=[handler]))
+
+    payload = module.format_buffered_logs_for_payload()
+
+    assert payload == '...\nfoo\nbar\n'
+
+
+def test_format_buffered_logs_for_payload_without_handler_produces_payload_anyway():
+    flexmock(module.logging).should_receive('getLogger').and_return(
+        flexmock(handlers=[module.logging.Handler()])
+    )
+
+    payload = module.format_buffered_logs_for_payload()
+
+    assert payload
+
+
 def test_ping_monitor_hits_ping_url_for_start_state():
+    flexmock(module).should_receive('Forgetful_buffering_handler')
    ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').with_args('{}/{}'.format(ping_url, 'start'))
+    flexmock(module.requests).should_receive('post').with_args(
+        '{}/{}'.format(ping_url, 'start'), data=''
+    )

    module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=False)


 def test_ping_monitor_hits_ping_url_for_finish_state():
    ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').with_args(ping_url)
+    payload = flexmock()
+    flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
+    flexmock(module.requests).should_receive('post').with_args(ping_url, data=payload)

    module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)


 def test_ping_monitor_hits_ping_url_for_fail_state():
    ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').with_args('{}/{}'.format(ping_url, 'fail'))
+    payload = flexmock()
+    flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
+    flexmock(module.requests).should_receive('post').with_args(
+        '{}/{}'.format(ping_url, 'fail'), data=payload
+    )

    module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FAIL, dry_run=False)


 def test_ping_monitor_with_ping_uuid_hits_corresponding_url():
    ping_uuid = 'abcd-efgh-ijkl-mnop'
-    flexmock(module.requests).should_receive('get').with_args(
-        'https://hc-ping.com/{}'.format(ping_uuid)
+    payload = flexmock()
+    flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
+    flexmock(module.requests).should_receive('post').with_args(
+        'https://hc-ping.com/{}'.format(ping_uuid), data=payload
    )

    module.ping_monitor(ping_uuid, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)


 def test_ping_monitor_dry_run_does_not_hit_ping_url():
+    flexmock(module).should_receive('Forgetful_buffering_handler')
    ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').never()
+    flexmock(module.requests).should_receive('post').never()

    module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=True)