Add documentation for Grafana Loki hook (#743).

This commit is contained in:
Dan Helfman 2023-08-25 10:52:00 -07:00
parent fa9a061033
commit 32019ea8f3
7 changed files with 154 additions and 83 deletions

3
NEWS
View file

@ -6,6 +6,9 @@
only restorable with a "mysql_databases:" configuration.
* #738: Fix for potential data loss (data not getting restored) in which the database "restore"
action didn't actually restore anything and indicated success anyway.
* #743: Add a monitoring hook for sending backup status and logs to to Grafana Loki. See the
documentation for more information:
https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#loki-hook
* Remove the deprecated use of the MongoDB hook's "--db" flag for database restoration.
* Add source code reference documentation for getting oriented with the borgmatic code as a
developer: https://torsion.org/borgmatic/docs/reference/source-code/

View file

@ -67,6 +67,7 @@ class Loki_log_buffer:
request_body = self.to_request()
self.root['streams'][0]['values'] = []
request_header = {'Content-Type': 'application/json'}
try:
result = requests.post(self.url, headers=request_header, data=request_body, timeout=5)
result.raise_for_status()
@ -100,6 +101,7 @@ class Loki_log_handler(logging.Handler):
Add an arbitrary string as a log entry to the stream.
'''
self.buffer.add_value(msg)
if len(self.buffer) > MAX_BUFFER_LINES:
self.buffer.flush()
@ -116,6 +118,7 @@ def initialize_monitor(hook_config, config, config_filename, monitoring_log_leve
'''
url = hook_config.get('url')
loki = Loki_log_handler(url, dry_run)
for key, value in hook_config.get('labels').items():
if value == '__hostname':
loki.add_label(key, platform.node())
@ -125,6 +128,7 @@ def initialize_monitor(hook_config, config, config_filename, monitoring_log_leve
loki.add_label(key, config_filename)
else:
loki.add_label(key, value)
logging.getLogger().addHandler(loki)
@ -143,6 +147,7 @@ def destroy_monitor(hook_config, config, config_filename, monitoring_log_level,
Remove the monitor handler that was added to the root logger.
'''
logger = logging.getLogger()
for handler in tuple(logger.handlers):
if isinstance(handler, Loki_log_handler):
handler.flush()

View file

@ -62,7 +62,7 @@ def execute_dump_command(
):
'''
Kick off a dump for the given MariaDB database (provided as a configuration dict) to a named
pipe constructed from the given dump path and database names. Use the given log prefix in any
pipe constructed from the given dump path and database name. Use the given log prefix in any
log entries.
Return a subprocess.Popen instance for the dump process ready to spew to a named pipe. But if
@ -72,6 +72,7 @@ def execute_dump_command(
dump_filename = dump.make_data_source_dump_filename(
dump_path, database['name'], database.get('hostname')
)
if os.path.exists(dump_filename):
logger.warning(
f'{log_prefix}: Skipping duplicate dump of MariaDB database "{database_name}" to {dump_filename}'

View file

@ -62,7 +62,7 @@ def execute_dump_command(
):
'''
Kick off a dump for the given MySQL/MariaDB database (provided as a configuration dict) to a
named pipe constructed from the given dump path and database names. Use the given log prefix in
named pipe constructed from the given dump path and database name. Use the given log prefix in
any log entries.
Return a subprocess.Popen instance for the dump process ready to spew to a named pipe. But if
@ -72,6 +72,7 @@ def execute_dump_command(
dump_filename = dump.make_data_source_dump_filename(
dump_path, database['name'], database.get('hostname')
)
if os.path.exists(dump_filename):
logger.warning(
f'{log_prefix}: Skipping duplicate dump of MySQL database "{database_name}" to {dump_filename}'

View file

@ -38,11 +38,11 @@ below for how to configure this.
borgmatic integrates with monitoring services like
[Healthchecks](https://healthchecks.io/), [Cronitor](https://cronitor.io),
[Cronhub](https://cronhub.io), [PagerDuty](https://www.pagerduty.com/), and
[ntfy](https://ntfy.sh/) and pings these services whenever borgmatic runs.
That way, you'll receive an alert when something goes wrong or (for certain
hooks) the service doesn't hear from borgmatic for a configured interval. See
[Healthchecks
[Cronhub](https://cronhub.io), [PagerDuty](https://www.pagerduty.com/),
[ntfy](https://ntfy.sh/), and [Grafana Loki](https://grafana.com/oss/loki/)
and pings these services whenever borgmatic runs. That way, you'll receive an
alert when something goes wrong or (for certain hooks) the service doesn't
hear from borgmatic for a configured interval. See [Healthchecks
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#healthchecks-hook),
[Cronitor
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronitor-hook),
@ -50,7 +50,10 @@ hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronitor-h
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronhub-hook),
[PagerDuty
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook),
and [ntfy hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#ntfy-hook)
[ntfy
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#ntfy-hook),
and [Loki
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#loki-hook),
below for how to configure this.
While these services offer different features, you probably only need to use
@ -129,7 +132,7 @@ especially the security information.
## Healthchecks hook
[Healthchecks](https://healthchecks.io/) is a service that provides "instant
alerts when your cron jobs fail silently", and borgmatic has built-in
alerts when your cron jobs fail silently," and borgmatic has built-in
integration with it. Once you create a Healthchecks account and project on
their site, all you need to do is configure borgmatic with the unique "Ping
URL" for your project. Here's an example:
@ -144,21 +147,19 @@ healthchecks:
this option in the `hooks:` section of your configuration.
With this hook in place, borgmatic pings your Healthchecks project when a
backup begins, ends, or errors. Specifically, after the <a
href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
hooks</a> run, borgmatic lets Healthchecks know that it has started if any of
the `create`, `prune`, `compact`, or `check` actions are run.
backup begins, ends, or errors, but only when any of the `create`, `prune`,
`compact`, or `check` actions are run.
Then, if the actions complete successfully, borgmatic notifies Healthchecks of
the success after the `after_backup` hooks run and includes borgmatic logs in
the payload data sent to Healthchecks. This means that borgmatic logs show up
in the Healthchecks UI, although be aware that Healthchecks currently has a
10-kilobyte limit for the logs in each ping.
the success and includes borgmatic logs in the payload data sent to
Healthchecks. This means that borgmatic logs show up in the Healthchecks UI,
although be aware that Healthchecks currently has a 10-kilobyte limit for the
logs in each ping.
If an error occurs during any action or hook, borgmatic notifies Healthchecks
after the `on_error` hooks run, also tacking on logs including the error
itself. But the logs are only included for errors that occur when a `create`,
`prune`, `compact`, or `check` action is run.
If an error occurs during any action or hook, borgmatic notifies Healthchecks,
also tacking on logs including the error itself. But the logs are only
included for errors that occur when a `create`, `prune`, `compact`, or `check`
action is run.
You can customize the verbosity of the logs that are sent to Healthchecks with
borgmatic's `--monitoring-verbosity` flag. The `--list` and `--stats` flags
@ -175,7 +176,7 @@ or it doesn't hear from borgmatic for a certain period of time.
## Cronitor hook
[Cronitor](https://cronitor.io/) provides "Cron monitoring and uptime healthchecks
for websites, services and APIs", and borgmatic has built-in
for websites, services and APIs," and borgmatic has built-in
integration with it. Once you create a Cronitor account and cron job monitor on
their site, all you need to do is configure borgmatic with the unique "Ping
API URL" for your monitor. Here's an example:
@ -190,13 +191,9 @@ cronitor:
this option in the `hooks:` section of your configuration.
With this hook in place, borgmatic pings your Cronitor monitor when a backup
begins, ends, or errors. Specifically, after the <a
href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
hooks</a> run, borgmatic lets Cronitor know that it has started if any of the
`prune`, `compact`, `create`, or `check` actions are run. Then, if the actions
complete successfully, borgmatic notifies Cronitor of the success after the
`after_backup` hooks run. And if an error occurs during any action or hook,
borgmatic notifies Cronitor after the `on_error` hooks run.
begins, ends, or errors, but only when any of the `prune`, `compact`,
`create`, or `check` actions are run. Then, if the actions complete
successfully or errors, borgmatic notifies Cronitor accordingly.
You can configure Cronitor to notify you by a [variety of
mechanisms](https://cronitor.io/docs/cron-job-notifications) when backups fail
@ -206,7 +203,7 @@ or it doesn't hear from borgmatic for a certain period of time.
## Cronhub hook
[Cronhub](https://cronhub.io/) provides "instant alerts when any of your
background jobs fail silently or run longer than expected", and borgmatic has
background jobs fail silently or run longer than expected," and borgmatic has
built-in integration with it. Once you create a Cronhub account and monitor on
their site, all you need to do is configure borgmatic with the unique "Ping
URL" for your monitor. Here's an example:
@ -221,13 +218,9 @@ cronhub:
this option in the `hooks:` section of your configuration.
With this hook in place, borgmatic pings your Cronhub monitor when a backup
begins, ends, or errors. Specifically, after the <a
href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
hooks</a> run, borgmatic lets Cronhub know that it has started if any of the
`prune`, `compact`, `create`, or `check` actions are run. Then, if the actions
complete successfully, borgmatic notifies Cronhub of the success after the
`after_backup` hooks run. And if an error occurs during any action or hook,
borgmatic notifies Cronhub after the `on_error` hooks run.
begins, ends, or errors, but only when any of the `prune`, `compact`,
`create`, or `check` actions are run. Then, if the actions complete
successfully or errors, borgmatic notifies Cronhub accordingly.
Note that even though you configure borgmatic with the "start" variant of the
ping URL, borgmatic substitutes the correct state into the URL when pinging
@ -266,10 +259,9 @@ pagerduty:
this option in the `hooks:` section of your configuration.
With this hook in place, borgmatic creates a PagerDuty event for your service
whenever backups fail. Specifically, if an error occurs during a `create`,
`prune`, `compact`, or `check` action, borgmatic sends an event to PagerDuty
before the `on_error` hooks run. Note that borgmatic does not contact
PagerDuty when a backup starts or ends without error.
whenever backups fail, but only when any of the `create`, `prune`, `compact`,
or `check` actions are run. Note that borgmatic does not contact PagerDuty
when a backup starts or when it ends without error.
You can configure PagerDuty to notify you by a [variety of
mechanisms](https://support.pagerduty.com/docs/notifications) when backups
@ -328,6 +320,58 @@ ntfy:
the `ntfy:` option in the `hooks:` section of your configuration.
## Loki hook
[Grafana Loki](https://grafana.com/oss/loki/) is a "horizontally scalable,
highly available, multi-tenant log aggregation system inspired by Prometheus."
borgmatic has built-in integration with Loki, sending both backup status and
borgmatic logs.
You can configure borgmatic to use either a [self-hosted Loki
instance](https://grafana.com/docs/loki/latest/installation/) or [a Grafana
Cloud account](https://grafana.com/auth/sign-up/create-user). Start by setting
your Loki API push URL. Here's an example:
```yaml
loki:
url: http://localhost:3100/loki/api/v1/push
```
With this hook in place, borgmatic sends its logs to your Loki instance as any
of the `prune`, `compact`, `create`, or `check` actions are run. Then, after
the actions complete, borgmatic notifies Loki of success or failure.
This hook supports sending arbitrary labels to Loki. For instance:
```yaml
loki:
url: http://localhost:3100/loki/api/v1/push
labels:
app: borgmatic
hostname: example.org
```
There are also a few placeholders you can optionally use as label values:
* `__config`: name of the borgmatic configuration file
* `__config_path`: full path of the borgmatic configuration file
* `__hostname`: the local machine hostname
These placeholders are only substituted for the whole label value, not
interpolated into a larger string. For instance:
```yaml
loki:
url: http://localhost:3100/loki/api/v1/push
labels:
app: borgmatic
config: __config
hostname: __hostname
```
## Scripting borgmatic
To consume the output of borgmatic in other software, you can include an

View file

@ -6,9 +6,9 @@ from flexmock import flexmock
from borgmatic.hooks import loki as module
def test_log_handler_label_replacment():
def test_initialize_monitor_replaces_labels():
'''
Assert that label placeholders get replaced
Assert that label placeholders get replaced.
'''
hook_config = {
'url': 'http://localhost:3100/loki/api/v1/push',
@ -17,18 +17,20 @@ def test_log_handler_label_replacment():
config_filename = '/mock/path/test.yaml'
dry_run = True
module.initialize_monitor(hook_config, flexmock(), config_filename, flexmock(), dry_run)
for handler in tuple(logging.getLogger().handlers):
if isinstance(handler, module.Loki_log_handler):
assert handler.buffer.root['streams'][0]['stream']['hostname'] == platform.node()
assert handler.buffer.root['streams'][0]['stream']['config'] == 'test.yaml'
assert handler.buffer.root['streams'][0]['stream']['config_full'] == config_filename
return
assert False
def test_initalize_adds_log_handler():
def test_initialize_monitor_adds_log_handler():
'''
Assert that calling initialize_monitor adds our logger to the root logger
Assert that calling initialize_monitor adds our logger to the root logger.
'''
hook_config = {'url': 'http://localhost:3100/loki/api/v1/push', 'labels': {'app': 'borgmatic'}}
module.initialize_monitor(
@ -38,15 +40,17 @@ def test_initalize_adds_log_handler():
monitoring_log_level=flexmock(),
dry_run=True,
)
for handler in tuple(logging.getLogger().handlers):
if isinstance(handler, module.Loki_log_handler):
return
assert False
def test_ping_adds_log_message():
def test_ping_monitor_adds_log_message():
'''
Assert that calling ping_monitor adds a message to our logger
Assert that calling ping_monitor adds a message to our logger.
'''
hook_config = {'url': 'http://localhost:3100/loki/api/v1/push', 'labels': {'app': 'borgmatic'}}
config_filename = 'test.yaml'
@ -55,6 +59,7 @@ def test_ping_adds_log_message():
module.ping_monitor(
hook_config, flexmock(), config_filename, module.monitor.State.FINISH, flexmock(), dry_run
)
for handler in tuple(logging.getLogger().handlers):
if isinstance(handler, module.Loki_log_handler):
assert any(
@ -65,18 +70,20 @@ def test_ping_adds_log_message():
)
)
return
assert False
def test_log_handler_gets_removed():
def test_destroy_monitor_removes_log_handler():
'''
Assert that destroy_monitor removes the logger from the root logger
Assert that destroy_monitor removes the logger from the root logger.
'''
hook_config = {'url': 'http://localhost:3100/loki/api/v1/push', 'labels': {'app': 'borgmatic'}}
config_filename = 'test.yaml'
dry_run = True
module.initialize_monitor(hook_config, flexmock(), config_filename, flexmock(), dry_run)
module.destroy_monitor(hook_config, flexmock(), config_filename, flexmock(), dry_run)
for handler in tuple(logging.getLogger().handlers):
if isinstance(handler, module.Loki_log_handler):
assert False

View file

@ -6,93 +6,103 @@ from flexmock import flexmock
from borgmatic.hooks import loki as module
def test_log_handler_gets_labels():
def test_loki_log_buffer_add_value_gets_raw():
'''
Assert that adding labels works
'''
buffer = module.Loki_log_buffer(flexmock(), False)
buffer.add_label('test', 'label')
assert buffer.root['streams'][0]['stream']['test'] == 'label'
buffer.add_label('test2', 'label2')
assert buffer.root['streams'][0]['stream']['test2'] == 'label2'
def test_log_buffer_gets_raw():
'''
Assert that adding values to the log buffer increases it's length
Assert that adding values to the log buffer increases it's length.
'''
buffer = module.Loki_log_buffer(flexmock(), False)
assert len(buffer) == 0
buffer.add_value('Some test log line')
assert len(buffer) == 1
buffer.add_value('Another test log line')
assert len(buffer) == 2
def test_log_buffer_gets_log_messages():
def test_loki_log_buffer_json_serializes_empty_buffer():
'''
Assert that adding log records works
'''
handler = module.Loki_log_handler(flexmock(), False)
handler.emit(flexmock(getMessage=lambda: 'Some test log line'))
assert len(handler.buffer) == 1
def test_log_buffer_json():
'''
Assert that the buffer correctly serializes when empty
Assert that the buffer correctly serializes when empty.
'''
buffer = module.Loki_log_buffer(flexmock(), False)
assert json.loads(buffer.to_request()) == json.loads('{"streams":[{"stream":{},"values":[]}]}')
def test_log_buffer_json_labels():
def test_loki_log_buffer_json_serializes_labels():
'''
Assert that the buffer correctly serializes with labels
Assert that the buffer correctly serializes with labels.
'''
buffer = module.Loki_log_buffer(flexmock(), False)
buffer.add_label('test', 'label')
assert json.loads(buffer.to_request()) == json.loads(
'{"streams":[{"stream":{"test": "label"},"values":[]}]}'
)
def test_log_buffer_json_log_lines():
def test_loki_log_buffer_json_serializes_log_lines():
'''
Assert that log lines end up in the correct place in the log buffer
Assert that log lines end up in the correct place in the log buffer.
'''
buffer = module.Loki_log_buffer(flexmock(), False)
buffer.add_value('Some test log line')
assert json.loads(buffer.to_request())['streams'][0]['values'][0][1] == 'Some test log line'
def test_log_handler_post():
def test_loki_log_handler_add_label_gets_labels():
'''
Assert that the flush function sends a post request after a certain limit
Assert that adding labels works.
'''
buffer = module.Loki_log_buffer(flexmock(), False)
buffer.add_label('test', 'label')
assert buffer.root['streams'][0]['stream']['test'] == 'label'
buffer.add_label('test2', 'label2')
assert buffer.root['streams'][0]['stream']['test2'] == 'label2'
def test_loki_log_handler_emit_gets_log_messages():
'''
Assert that adding log records works.
'''
handler = module.Loki_log_handler(flexmock(), False)
handler.emit(flexmock(getMessage=lambda: 'Some test log line'))
assert len(handler.buffer) == 1
def test_loki_log_handler_raw_posts_to_server():
'''
Assert that the flush function sends a post request after a certain limit.
'''
handler = module.Loki_log_handler(flexmock(), False)
flexmock(module.requests).should_receive('post').and_return(
flexmock(raise_for_status=lambda: '')
).once()
for num in range(int(module.MAX_BUFFER_LINES * 1.5)):
handler.raw(num)
def test_log_handler_post_failiure():
def test_loki_log_handler_raw_post_failure_does_not_raise():
'''
Assert that the flush function catches request exceptions
Assert that the flush function catches request exceptions.
'''
handler = module.Loki_log_handler(flexmock(), False)
flexmock(module.requests).should_receive('post').and_return(
flexmock(raise_for_status=lambda: (_ for _ in ()).throw(requests.RequestException()))
).once()
for num in range(int(module.MAX_BUFFER_LINES * 1.5)):
handler.raw(num)
def test_log_handler_empty_flush_noop():
def test_loki_log_handler_flush_with_empty_buffer_does_not_raise():
'''
Test that flushing an empty buffer does indeed nothing
Test that flushing an empty buffer does indeed nothing.
'''
handler = module.Loki_log_handler(flexmock(), False)
handler.flush()