Monitor backups with PagerDuty hook integration (#245).
This commit is contained in:
parent
e76d5ad988
commit
bc02c123e6
9 changed files with 146 additions and 9 deletions
2
NEWS
2
NEWS
|
@ -1,4 +1,6 @@
|
||||||
1.5.0
|
1.5.0
|
||||||
|
* #245: Monitor backups with PagerDuty hook integration. See the documentation for more
|
||||||
|
information: https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook
|
||||||
* #255: Add per-action hooks: "before_prune", "after_prune", "before_check", and "after_check".
|
* #255: Add per-action hooks: "before_prune", "after_prune", "before_check", and "after_check".
|
||||||
* #274: Add ~/.config/borgmatic.d as another configuration directory default.
|
* #274: Add ~/.config/borgmatic.d as another configuration directory default.
|
||||||
* #277: Customize Healthchecks log level via borgmatic "--monitoring-verbosity" flag.
|
* #277: Customize Healthchecks log level via borgmatic "--monitoring-verbosity" flag.
|
||||||
|
|
|
@ -66,6 +66,7 @@ borgmatic is powered by [Borg Backup](https://www.borgbackup.org/).
|
||||||
<a href="https://healthchecks.io/"><img src="docs/static/healthchecks.png" alt="Healthchecks" height="60px" style="margin-bottom:20px;"></a>
|
<a href="https://healthchecks.io/"><img src="docs/static/healthchecks.png" alt="Healthchecks" height="60px" style="margin-bottom:20px;"></a>
|
||||||
<a href="https://cronitor.io/"><img src="docs/static/cronitor.png" alt="Cronitor" height="60px" style="margin-bottom:20px;"></a>
|
<a href="https://cronitor.io/"><img src="docs/static/cronitor.png" alt="Cronitor" height="60px" style="margin-bottom:20px;"></a>
|
||||||
<a href="https://cronhub.io/"><img src="docs/static/cronhub.png" alt="Cronhub" height="60px" style="margin-bottom:20px;"></a>
|
<a href="https://cronhub.io/"><img src="docs/static/cronhub.png" alt="Cronhub" height="60px" style="margin-bottom:20px;"></a>
|
||||||
|
<a href="https://www.pagerduty.com/"><img src="docs/static/pagerduty.png" alt="PagerDuty" height="60px" style="margin-bottom:20px;"></a>
|
||||||
<a href="https://www.rsync.net/cgi-bin/borg.cgi?campaign=borg&adgroup=borgmatic"><img src="docs/static/rsyncnet.png" alt="rsync.net" height="60px" style="margin-bottom:20px;"></a>
|
<a href="https://www.rsync.net/cgi-bin/borg.cgi?campaign=borg&adgroup=borgmatic"><img src="docs/static/rsyncnet.png" alt="rsync.net" height="60px" style="margin-bottom:20px;"></a>
|
||||||
<a href="https://www.borgbase.com/?utm_source=borgmatic"><img src="docs/static/borgbase.png" alt="BorgBase" height="60px" style="margin-bottom:20px;"></a>
|
<a href="https://www.borgbase.com/?utm_source=borgmatic"><img src="docs/static/borgbase.png" alt="BorgBase" height="60px" style="margin-bottom:20px;"></a>
|
||||||
|
|
||||||
|
|
|
@ -567,6 +567,15 @@ map:
|
||||||
for details.
|
for details.
|
||||||
example:
|
example:
|
||||||
https://cronitor.link/d3x0c1
|
https://cronitor.link/d3x0c1
|
||||||
|
pagerduty:
|
||||||
|
type: str
|
||||||
|
desc: |
|
||||||
|
PagerDuty integration key used to notify PagerDuty when a backup errors. Create
|
||||||
|
an account at https://www.pagerduty.com/ if you'd like to use this service. See
|
||||||
|
https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook
|
||||||
|
for details.
|
||||||
|
example:
|
||||||
|
a177cad45bd374409f78906a810a3074
|
||||||
cronhub:
|
cronhub:
|
||||||
type: str
|
type: str
|
||||||
desc: |
|
desc: |
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from borgmatic.hooks import cronhub, cronitor, healthchecks, mysql, postgresql
|
from borgmatic.hooks import cronhub, cronitor, healthchecks, mysql, pagerduty, postgresql
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ HOOK_NAME_TO_MODULE = {
|
||||||
'healthchecks': healthchecks,
|
'healthchecks': healthchecks,
|
||||||
'cronitor': cronitor,
|
'cronitor': cronitor,
|
||||||
'cronhub': cronhub,
|
'cronhub': cronhub,
|
||||||
|
'pagerduty': pagerduty,
|
||||||
'postgresql_databases': postgresql,
|
'postgresql_databases': postgresql,
|
||||||
'mysql_databases': mysql,
|
'mysql_databases': mysql,
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
MONITOR_HOOK_NAMES = ('healthchecks', 'cronitor', 'cronhub')
|
MONITOR_HOOK_NAMES = ('healthchecks', 'cronitor', 'cronhub', 'pagerduty')
|
||||||
|
|
||||||
|
|
||||||
class State(Enum):
|
class State(Enum):
|
||||||
|
|
62
borgmatic/hooks/pagerduty.py
Normal file
62
borgmatic/hooks/pagerduty.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import platform
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from borgmatic.hooks import monitor
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# PagerDuty Events API (v2) endpoint that ping_monitor() POSTs failure events to.
EVENTS_API_URL = 'https://events.pagerduty.com/v2/enqueue'
|
||||||
|
|
||||||
|
|
||||||
|
def ping_monitor(integration_key, config_filename, state, monitoring_log_level, dry_run):
    '''
    If this is an error state, create a PagerDuty event with the given integration key. Use the
    given configuration filename in any log entries. If this is a dry run, then don't actually
    create an event.

    Any state other than monitor.State.FAIL is logged at debug level and otherwise ignored. The
    monitoring log level is accepted but not used by this hook. Exceptions raised by
    requests.post() (including a timeout) are not caught here and propagate to the caller.
    '''
    if state != monitor.State.FAIL:
        logger.debug(
            '{}: Ignoring unsupported monitoring {} in PagerDuty hook'.format(
                config_filename, state.name.lower()
            )
        )
        return

    # The label carries its own leading space, so the format string must not add another one
    # (otherwise the message ends in a stray space, or contains a double space on dry runs).
    dry_run_label = ' (dry run; not actually sending)' if dry_run else ''
    logger.info('{}: Sending failure event to PagerDuty{}'.format(config_filename, dry_run_label))

    if dry_run:
        return

    hostname = platform.node()
    # Build a timezone-aware "now" and convert it to the local zone, so the ISO 8601 string
    # includes a UTC offset. This avoids the deprecated naive datetime.utcnow().
    local_timestamp = datetime.datetime.now(datetime.timezone.utc).astimezone().isoformat()
    payload = json.dumps(
        {
            'routing_key': integration_key,
            'event_action': 'trigger',
            'payload': {
                'summary': 'backup failed on {}'.format(hostname),
                'severity': 'error',
                'source': hostname,
                'timestamp': local_timestamp,
                'component': 'borgmatic',
                'group': 'backups',
                'class': 'backup failure',
                'custom_details': {
                    'hostname': hostname,
                    'configuration filename': config_filename,
                    'server time': local_timestamp,
                },
            },
        }
    )
    logger.debug('{}: Using PagerDuty payload: {}'.format(config_filename, payload))

    # Only let urllib3 log at error level or above while making the request.
    logging.getLogger('urllib3').setLevel(logging.ERROR)

    # Without a timeout, requests waits forever on an unresponsive endpoint, which would hang
    # the whole borgmatic run while it's trying to report a failure.
    request_timeout_seconds = 10
    requests.post(
        EVENTS_API_URL, data=payload.encode('utf-8'), timeout=request_timeout_seconds
    )
|
|
@ -28,14 +28,15 @@ hooks](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#error-hoo
|
||||||
below for how to configure this.
|
below for how to configure this.
|
||||||
4. **borgmatic monitoring hooks**: This feature integrates with monitoring
|
4. **borgmatic monitoring hooks**: This feature integrates with monitoring
|
||||||
services like [Healthchecks](https://healthchecks.io/),
|
services like [Healthchecks](https://healthchecks.io/),
|
||||||
[Cronitor](https://cronitor.io), and [Cronhub](https://cronhub.io), and pings
|
[Cronitor](https://cronitor.io), [Cronhub](https://cronhub.io), and
|
||||||
these services whenever borgmatic runs. That way, you'll receive an alert when
|
[PagerDuty](https://www.pagerduty.com/), and pings these services whenever
|
||||||
something goes wrong or the service doesn't hear from borgmatic for a
|
borgmatic runs. That way, you'll receive an alert when something goes wrong or
|
||||||
configured interval. See
|
(for certain hooks) the service doesn't hear from borgmatic for a configured
|
||||||
[Healthchecks
|
interval. See [Healthchecks
|
||||||
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#healthchecks-hook), [Cronitor
|
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#healthchecks-hook), [Cronitor
|
||||||
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronitor-hook), and [Cronhub
|
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronitor-hook), [Cronhub
|
||||||
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronhub-hook)
|
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronhub-hook), and
|
||||||
|
[PagerDuty hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook)
|
||||||
below for how to configure this.
|
below for how to configure this.
|
||||||
3. **Third-party monitoring software**: You can use traditional monitoring
|
3. **Third-party monitoring software**: You can use traditional monitoring
|
||||||
software to consume borgmatic JSON output and track when the last
|
software to consume borgmatic JSON output and track when the last
|
||||||
|
@ -200,6 +201,32 @@ mechanisms](https://docs.cronhub.io/integrations.html) when backups fail
|
||||||
or it doesn't hear from borgmatic for a certain period of time.
|
or it doesn't hear from borgmatic for a certain period of time.
|
||||||
|
|
||||||
|
|
||||||
|
## PagerDuty hook
|
||||||
|
|
||||||
|
[PagerDuty](https://www.pagerduty.com/) provides incident monitoring and alerting,
|
||||||
|
and borgmatic has built-in integration with it. Once you create a PagerDuty
|
||||||
|
account and <a
|
||||||
|
href="https://support.pagerduty.com/docs/services-and-integrations">service</a>
|
||||||
|
on their site, all you need to do is configure borgmatic with the unique
|
||||||
|
"Integration Key" for your service. Here's an example:
|
||||||
|
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
hooks:
|
||||||
|
pagerduty: a177cad45bd374409f78906a810a3074
|
||||||
|
```
|
||||||
|
|
||||||
|
With this hook in place, borgmatic creates a PagerDuty event for your service
|
||||||
|
whenever backups fail. Specifically, if an error occurs during a `create`,
|
||||||
|
`prune`, or `check` action, borgmatic sends an event to PagerDuty after the
|
||||||
|
`on_error` hooks run. Note that borgmatic does not contact PagerDuty when a
|
||||||
|
backup starts or ends without error.
|
||||||
|
|
||||||
|
You can configure PagerDuty to notify you by a [variety of
|
||||||
|
mechanisms](https://support.pagerduty.com/docs/notifications) when backups
|
||||||
|
fail.
|
||||||
|
|
||||||
|
|
||||||
## Scripting borgmatic
|
## Scripting borgmatic
|
||||||
|
|
||||||
To consume the output of borgmatic in other software, you can include an
|
To consume the output of borgmatic in other software, you can include an
|
||||||
|
|
BIN
docs/static/pagerduty.png
vendored
Normal file
BIN
docs/static/pagerduty.png
vendored
Normal file
Binary file not shown.
After Width: | Height: | Size: 20 KiB |
35
tests/unit/hooks/test_pagerduty.py
Normal file
35
tests/unit/hooks/test_pagerduty.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
from flexmock import flexmock
|
||||||
|
|
||||||
|
from borgmatic.hooks import pagerduty as module
|
||||||
|
|
||||||
|
|
||||||
|
def test_ping_monitor_ignores_start_state():
    # A START state isn't a failure, so no request may go out to PagerDuty.
    mocked_requests = flexmock(module.requests)
    mocked_requests.should_receive('post').never()

    module.ping_monitor(
        integration_key='abc123',
        config_filename='config.yaml',
        state=module.monitor.State.START,
        monitoring_log_level=1,
        dry_run=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_ping_monitor_ignores_finish_state():
    # A FINISH state isn't a failure, so no request may go out to PagerDuty.
    mocked_requests = flexmock(module.requests)
    mocked_requests.should_receive('post').never()

    module.ping_monitor(
        integration_key='abc123',
        config_filename='config.yaml',
        state=module.monitor.State.FINISH,
        monitoring_log_level=1,
        dry_run=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_ping_monitor_calls_api_for_fail_state():
    # Require exactly one call. Without a call-count constraint, flexmock only stubs post() and
    # this test would pass even if ping_monitor() never contacted the API.
    flexmock(module.requests).should_receive('post').once()

    module.ping_monitor(
        'abc123', 'config.yaml', module.monitor.State.FAIL, monitoring_log_level=1, dry_run=False
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_ping_monitor_dry_run_does_not_call_api():
    # Even for a FAIL state, a dry run must stop short of making the request.
    mocked_requests = flexmock(module.requests)
    mocked_requests.should_receive('post').never()

    module.ping_monitor(
        integration_key='abc123',
        config_filename='config.yaml',
        state=module.monitor.State.FAIL,
        monitoring_log_level=1,
        dry_run=True,
    )
|
Loading…
Reference in a new issue