
feat(alerting): Implement alert reminders (#1138)

* feat(alerting): add reminder-interval feature, which allows setting an interval at which a still-triggered alert is re-sent (see the sketch after the commit metadata below)

* feat(test): add tests for reminder-interval feature

* feat(docs): modify documentation for reminder-interval feature

* chore: change "due" to "TRIGGERED" to make the logs easier to search through

* chore: update "reminder-interval" to "repeat-interval"

* chore: update reminder-interval to repeat-interval

* chore: adapt repeat interval feature after merge

* chore: adapt repeat interval feature after merge

* RepeatInterval => MinimumRepeatInterval

* fix merge issues

(cherry picked from commit 9b2161556bddf01d385f97dafac2515857190ae5)

* rename and move MinimumRepeatInterval

* move MinimumRepeatInterval (again)

---------

Co-authored-by: Bugra Kocabay <kocabay.bugra@gmail.com>
Co-authored-by: Bugra Kocabay <kocabaybugra@gmail.com>
Co-authored-by: Konstantin Nosov <nosovk@gmail.com>
Co-authored-by: Viktor Ziegler <Viktor.Ziegler@ti8m.ch>
Co-authored-by: TwiN <twin@linux.com>
Authored by Viktor Ziegler on 2025-08-08 03:35:44 +02:00 (committed by GitHub)
parent f6e938746f
commit c374649019
6 changed files with 83 additions and 16 deletions
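Stripped of the provider plumbing in the diff below, the change boils down to a two-flag gate: an initial alert is sent the first time the failure threshold is reached, and a reminder is sent again only while the alert remains triggered and at least MinimumReminderInterval has elapsed since the last notification. The following is a minimal standalone sketch of that gate; shouldNotify and its parameters are illustrative names, not part of the gatus codebase, but the conditions mirror the sendInitialAlert/sendReminder logic introduced in the diff.

package main

import (
	"fmt"
	"time"
)

// shouldNotify mirrors the gate added to handleAlertsToTrigger: send the initial
// alert when the alert is not yet triggered, or send a reminder once at least
// minReminderInterval has passed since the last notification went out.
func shouldNotify(triggered bool, minReminderInterval time.Duration, lastReminderSent time.Time) (sendInitial, sendReminder bool) {
	sendInitial = !triggered
	sendReminder = triggered && minReminderInterval > 0 && time.Since(lastReminderSent) >= minReminderInterval
	return sendInitial, sendReminder
}

func main() {
	// Not yet triggered: the initial alert is due.
	fmt.Println(shouldNotify(false, time.Minute, time.Time{})) // true false
	// Triggered, last notification 2 minutes ago: a reminder is due.
	fmt.Println(shouldNotify(true, time.Minute, time.Now().Add(-2*time.Minute))) // false true
	// Triggered, last notification 10 seconds ago: nothing to send yet.
	fmt.Println(shouldNotify(true, time.Minute, time.Now().Add(-10*time.Second))) // false false
}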


@@ -2,7 +2,9 @@ package watchdog
 import (
 	"errors"
+	"log"
 	"os"
+	"time"

 	"github.com/TwiN/gatus/v5/alerting"
 	"github.com/TwiN/gatus/v5/config/endpoint"
@@ -30,14 +32,24 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
 		if !endpointAlert.IsEnabled() || endpointAlert.FailureThreshold > ep.NumberOfFailuresInARow {
 			continue
 		}
-		if endpointAlert.Triggered {
-			logr.Debugf("[watchdog.handleAlertsToTrigger] Alert for endpoint with key=%s with description='%s' has already been TRIGGERED, skipping", ep.Key(), endpointAlert.GetDescription())
+		// Determine if an initial alert should be sent
+		sendInitialAlert := !endpointAlert.Triggered
+		// Determine if a reminder should be sent
+		sendReminder := endpointAlert.Triggered && endpointAlert.MinimumReminderInterval > 0 && time.Since(ep.LastReminderSent) >= endpointAlert.MinimumReminderInterval
+		// If neither initial alert nor reminder needs to be sent, skip to the next alert
+		if !sendInitialAlert && !sendReminder {
+			logr.Debugf("[watchdog.handleAlertsToTrigger] Alert for endpoint=%s with description='%s' is not due for triggering or reminding, skipping", ep.Name, endpointAlert.GetDescription())
 			continue
 		}
 		alertProvider := alertingConfig.GetAlertingProviderByAlertType(endpointAlert.Type)
 		if alertProvider != nil {
-			logr.Infof("[watchdog.handleAlertsToTrigger] Sending %s alert because alert for endpoint with key=%s with description='%s' has been TRIGGERED", endpointAlert.Type, ep.Key(), endpointAlert.GetDescription())
 			var err error
+			alertType := "reminder"
+			if sendInitialAlert {
+				alertType = "initial"
+			}
+			log.Printf("[watchdog.handleAlertsToTrigger] Sending %s %s alert because alert for endpoint=%s with description='%s' has been TRIGGERED", alertType, endpointAlert.Type, ep.Name, endpointAlert.GetDescription())
 			if os.Getenv("MOCK_ALERT_PROVIDER") == "true" {
 				if os.Getenv("MOCK_ALERT_PROVIDER_ERROR") == "true" {
 					err = errors.New("error")
@@ -48,7 +60,11 @@ func handleAlertsToTrigger(ep *endpoint.Endpoint, result *endpoint.Result, alert
 			if err != nil {
 				logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to send an alert for endpoint with key=%s: %s", ep.Key(), err.Error())
 			} else {
-				endpointAlert.Triggered = true
+				// Mark initial alert as triggered and update last reminder time
+				if sendInitialAlert {
+					endpointAlert.Triggered = true
+				}
+				ep.LastReminderSent = time.Now()
 				if err := store.Get().UpsertTriggeredEndpointAlert(ep, endpointAlert); err != nil {
 					logr.Errorf("[watchdog.handleAlertsToTrigger] Failed to persist triggered endpoint alert for endpoint with key=%s: %s", ep.Key(), err.Error())
 				}
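A note on the update rule above: Triggered only flips when the initial alert goes out, whereas LastReminderSent is refreshed after every successful send, initial or reminder alike. That refresh is what keeps consecutive reminders at least MinimumReminderInterval apart, and the UpsertTriggeredEndpointAlert call persists the triggered state, presumably so it survives a restart.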


@@ -3,6 +3,7 @@ package watchdog
 import (
 	"os"
 	"testing"
+	"time"

 	"github.com/TwiN/gatus/v5/alerting"
 	"github.com/TwiN/gatus/v5/alerting/alert"
@@ -517,6 +518,48 @@ func TestHandleAlertingWithProviderThatOnlyReturnsErrorOnResolve(t *testing.T) {
 	verify(t, ep, 0, 2, false, "")
 }

+func TestHandleAlertingWithMinimumReminderInterval(t *testing.T) {
+	_ = os.Setenv("MOCK_ALERT_PROVIDER", "true")
+	defer os.Clearenv()
+	cfg := &config.Config{
+		Alerting: &alerting.Config{
+			Custom: &custom.AlertProvider{
+				DefaultConfig: custom.Config{
+					URL:    "https://twin.sh/health",
+					Method: "GET",
+				},
+			},
+		},
+	}
+	enabled := true
+	ep := &endpoint.Endpoint{
+		URL: "https://example.com",
+		Alerts: []*alert.Alert{
+			{
+				Type:                    alert.TypeCustom,
+				Enabled:                 &enabled,
+				FailureThreshold:        2,
+				SuccessThreshold:        3,
+				SendOnResolved:          &enabled,
+				Triggered:               false,
+				MinimumReminderInterval: 1 * time.Second,
+			},
+		},
+	}
+	verify(t, ep, 0, 0, false, "The alert shouldn't start triggered")
+	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
+	verify(t, ep, 1, 0, false, "The alert shouldn't have triggered")
+	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
+	verify(t, ep, 2, 0, true, "The alert should've triggered")
+	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
+	verify(t, ep, 3, 0, true, "The alert should still be triggered")
+	HandleAlerting(ep, &endpoint.Result{Success: false}, cfg.Alerting)
+	verify(t, ep, 4, 0, true, "The alert should still be triggered")
+	HandleAlerting(ep, &endpoint.Result{Success: true}, cfg.Alerting)
+}
+
 func verify(t *testing.T, ep *endpoint.Endpoint, expectedNumberOfFailuresInARow, expectedNumberOfSuccessInARow int, expectedTriggered bool, expectedTriggeredReason string) {
 	if ep.NumberOfFailuresInARow != expectedNumberOfFailuresInARow {
 		t.Errorf("endpoint.NumberOfFailuresInARow should've been %d, got %d", expectedNumberOfFailuresInARow, ep.NumberOfFailuresInARow)