Get email alerting working
Some checks failed
/ dev-shell (push) Successful in 3m23s
/ check (push) Failing after 12m13s
/ rust-packages (push) Successful in 11m56s
/ terraform-providers (push) Successful in 17m7s
/ systems (push) Successful in 53m59s

This commit is contained in:
Kaare Hoff Skovgaard 2025-08-17 00:42:33 +02:00
parent 1ab2d1c15f
commit 619984cd89
Signed by: khs
GPG key ID: C7D890804F01E9F0
3 changed files with 63 additions and 42 deletions

View file

@ -90,7 +90,7 @@ in
enable = true; enable = true;
enableImap = false; enableImap = false;
enableImapSsl = true; enableImapSsl = true;
enableSubmission = false; enableSubmission = true;
enableSubmissionSsl = true; enableSubmissionSsl = true;
fqdn = config.khscodes.networking.fqdn; fqdn = config.khscodes.networking.fqdn;
useUTF8FolderNames = true; useUTF8FolderNames = true;

View file

@ -12,9 +12,9 @@
expr: > expr: >
sum by(vhost, instance) (rate(http_response_count_total{status=~"5..",job="nginxlog"}[10m])) / sum by(vhost, instance) (rate(http_response_count_total{status=~"5..",job="nginxlog"}[10m])) /
sum by(vhost, instance) (rate(http_response_count_total{job="nginxlog"}[10m])) sum by(vhost, instance) (rate(http_response_count_total{job="nginxlog"}[10m]))
> 0 > 0.05
for: 30m for: 30m
labels: labels:
severity: critical severity: critical
annotations: annotations:
summary: "Nginx VHost {{ $labels.vhost }} on {{ $labels.instance }} is generating many internal server errors over 30 minutes" summary: "Nginx VHost {{ $labels.vhost }} on {{ $labels.instance }} is generating over 5% internal server errors over 30 minutes"

View file

@ -62,7 +62,6 @@ in
systemd.services.grafana = { systemd.services.grafana = {
unitConfig.ConditionPathExists = [ unitConfig.ConditionPathExists = [
oauthCredentialFile oauthCredentialFile
smtpPasswordFile
]; ];
}; };
services.grafana = { services.grafana = {
@ -104,16 +103,6 @@ in
org_mapping = "*:*:Admin"; org_mapping = "*:*:Admin";
role_attribute_path = "'GrafanaAdmin'"; role_attribute_path = "'GrafanaAdmin'";
}; };
smtp = {
enabled = true;
from_name = "monitoring.kaareskovgaard.net";
from_address = "monitoring@kas.codes";
ehlo_identity = "monitoring.kaareskovgaard.net";
host = "mx.kaareskovgaard.net:465";
password = "$__file{${smtpPasswordFile}}";
user = "monitoring";
};
}; };
provision = { provision = {
enable = true; enable = true;
@ -145,6 +134,15 @@ in
manageAlerts = true; manageAlerts = true;
}; };
} }
{
name = "Alertmanager";
type = "alertmanager";
url = "http://127.0.0.1:${toString config.services.prometheus.alertmanager.port}";
jsonData = {
implementation = "prometheus";
handleGrafanaManagedAlerts = true;
};
}
]; ];
dashboards.settings.providers = [ dashboards.settings.providers = [
{ {
@ -168,39 +166,55 @@ in
options.path = postfixDashboard; options.path = postfixDashboard;
} }
]; ];
alerting = {
contactPoints.settings.contactPoints = [
{
orgId = 1;
name = "grafana-default-email";
receivers = [
{
uid = "khs";
type = "email";
settings = {
addresses = "kaare@kaareskovgaard.net";
};
}
];
}
];
policies.settings.policies = [
{
orgId = 1;
receiver = "grafana-default-email";
group_by = [ "instance" ];
}
];
};
}; };
}; };
services.prometheus = { services.prometheus = {
enable = true; enable = true;
listenAddress = "127.0.0.1"; listenAddress = "127.0.0.1";
alertmanagers = [
{
static_configs = [
{
targets = [ "localhost:${toString config.services.prometheus.alertmanager.port}" ];
}
];
}
];
extraFlags = [ extraFlags = [
"--web.enable-otlp-receiver" "--web.enable-otlp-receiver"
"--storage.tsdb.retention.time=60d" "--storage.tsdb.retention.time=60d"
]; ];
alertmanager = {
enable = true;
environmentFile = smtpPasswordFile;
logLevel = "info";
configuration = {
global = {
smtp_from = "monitoring@kas.codes";
smtp_smarthost = "mx.kaareskovgaard.net:587";
smtp_hello = "monitoring.kaareskovgaard.net";
smtp_auth_username = "monitoring";
smtp_auth_password = "$SMTP_PASSWORD";
};
route = {
receiver = "email";
group_by = [
"instance"
];
};
receivers = [
{
name = "email";
email_configs = [
{
send_resolved = true;
to = "kaare@kaareskovgaard.net";
}
];
}
];
};
};
# I think I need to move these into grafana managed rules, in order to get notifications # I think I need to move these into grafana managed rules, in order to get notifications
# working properly. # working properly.
rules = [ rules = [
@ -217,6 +231,11 @@ in
'' ''
]; ];
}; };
systemd.services.alertmanager = {
unitConfig = {
ConditionPathExists = [ smtpPasswordFile ];
};
};
services.loki = { services.loki = {
enable = true; enable = true;
configuration = { configuration = {
@ -306,6 +325,7 @@ in
verify = "on"; verify = "on";
certificate = "/etc/loki/client-signer.pem"; certificate = "/etc/loki/client-signer.pem";
}; };
rateLimit.enable = false;
locations."/" = { locations."/" = {
proxyPass = "http://${loki.configuration.server.http_listen_address}:${toString loki.configuration.server.http_listen_port}"; proxyPass = "http://${loki.configuration.server.http_listen_address}:${toString loki.configuration.server.http_listen_port}";
proxyWebsockets = true; proxyWebsockets = true;
@ -317,6 +337,7 @@ in
verify = "on"; verify = "on";
certificate = "/etc/prometheus/client-signer.pem"; certificate = "/etc/prometheus/client-signer.pem";
}; };
rateLimit.enable = false;
locations."/" = { locations."/" = {
proxyPass = "http://${prometheus.listenAddress}:${toString prometheus.port}"; proxyPass = "http://${prometheus.listenAddress}:${toString prometheus.port}";
proxyWebsockets = true; proxyWebsockets = true;
@ -352,14 +373,14 @@ in
{ {
contents = '' contents = ''
{{- with secret "mx.kaareskovgaard.net/data/users/monitoring" -}} {{- with secret "mx.kaareskovgaard.net/data/users/monitoring" -}}
{{ .Data.data.password }} SMTP_PASSWORD={{ .Data.data.password }}
{{- end -}} {{- end -}}
''; '';
destination = smtpPasswordFile; destination = smtpPasswordFile;
owner = "grafana"; owner = "root";
group = "grafana"; group = "root";
perms = "0600"; perms = "0600";
reloadOrRestartUnits = [ "grafana.service" ]; reloadOrRestartUnits = [ "alertmanager.service" ];
} }
]; ];
infrastructure.kanidm-client-application = { infrastructure.kanidm-client-application = {