Attempt to bring up email notifications for alerts
Some checks failed
/ systems (push) Waiting to run
/ dev-shell (push) Successful in 8m30s
/ terraform-providers (push) Has been cancelled
/ check (push) Has been cancelled
/ rust-packages (push) Has been cancelled

The email sending works, but apparently because the
alerting rules are not Grafana-managed, Grafana is
not sending alert notifications for them.
This commit is contained in:
Kaare Hoff Skovgaard 2025-08-15 00:03:20 +02:00
parent b9e7228520
commit 16643efe46
Signed by: khs
GPG key ID: C7D890804F01E9F0
5 changed files with 67 additions and 8 deletions

View file

@@ -131,11 +131,11 @@ let
) [ ] cfg.accounts; ) [ ] cfg.accounts;
systemAccountsPassDbTemplateContents = systemAccountsPassDbTemplateContents =
lib.concatStringsSep "\n" ( lib.concatStrings (
lib.lists.map (account: '' lib.lists.map (account: ''
{{- with secret "mx.kaareskovgaard.net/data/users/${account}" -}} {{- with secret "mx.kaareskovgaard.net/data/users/${account}" -}}
${account}:{{ .Data.data.hashed_password }}:::::: ${account}:{{ .Data.data.hashed_password }}::::::
{{- end -}} {{ end -}}
'') systemAccounts '') systemAccounts
) )
# Just make sure the file is not empty # Just make sure the file is not empty

View file

@@ -15,4 +15,9 @@
addresses = [ "git@kas.codes" ]; addresses = [ "git@kas.codes" ];
sendOnly = true; sendOnly = true;
}; };
"monitoring" = {
name = "monitoring.kaareskovgaard.net";
addresses = [ "monitoring@kas.codes" ];
sendOnly = true;
};
} }

View file

@@ -10,8 +10,8 @@
summary: "Nginx on {{ $labels.instance }} is down" summary: "Nginx on {{ $labels.instance }} is down"
- alert: HighHttpErrorRate - alert: HighHttpErrorRate
expr: > expr: >
sum by(vhost, instance) (rate(http_response_count_total{status=~"5..",job="nginxlog"}[1m])) / sum by(vhost, instance) (rate(http_response_count_total{status=~"5..",job="nginxlog"}[10m])) /
sum by(vhost, instance) (rate(http_response_count_total{job="nginxlog"}[1m])) sum by(vhost, instance) (rate(http_response_count_total{job="nginxlog"}[10m]))
> 0 > 0
for: 30m for: 30m
labels: labels:

View file

@@ -29,7 +29,7 @@ in
- alert: JobDown - alert: JobDown
expr: > expr: >
${exprs} ${exprs}
for: 10m for: 1m
labels: labels:
severity: critical severity: critical
annotations: annotations:

View file

@@ -52,6 +52,7 @@ let
); );
oauthCredentialFile = config.khscodes.infrastructure.kanidm-client-application.secretFile; oauthCredentialFile = config.khscodes.infrastructure.kanidm-client-application.secretFile;
smtpPasswordFile = "/run/secret/grafana/smtp/password";
in in
{ {
imports = [ imports = [
@@ -61,6 +62,7 @@ in
systemd.services.grafana = { systemd.services.grafana = {
unitConfig.ConditionPathExists = [ unitConfig.ConditionPathExists = [
oauthCredentialFile oauthCredentialFile
smtpPasswordFile
]; ];
}; };
services.grafana = { services.grafana = {
@@ -102,6 +104,16 @@ in
org_mapping = "*:*:Admin"; org_mapping = "*:*:Admin";
role_attribute_path = "'GrafanaAdmin'"; role_attribute_path = "'GrafanaAdmin'";
}; };
smtp = {
enabled = true;
from_name = "monitoring.kaareskovgaard.net";
from_address = "monitoring@kas.codes";
ehlo_identity = "monitoring.kaareskovgaard.net";
host = "mx.kaareskovgaard.net:465";
password = "$__file{${smtpPasswordFile}}";
user = "monitoring";
};
}; };
provision = { provision = {
enable = true; enable = true;
@@ -156,6 +168,30 @@ in
options.path = postfixDashboard; options.path = postfixDashboard;
} }
]; ];
alerting = {
contactPoints.settings.contactPoints = [
{
orgId = 1;
name = "Default";
receivers = [
{
uid = "khs";
type = "email";
settings = {
addresses = "kaare@kaareskovgaard.net";
};
}
];
}
];
policies.settings.policies = [
{
orgId = 1;
receiver = "Default";
group_by = [ "instance" ];
}
];
};
}; };
}; };
services.prometheus = { services.prometheus = {
@@ -163,9 +199,10 @@ in
listenAddress = "127.0.0.1"; listenAddress = "127.0.0.1";
extraFlags = [ extraFlags = [
"--web.enable-otlp-receiver" "--web.enable-otlp-receiver"
"--storage.tsdb.retention.time=15d" "--storage.tsdb.retention.time=60d"
]; ];
# alertmanager.enable = true; # I think I need to move these into grafana managed rules, in order to get notifications
# working properly.
rules = [ rules = [
'' ''
groups: groups:
@@ -227,7 +264,7 @@ in
compactor = { compactor = {
retention_enabled = true; retention_enabled = true;
compaction_interval = "24h"; compaction_interval = "24h";
retention_delete_delay = "15d"; retention_delete_delay = "${builtins.toString (60 * 24)}h";
delete_request_store = "filesystem"; delete_request_store = "filesystem";
working_directory = "${config.services.loki.dataDir}/retention"; working_directory = "${config.services.loki.dataDir}/retention";
}; };
@@ -312,6 +349,18 @@ in
perms = "0644"; perms = "0644";
reloadOrRestartUnits = [ "nginx.service" ]; reloadOrRestartUnits = [ "nginx.service" ];
} }
{
contents = ''
{{- with secret "mx.kaareskovgaard.net/data/users/monitoring" -}}
{{ .Data.data.password }}
{{- end -}}
'';
destination = smtpPasswordFile;
owner = "grafana";
group = "grafana";
perms = "0600";
reloadOrRestartUnits = [ "grafana.service" ];
}
]; ];
infrastructure.kanidm-client-application = { infrastructure.kanidm-client-application = {
enable = true; enable = true;
@@ -319,6 +368,11 @@ in
secretOwner = "grafana"; secretOwner = "grafana";
reloadOrRestartUnits = [ "grafana.service" ]; reloadOrRestartUnits = [ "grafana.service" ];
}; };
infrastructure.vault-server-approle.policy = {
"mx.kaareskovgaard.net/data/users/monitoring" = {
capabilities = [ "read" ];
};
};
}; };
khscodes.networking.fqdn = "monitoring.kaareskovgaard.net"; khscodes.networking.fqdn = "monitoring.kaareskovgaard.net";
system.stateVersion = "25.05"; system.stateVersion = "25.05";