Attempt to bring up email notifications for alerts
Sending email works, but apparently, because the alerting rules are not Grafana-managed, Grafana does not send notifications for them.
This commit is contained in:
parent
b9e7228520
commit
16643efe46
5 changed files with 67 additions and 8 deletions
|
@ -131,11 +131,11 @@ let
|
|||
) [ ] cfg.accounts;
|
||||
|
||||
systemAccountsPassDbTemplateContents =
|
||||
lib.concatStringsSep "\n" (
|
||||
lib.concatStrings (
|
||||
lib.lists.map (account: ''
|
||||
{{- with secret "mx.kaareskovgaard.net/data/users/${account}" -}}
|
||||
${account}:{{ .Data.data.hashed_password }}::::::
|
||||
{{- end -}}
|
||||
{{ end -}}
|
||||
'') systemAccounts
|
||||
)
|
||||
# Just make sure the file is not empty
|
||||
|
|
|
@ -15,4 +15,9 @@
|
|||
addresses = [ "git@kas.codes" ];
|
||||
sendOnly = true;
|
||||
};
|
||||
"monitoring" = {
|
||||
name = "monitoring.kaareskovgaard.net";
|
||||
addresses = [ "monitoring@kas.codes" ];
|
||||
sendOnly = true;
|
||||
};
|
||||
}
|
||||
|
|
|
@ -10,8 +10,8 @@
|
|||
summary: "Nginx on {{ $labels.instance }} is down"
|
||||
- alert: HighHttpErrorRate
|
||||
expr: >
|
||||
sum by(vhost, instance) (rate(http_response_count_total{status=~"5..",job="nginxlog"}[1m])) /
|
||||
sum by(vhost, instance) (rate(http_response_count_total{job="nginxlog"}[1m]))
|
||||
sum by(vhost, instance) (rate(http_response_count_total{status=~"5..",job="nginxlog"}[10m])) /
|
||||
sum by(vhost, instance) (rate(http_response_count_total{job="nginxlog"}[10m]))
|
||||
> 0
|
||||
for: 30m
|
||||
labels:
|
||||
|
|
|
@ -29,7 +29,7 @@ in
|
|||
- alert: JobDown
|
||||
expr: >
|
||||
${exprs}
|
||||
for: 10m
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
|
|
|
@ -52,6 +52,7 @@ let
|
|||
);
|
||||
|
||||
oauthCredentialFile = config.khscodes.infrastructure.kanidm-client-application.secretFile;
|
||||
smtpPasswordFile = "/run/secret/grafana/smtp/password";
|
||||
in
|
||||
{
|
||||
imports = [
|
||||
|
@ -61,6 +62,7 @@ in
|
|||
systemd.services.grafana = {
|
||||
unitConfig.ConditionPathExists = [
|
||||
oauthCredentialFile
|
||||
smtpPasswordFile
|
||||
];
|
||||
};
|
||||
services.grafana = {
|
||||
|
@ -102,6 +104,16 @@ in
|
|||
org_mapping = "*:*:Admin";
|
||||
role_attribute_path = "'GrafanaAdmin'";
|
||||
};
|
||||
|
||||
smtp = {
|
||||
enabled = true;
|
||||
from_name = "monitoring.kaareskovgaard.net";
|
||||
from_address = "monitoring@kas.codes";
|
||||
ehlo_identity = "monitoring.kaareskovgaard.net";
|
||||
host = "mx.kaareskovgaard.net:465";
|
||||
password = "$__file{${smtpPasswordFile}}";
|
||||
user = "monitoring";
|
||||
};
|
||||
};
|
||||
provision = {
|
||||
enable = true;
|
||||
|
@ -156,6 +168,30 @@ in
|
|||
options.path = postfixDashboard;
|
||||
}
|
||||
];
|
||||
alerting = {
|
||||
contactPoints.settings.contactPoints = [
|
||||
{
|
||||
orgId = 1;
|
||||
name = "Default";
|
||||
receivers = [
|
||||
{
|
||||
uid = "khs";
|
||||
type = "email";
|
||||
settings = {
|
||||
addresses = "kaare@kaareskovgaard.net";
|
||||
};
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
policies.settings.policies = [
|
||||
{
|
||||
orgId = 1;
|
||||
receiver = "Default";
|
||||
group_by = [ "instance" ];
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
};
|
||||
services.prometheus = {
|
||||
|
@ -163,9 +199,10 @@ in
|
|||
listenAddress = "127.0.0.1";
|
||||
extraFlags = [
|
||||
"--web.enable-otlp-receiver"
|
||||
"--storage.tsdb.retention.time=15d"
|
||||
"--storage.tsdb.retention.time=60d"
|
||||
];
|
||||
# alertmanager.enable = true;
|
||||
# I think I need to move these into Grafana-managed rules in order to get notifications
|
||||
# working properly.
|
||||
rules = [
|
||||
''
|
||||
groups:
|
||||
|
@ -227,7 +264,7 @@ in
|
|||
compactor = {
|
||||
retention_enabled = true;
|
||||
compaction_interval = "24h";
|
||||
retention_delete_delay = "15d";
|
||||
retention_delete_delay = "${builtins.toString (60 * 24)}h";
|
||||
delete_request_store = "filesystem";
|
||||
working_directory = "${config.services.loki.dataDir}/retention";
|
||||
};
|
||||
|
@ -312,6 +349,18 @@ in
|
|||
perms = "0644";
|
||||
reloadOrRestartUnits = [ "nginx.service" ];
|
||||
}
|
||||
{
|
||||
contents = ''
|
||||
{{- with secret "mx.kaareskovgaard.net/data/users/monitoring" -}}
|
||||
{{ .Data.data.password }}
|
||||
{{- end -}}
|
||||
'';
|
||||
destination = smtpPasswordFile;
|
||||
owner = "grafana";
|
||||
group = "grafana";
|
||||
perms = "0600";
|
||||
reloadOrRestartUnits = [ "grafana.service" ];
|
||||
}
|
||||
];
|
||||
infrastructure.kanidm-client-application = {
|
||||
enable = true;
|
||||
|
@ -319,6 +368,11 @@ in
|
|||
secretOwner = "grafana";
|
||||
reloadOrRestartUnits = [ "grafana.service" ];
|
||||
};
|
||||
infrastructure.vault-server-approle.policy = {
|
||||
"mx.kaareskovgaard.net/data/users/monitoring" = {
|
||||
capabilities = [ "read" ];
|
||||
};
|
||||
};
|
||||
};
|
||||
khscodes.networking.fqdn = "monitoring.kaareskovgaard.net";
|
||||
system.stateVersion = "25.05";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue