Add some more alerting and fail2ban rules
All checks were successful
/ dev-shell (push) Successful in 33s
/ rust-packages (push) Successful in 38s
/ terraform-providers (push) Successful in 1m1s
/ check (push) Successful in 2m7s
/ systems (push) Successful in 3m52s

This commit is contained in:
Kaare Hoff Skovgaard 2025-07-22 15:17:17 +02:00
parent e28f501316
commit 8e21df1764
Signed by: khs
GPG key ID: C7D890804F01E9F0
8 changed files with 214 additions and 25 deletions

View file

@ -70,6 +70,9 @@ in
DEFAULT = { DEFAULT = {
APP_NAME = "KAS: Codes"; APP_NAME = "KAS: Codes";
}; };
metrics = {
ENABLED = true;
};
server = rec { server = rec {
DOMAIN = "kas.codes"; DOMAIN = "kas.codes";
ROOT_URL = "https://${DOMAIN}"; ROOT_URL = "https://${DOMAIN}";
@ -189,6 +192,9 @@ in
locations."/" = { locations."/" = {
proxyPass = "http://localhost:3000"; proxyPass = "http://localhost:3000";
}; };
locations."/metrics" = {
return = "404";
};
}; };
}; };
}; };
@ -199,4 +205,19 @@ in
useDefaultShell = true; useDefaultShell = true;
}; };
users.groups.git = { }; users.groups.git = { };
# Installs an Alloy configuration fragment at /etc/alloy/forgejo_prometheus.alloy.
# The fragment declares one prometheus.scrape component that polls Forgejo's
# /metrics path once a minute on 127.0.0.1, using the port taken from
# services.forgejo.settings.server.HTTP_PORT.
# NOTE(review): forward_to references otelcol.receiver.prometheus.default, which is
# not declared in this fragment — presumably defined elsewhere in the Alloy config; confirm.
environment.etc."alloy/forgejo_prometheus.alloy" = {
text = ''
prometheus.scrape "forgejo_exporter" {
scrape_interval = "1m"
targets = [
{
"__address__" = "127.0.0.1:${toString config.services.forgejo.settings.server.HTTP_PORT}",
},
]
metrics_path = "/metrics"
forward_to = [otelcol.receiver.prometheus.default.receiver]
}
'';
};
} }

View file

@ -10,6 +10,68 @@
capabilities = [ "read" ]; capabilities = [ "read" ];
}; };
}; };
# Outbound firewall rules for the mail host. Every rule allows traffic to any
# IPv4/IPv6 destination on a single protocol/port pair:
#   tcp/25 (smtp), tcp/80 (http), tcp/443 (https), udp/443 (quic), udp+tcp/53 (dns).
# The tcp/443 rule was previously described as "http" (copy-paste from the port 80
# rule); it is now labelled "https" so the two rules can be told apart.
khscodes.infrastructure.hetzner-instance.extraFirewallRules =
  let
    # All IPv4 and IPv6 destinations.
    anywhere = [
      "0.0.0.0/0"
      "::/0"
    ];
    # Build one outbound rule; the attribute set has exactly the same keys as
    # the hand-written rules it replaces.
    allowOut = protocol: port: description: {
      direction = "out";
      inherit protocol port description;
      destination_ips = anywhere;
    };
  in
  [
    (allowOut "tcp" 25 "smtp")
    (allowOut "tcp" 80 "http")
    (allowOut "tcp" 443 "https")
    (allowOut "udp" 443 "quic")
    (allowOut "udp" 53 "dns")
    (allowOut "tcp" 53 "dns")
  ];
khscodes.infrastructure.provisioning.pre.modules = [ khscodes.infrastructure.provisioning.pre.modules = [
{ {
khscodes.vault = { khscodes.vault = {
@ -25,6 +87,34 @@
}; };
} }
]; ];
# Turn on the Prometheus postfix exporter and list "postfix" among the exporters
# enabled for vault-prometheus-sender.
# NOTE(review): only `enable` is set on the exporter; confirm its default log
# source matches where postfix actually logs on this host.
services.prometheus.exporters.postfix.enable = true;
khscodes.infrastructure.vault-prometheus-sender.exporters.enabled = [ "postfix" ];

# fail2ban jails for the mail stack. All three use a 600-second findtime.
# SASL authentication failures: postfix filter in auth mode, watching the
# SMTP/submission/IMAP/POP3 ports, ban after 5 retries.
services.fail2ban.jails.postfix-sasl.settings = {
  filter = "postfix[mode=auth]";
  port = "smtp,submission,imap,imaps,pop3,pop3s";
  findtime = 600;
  maxretry = 5;
};
# Postfix jail, ban after 3 retries.
services.fail2ban.jails.postfix.settings = {
  enabled = true;
  findtime = 600;
  maxretry = 3;
};
# Dovecot jail, ban after 3 retries.
services.fail2ban.jails.dovecot.settings = {
  enabled = true;
  findtime = 600;
  maxretry = 3;
};
mailserver = { mailserver = {
enable = true; enable = true;
fqdn = "kas.codes"; fqdn = "kas.codes";
@ -37,7 +127,7 @@
}; };
certificateScheme = "acme"; certificateScheme = "acme";
dkimKeyDirectory = "/var/lib/vault-agent/mailserver/dkim/"; dkimKeyDirectory = "/var/lib/vault-agent/mailserver/dkim/";
dkimSelector = "dkim_rsa"; dkimSelector = "snm_rsa";
# Not sure we need to set this at all. # Not sure we need to set this at all.
dkimKeyBits = 2048; dkimKeyBits = 2048;
}; };

View file

@ -1,3 +1,4 @@
{ pkgs, lib, ... }:
let let
publicKeyBegin = ''"-----BEGIN PUBLIC KEY-----\n"''; publicKeyBegin = ''"-----BEGIN PUBLIC KEY-----\n"'';
publicKeyEnd = ''"-----END PUBLIC KEY-----\n"''; publicKeyEnd = ''"-----END PUBLIC KEY-----\n"'';
@ -13,10 +14,19 @@ in
{{ .Data.data.dkim_private_key }} {{ .Data.data.dkim_private_key }}
{{- end -}} {{- end -}}
''; '';
destination = "/var/lib/vault-agent/mailserver/dkim/rsa_private.key"; destination = "/var/lib/vault-agent/mailserver/dkim/kas.codes.snm_rsa.key";
perms = "0600"; perms = "0600";
owner = "rspamd"; owner = "rspamd";
group = "rspamd"; group = "rspamd";
# Command attached to the RSA DKIM key entry: a generated shell script that deletes
# kas.codes.snm_rsa.txt from the DKIM directory (rm -f, so a missing file is not an error).
# NOTE(review): presumably run after the key is written so the .txt record is
# regenerated from the new key — confirm against the vault-agent/mailserver modules.
exec = lib.getExe (
pkgs.writeShellApplication {
name = "kas.codes.snm_rsa-remove-txt";
# rm is provided by uutils-coreutils-noprefix.
runtimeInputs = [ pkgs.uutils-coreutils-noprefix ];
text = ''
rm -f /var/lib/vault-agent/mailserver/dkim/kas.codes.snm_rsa.txt
'';
}
);
restartUnits = [ restartUnits = [
"rspamd.service" "rspamd.service"
"postfix.service" "postfix.service"
@ -28,10 +38,19 @@ in
{{ .Data.data.dkim_private_key }} {{ .Data.data.dkim_private_key }}
{{- end -}} {{- end -}}
''; '';
destination = "/var/lib/vault-agent/mailserver/dkim/ed25519_private.key"; destination = "/var/lib/vault-agent/mailserver/dkim/kas.codes.snm_ed25519.key";
perms = "0600"; perms = "0600";
owner = "rspamd"; owner = "rspamd";
group = "rspamd"; group = "rspamd";
# Command attached to the ed25519 DKIM key entry: a generated shell script that
# deletes kas.codes.snm_ed25519.txt from the DKIM directory (rm -f, so a missing
# file is not an error).
# NOTE(review): presumably run after the key is written so the .txt record is
# regenerated from the new key — confirm against the vault-agent/mailserver modules.
exec = lib.getExe (
  pkgs.writeShellApplication {
    # Named after the ed25519 key it cleans up; it previously reused the RSA
    # script's name even though it removes the ed25519 .txt file.
    name = "kas.codes.snm_ed25519-remove-txt";
    # rm is provided by uutils-coreutils-noprefix.
    runtimeInputs = [ pkgs.uutils-coreutils-noprefix ];
    text = ''
      rm -f /var/lib/vault-agent/mailserver/dkim/kas.codes.snm_ed25519.txt
    '';
  }
);
restartUnits = [ restartUnits = [
"rspamd.service" "rspamd.service"
"postfix.service" "postfix.service"

View file

@ -1,21 +1,20 @@
groups: - name: Http
- name: Http rules:
rules: - alert: NginxDown
- alert: NginxDown expr: >
expr: > nginx_up{job="nginx"} == 0
nginx_up{job="nginx"} == 0 for: 10m
for: 10m labels:
labels: severity: critical
severity: critical annotations:
annotations: summary: "Nginx on {{ $labels.instance }} is down"
summary: "Nginx on {{ $labels.instance }} is down" - alert: HighHttpErrorRate
- alert: HighHttpErrorRate expr: >
expr: > sum by(vhost, instance) (rate(http_response_count_total{status=~"5..",job="nginxlog"}[1m])) /
sum by(vhost, instance) (rate(http_response_count_total{status=~"5..",job="nginxlog"}[1m])) / sum by(vhost, instance) (rate(http_response_count_total{job="nginxlog"}[1m]))
sum by(vhost, instance) (rate(http_response_count_total{job="nginxlog"}[1m])) > 0
> 0 for: 30m
for: 30m labels:
labels: severity: critical
severity: critical annotations:
annotations: summary: "Nginx VHost {{ $labels.vhost }} on {{ $labels.instance }} is generating many internal server errors over 1 hour"
summary: "Nginx VHost {{ $labels.vhost }} on {{ $labels.instance }} is generating many internal server errors over 1 hour"

View file

@ -0,0 +1,18 @@
# Alert rules for Postfix, evaluated on metrics with job="postfix".
- name: Postfix
  rules:
    # postfix_up has reported 0 for 10 minutes.
    - alert: PostfixDown
      expr: >
        postfix_up{job="postfix"} == 0
      for: 10m
      labels:
        severity: critical
      annotations:
        summary: "Postfix on {{ $labels.instance }} is down"
    # The summed message size in a queue has stayed above zero for 10 minutes,
    # i.e. the queue has not drained.
    - alert: PostfixQueueNotEmptying
      expr: >
        postfix_showq_message_size_bytes_sum{job="postfix"} > 0
      for: 10m
      labels:
        severity: critical
      annotations:
        summary: "Postfix queue {{ $labels.queue }} on {{ $labels.instance }} has been non-empty over 10m"

View file

@ -0,0 +1,18 @@
# Alert rules for PostgreSQL, evaluated on metrics with job="postgres".
- name: Postgres
  rules:
    # pg_up has reported 0 for 10 minutes.
    - alert: PgDown
      expr: >
        pg_up{job="postgres"} == 0
      for: 10m
      labels:
        severity: critical
      annotations:
        summary: "Postgres on {{ $labels.instance }} is down"
    # The exporter has flagged a scrape error for 10 minutes.
    - alert: PgScrapeError
      expr: >
        pg_exporter_last_scrape_error{job="postgres"} > 0
      for: 10m
      labels:
        severity: warning
      annotations:
        summary: "Could not scrape postgres on {{ $labels.instance }}"

View file

@ -0,0 +1,10 @@
# Alert rules for systemd units, evaluated on node_exporter metrics
# (job="integrations/node_exporter").
- name: Systemd
  rules:
    # A unit has been in the "failed" state for 10 minutes.
    - alert: UnitFailed
      expr: >
        node_systemd_unit_state{job="integrations/node_exporter",state="failed"} == 1
      for: 10m
      labels:
        # Uses "warning", the same spelling as the Postgres PgScrapeError rule,
        # so severity-based matching treats both alerts alike (was "warn").
        severity: warning
      annotations:
        summary: "Unit {{ $labels.name }} on {{ $labels.instance }} is in failed state"

View file

@ -13,6 +13,10 @@ let
url = "https://grafana.com/api/dashboards/9628/revisions/8/download"; url = "https://grafana.com/api/dashboards/9628/revisions/8/download";
hash = "sha256-UhusNAZbyt7fJV/DhFUK4FKOmnTpG0R15YO2r+nDnMc="; hash = "sha256-UhusNAZbyt7fJV/DhFUK4FKOmnTpG0R15YO2r+nDnMc=";
}; };
# Grafana dashboard 10013, revision 2, downloaded from grafana.com and pinned by
# hash; used further down as the path of the "Postfix" dashboard entry.
postfixDashboard = pkgs.fetchurl {
url = "https://grafana.com/api/dashboards/10013/revisions/2/download";
hash = "sha256-SIKL1V+sJ5F7vPOwp/LuOjrGm8nCsscEX8LcLFMotfc=";
};
in in
{ {
imports = [ imports = [
@ -102,6 +106,10 @@ in
name = "Postgresql"; name = "Postgresql";
options.path = postgresqlDashboard; options.path = postgresqlDashboard;
} }
{
name = "Postfix";
options.path = postfixDashboard;
}
]; ];
}; };
}; };
@ -111,7 +119,13 @@ in
extraFlags = [ "--web.enable-otlp-receiver" ]; extraFlags = [ "--web.enable-otlp-receiver" ];
# alertmanager.enable = true; # alertmanager.enable = true;
rules = [ rules = [
(builtins.readFile ./alerts/http.yaml) ''
groups:
${builtins.readFile ./alerts/http.yaml}
${builtins.readFile ./alerts/postfix.yaml}
${builtins.readFile ./alerts/postgres.yaml}
${builtins.readFile ./alerts/systemd.yaml}
''
]; ];
}; };
services.loki = { services.loki = {