diff --git a/nix/systems/aarch64-linux/kas.codes/forgejo/default.nix b/nix/systems/aarch64-linux/kas.codes/forgejo/default.nix index 6163202..3395289 100644 --- a/nix/systems/aarch64-linux/kas.codes/forgejo/default.nix +++ b/nix/systems/aarch64-linux/kas.codes/forgejo/default.nix @@ -70,6 +70,9 @@ in DEFAULT = { APP_NAME = "KAS: Codes"; }; + metrics = { + ENABLED = true; + }; server = rec { DOMAIN = "kas.codes"; ROOT_URL = "https://${DOMAIN}"; @@ -189,6 +192,9 @@ in locations."/" = { proxyPass = "http://localhost:3000"; }; + locations."/metrics" = { + return = "404"; + }; }; }; }; @@ -199,4 +205,19 @@ in useDefaultShell = true; }; users.groups.git = { }; + + environment.etc."alloy/forgejo_prometheus.alloy" = { + text = '' + prometheus.scrape "forgejo_exporter" { + scrape_interval = "1m" + targets = [ + { + "__address__" = "127.0.0.1:${toString config.services.forgejo.settings.server.HTTP_PORT}", + }, + ] + metrics_path = "/metrics" + forward_to = [otelcol.receiver.prometheus.default.receiver] + } + ''; + }; } diff --git a/nix/systems/aarch64-linux/kas.codes/mailserver/default.nix b/nix/systems/aarch64-linux/kas.codes/mailserver/default.nix index 88099df..9b4d5d8 100644 --- a/nix/systems/aarch64-linux/kas.codes/mailserver/default.nix +++ b/nix/systems/aarch64-linux/kas.codes/mailserver/default.nix @@ -10,6 +10,68 @@ capabilities = [ "read" ]; }; }; + khscodes.infrastructure.hetzner-instance.extraFirewallRules = [ + { + direction = "out"; + protocol = "tcp"; + port = 25; + destination_ips = [ + "0.0.0.0/0" + "::/0" + ]; + description = "smtp"; + } + { + direction = "out"; + protocol = "tcp"; + port = 80; + destination_ips = [ + "0.0.0.0/0" + "::/0" + ]; + description = "http"; + } + { + direction = "out"; + protocol = "tcp"; + port = 443; + destination_ips = [ + "0.0.0.0/0" + "::/0" + ]; + description = "https"; + } + { + direction = "out"; + protocol = "udp"; + port = 443; + destination_ips = [ + "0.0.0.0/0" + "::/0" + ]; + description = "quic"; + } + { + direction = 
"out"; + protocol = "udp"; + port = 53; + destination_ips = [ + "0.0.0.0/0" + "::/0" + ]; + description = "dns"; + } + { + direction = "out"; + protocol = "tcp"; + port = 53; + destination_ips = [ + "0.0.0.0/0" + "::/0" + ]; + description = "dns"; + } + ]; khscodes.infrastructure.provisioning.pre.modules = [ { khscodes.vault = { @@ -25,6 +87,34 @@ }; } ]; + services.prometheus.exporters.postfix = { + enable = true; + }; + khscodes.infrastructure.vault-prometheus-sender.exporters.enabled = [ "postfix" ]; + services.fail2ban.jails = { + postfix-sasl = { + settings = { + filter = "postfix[mode=auth]"; + port = "smtp,submission,imap,imaps,pop3,pop3s"; + findtime = 600; + maxretry = 5; + }; + }; + postfix = { + settings = { + enabled = true; + findtime = 600; + maxretry = 3; + }; + }; + dovecot = { + settings = { + enabled = true; + findtime = 600; + maxretry = 3; + }; + }; + }; mailserver = { enable = true; fqdn = "kas.codes"; @@ -37,7 +127,7 @@ }; certificateScheme = "acme"; dkimKeyDirectory = "/var/lib/vault-agent/mailserver/dkim/"; - dkimSelector = "dkim_rsa"; + dkimSelector = "snm_rsa"; # Not sure we need to set this at all. dkimKeyBits = 2048; }; diff --git a/nix/systems/aarch64-linux/kas.codes/mailserver/dkim.nix b/nix/systems/aarch64-linux/kas.codes/mailserver/dkim.nix index 41f182d..f15022d 100644 --- a/nix/systems/aarch64-linux/kas.codes/mailserver/dkim.nix +++ b/nix/systems/aarch64-linux/kas.codes/mailserver/dkim.nix @@ -1,3 +1,4 @@ +{ pkgs, lib, ... 
}: let publicKeyBegin = ''"-----BEGIN PUBLIC KEY-----\n"''; publicKeyEnd = ''"-----END PUBLIC KEY-----\n"''; @@ -13,10 +14,19 @@ in {{ .Data.data.dkim_private_key }} {{- end -}} ''; - destination = "/var/lib/vault-agent/mailserver/dkim/rsa_private.key"; + destination = "/var/lib/vault-agent/mailserver/dkim/kas.codes.snm_rsa.key"; perms = "0600"; owner = "rspamd"; group = "rspamd"; + exec = lib.getExe ( + pkgs.writeShellApplication { + name = "kas.codes.snm_rsa-remove-txt"; + runtimeInputs = [ pkgs.uutils-coreutils-noprefix ]; + text = '' + rm -f /var/lib/vault-agent/mailserver/dkim/kas.codes.snm_rsa.txt + ''; + } + ); restartUnits = [ "rspamd.service" "postfix.service" @@ -28,10 +38,19 @@ in {{ .Data.data.dkim_private_key }} {{- end -}} ''; - destination = "/var/lib/vault-agent/mailserver/dkim/ed25519_private.key"; + destination = "/var/lib/vault-agent/mailserver/dkim/kas.codes.snm_ed25519.key"; perms = "0600"; owner = "rspamd"; group = "rspamd"; + exec = lib.getExe ( + pkgs.writeShellApplication { + name = "kas.codes.snm_ed25519-remove-txt"; + runtimeInputs = [ pkgs.uutils-coreutils-noprefix ]; + text = '' + rm -f /var/lib/vault-agent/mailserver/dkim/kas.codes.snm_ed25519.txt + ''; + } + ); restartUnits = [ "rspamd.service" "postfix.service" diff --git a/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/http.yaml b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/http.yaml index 279f264..bca404e 100644 --- a/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/http.yaml +++ b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/http.yaml @@ -1,21 +1,20 @@ -groups: - - name: Http - rules: - - alert: NginxDown - expr: > - nginx_up{job="nginx"} == 0 - for: 10m - labels: - severity: critical - annotations: - summary: "Nginx on {{ $labels.instance }} is down" - - alert: HighHttpErrorRate - expr: > - sum by(vhost, instance) (rate(http_response_count_total{status=~"5..",job="nginxlog"}[1m])) / - sum by(vhost, instance) 
(rate(http_response_count_total{job="nginxlog"}[1m])) - > 0 - for: 30m - labels: - severity: critical - annotations: - summary: "Nginx VHost {{ $labels.vhost }} on {{ $labels.instance }} is generating many internal server errors over 1 hour" +- name: Http + rules: + - alert: NginxDown + expr: > + nginx_up{job="nginx"} == 0 + for: 10m + labels: + severity: critical + annotations: + summary: "Nginx on {{ $labels.instance }} is down" + - alert: HighHttpErrorRate + expr: > + sum by(vhost, instance) (rate(http_response_count_total{status=~"5..",job="nginxlog"}[1m])) / + sum by(vhost, instance) (rate(http_response_count_total{job="nginxlog"}[1m])) + > 0 + for: 30m + labels: + severity: critical + annotations: + summary: "Nginx VHost {{ $labels.vhost }} on {{ $labels.instance }} is generating many internal server errors over 30m" diff --git a/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/postfix.yaml b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/postfix.yaml new file mode 100644 index 0000000..4cc0f57 --- /dev/null +++ b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/postfix.yaml @@ -0,0 +1,18 @@ +- name: Postfix + rules: + - alert: PostfixDown + expr: > + postfix_up{job="postfix"} == 0 + for: 10m + labels: + severity: critical + annotations: + summary: "Postfix on {{ $labels.instance }} is down" + - alert: PostfixQueueNotEmptying + expr: > + postfix_showq_message_size_bytes_sum{job="postfix"} > 0 + for: 10m + labels: + severity: critical + annotations: + summary: "Postfix queue {{ $labels.queue }} on {{ $labels.instance }} has been non-empty over 10m" diff --git a/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/postgres.yaml b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/postgres.yaml new file mode 100644 index 0000000..25fe18a --- /dev/null +++ b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/postgres.yaml @@ -0,0 +1,18 @@ +- name: Postgres + rules: + - alert: PgDown + 
expr: > + pg_up{job="postgres"} == 0 + for: 10m + labels: + severity: critical + annotations: + summary: "Postgres on {{ $labels.instance }} is down" + - alert: PgScrapeError + expr: > + pg_exporter_last_scrape_error{job="postgres"} > 0 + for: 10m + labels: + severity: warning + annotations: + summary: "Could not scrape postgres on {{ $labels.instance }}" diff --git a/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/systemd.yaml b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/systemd.yaml new file mode 100644 index 0000000..019eda2 --- /dev/null +++ b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/systemd.yaml @@ -0,0 +1,10 @@ +- name: Systemd + rules: + - alert: UnitFailed + expr: > + node_systemd_unit_state{job="integrations/node_exporter",state="failed"} == 1 + for: 10m + labels: + severity: warning + annotations: + summary: "Unit {{ $labels.name }} on {{ $labels.instance }} is in failed state" diff --git a/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/default.nix b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/default.nix index ccd0ca3..162e350 100644 --- a/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/default.nix +++ b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/default.nix @@ -13,6 +13,10 @@ let url = "https://grafana.com/api/dashboards/9628/revisions/8/download"; hash = "sha256-UhusNAZbyt7fJV/DhFUK4FKOmnTpG0R15YO2r+nDnMc="; }; + postfixDashboard = pkgs.fetchurl { + url = "https://grafana.com/api/dashboards/10013/revisions/2/download"; + hash = "sha256-SIKL1V+sJ5F7vPOwp/LuOjrGm8nCsscEX8LcLFMotfc="; + }; in { imports = [ @@ -102,6 +106,10 @@ in name = "Postgresql"; options.path = postgresqlDashboard; } + { + name = "Postfix"; + options.path = postfixDashboard; + } ]; }; }; @@ -111,7 +119,13 @@ in extraFlags = [ "--web.enable-otlp-receiver" ]; # alertmanager.enable = true; rules = [ - (builtins.readFile ./alerts/http.yaml) + '' + groups: + ${builtins.readFile ./alerts/http.yaml} + 
${builtins.readFile ./alerts/postfix.yaml} + ${builtins.readFile ./alerts/postgres.yaml} + ${builtins.readFile ./alerts/systemd.yaml} + '' ]; }; services.loki = {