diff --git a/nix/modules/nixos/infrastructure/vault-prometheus-sender/default.nix b/nix/modules/nixos/infrastructure/vault-prometheus-sender/default.nix index faf09d0..df89a7a 100644 --- a/nix/modules/nixos/infrastructure/vault-prometheus-sender/default.nix +++ b/nix/modules/nixos/infrastructure/vault-prometheus-sender/default.nix @@ -19,10 +19,17 @@ in description = "This should only be configured for the server hosting vault, to allow setting up dependencies in terraform"; default = "prometheus-mtls"; }; - exporters.enabled = lib.mkOption { - type = lib.types.listOf lib.types.str; - default = [ ]; - description = "List of config.services.prometheus.exporters. that are enabled. This is not done automatically as I don't know how to do that without triggering removed options warnings."; + exporters = { + enabled = lib.mkOption { + type = lib.types.listOf lib.types.str; + default = [ ]; + description = "List of config.services.prometheus.exporters. that are enabled. This is not done automatically as I don't know how to do that without triggering removed options warnings."; + }; + external = lib.mkOption { + type = lib.types.listOf lib.types.str; + default = [ "node_exporter" ]; + description = "Externally managed exporters. This should be the name of the job of that exporter. Adding it to this list will get monitoring of when the exporter goes down working"; + }; }; }; @@ -82,35 +89,34 @@ in PROMETHEUS_CLIENT_CERT = client_cert; }; }; - environment.etc = - { - "alloy/prometheus.alloy" = { - source = ./prometheus.alloy; - }; - } - // lib.listToAttrs ( - lib.lists.map ( - name: - let - value = config.services.prometheus.exporters.${name}; - in - { - name = "alloy/prometheus_${name}.alloy"; - value = { - text = '' - prometheus.scrape "exporter_${name}" { - targets = [ - {"__address__" = "127.0.0.1:${toString value.port}", "instance" = constants.hostname, "job" = "${name}"}, - ] + environment.etc = { + "alloy/prometheus.alloy" = { + source = ./prometheus.alloy; + }; + } + // lib.listToAttrs ( + lib.lists.map ( + name: + let + value = config.services.prometheus.exporters.${name}; + in + { + name = "alloy/prometheus_${name}.alloy"; + value = { + text = '' + prometheus.scrape "exporter_${name}" { + targets = [ + {"__address__" = "127.0.0.1:${toString value.port}", "instance" = constants.hostname, "job" = "${name}"}, + ] - scrape_interval = "1m" + scrape_interval = "1m" - forward_to = [otelcol.receiver.prometheus.default.receiver] - } - ''; - }; - } - ) cfg.exporters.enabled - ); + forward_to = [otelcol.receiver.prometheus.default.receiver] + } + ''; + }; + } + ) cfg.exporters.enabled + ); }; } diff --git a/nix/systems/aarch64-linux/kas.codes/forgejo/default.nix b/nix/systems/aarch64-linux/kas.codes/forgejo/default.nix index 7e4b274..26d70aa 100644 --- a/nix/systems/aarch64-linux/kas.codes/forgejo/default.nix +++ b/nix/systems/aarch64-linux/kas.codes/forgejo/default.nix @@ -181,6 +181,7 @@ in }; users.groups.git = { }; + khscodes.infrastructure.vault-prometheus-sender.exporters.external = [ "forgejo" ]; environment.etc."alloy/forgejo_prometheus.alloy" = { text = '' prometheus.scrape "forgejo_exporter" { diff --git a/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/job_up.nix b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/job_up.nix new file mode 100644 index 0000000..7ce7dac --- /dev/null +++ b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/job_up.nix @@ -0,0 +1,37 @@ +{ inputs, lib }: +let + jobs = lib.attrsets.foldlAttrs ( + acc: name: nixos: + let + vault-prometheus-sender = nixos.config.khscodes.infrastructure.vault-prometheus-sender; + instance = nixos.config.khscodes.networking.fqdn; + instanceJobs = + vault-prometheus-sender.exporters.enabled ++ vault-prometheus-sender.exporters.external; + expr = lib.strings.concatMapStringsSep " or " ( + job: ''absent_over_time(up{instance="${instance}", job="${job}"}[2m])'' + ) instanceJobs; + in + acc + ++ ( + if vault-prometheus-sender.enable then + [ + expr + ] + else + [ ] + ) + ) [ ] inputs.self.nixosConfigurations; + exprs = lib.strings.concatStringsSep " or " jobs; +in +'' + - name: Scraping + rules: + - alert: JobDown + expr: > + ${exprs} + for: 10m + labels: + severity: critical + annotations: + summary: "Scrape job {{ $labels.job }} on {{ $labels.instance }} is down" +'' diff --git a/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/job_up.yaml b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/alerts/job_up.yaml deleted file mode 100644 index e69de29..0000000 diff --git a/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/default.nix b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/default.nix index 6e21b2c..e9fc525 100644 --- a/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/default.nix +++ b/nix/systems/x86_64-linux/monitoring.kaareskovgaard.net/default.nix @@ -2,6 +2,7 @@ inputs, config, pkgs, + lib, ... }: let @@ -126,6 +127,7 @@ in ${builtins.readFile ./alerts/postfix.yaml} ${builtins.readFile ./alerts/postgres.yaml} ${builtins.readFile ./alerts/systemd.yaml} + ${import ./alerts/job_up.nix { inherit inputs lib; }} '' ]; };