Begin adding some alerts and add postgres exporter
This commit is contained in:
parent
0233431feb
commit
e28f501316
7 changed files with 301 additions and 62 deletions
|
@ -11,6 +11,7 @@ let
|
|||
client_cert = "/var/lib/alloy/prometheus.cert";
|
||||
in
|
||||
{
|
||||
imports = [ ./postgres.nix ];
|
||||
options.khscodes.infrastructure.vault-prometheus-sender = {
|
||||
enable = lib.mkEnableOption "Configures the server approle to allow sending data to prometheus";
|
||||
terranixBackendName = lib.mkOption {
|
||||
|
@ -18,6 +19,11 @@ in
|
|||
description = "This should only be configured for the server hosting vault, to allow setting up dependencies in terraform";
|
||||
default = "prometheus-mtls";
|
||||
};
|
||||
exporters.enabled = lib.mkOption {
|
||||
type = lib.types.listOf lib.types.str;
|
||||
default = [ ];
|
||||
description = "List of config.services.prometheus.exporters.<name> that are enabled. This is not done automatically as I don't know how to do that without triggering removed options warnings.";
|
||||
};
|
||||
};
|
||||
|
||||
config = lib.mkIf cfg.enable {
|
||||
|
@ -76,8 +82,35 @@ in
|
|||
PROMETHEUS_CLIENT_CERT = client_cert;
|
||||
};
|
||||
};
|
||||
environment.etc."alloy/prometheus.alloy" = {
|
||||
source = ./prometheus.alloy;
|
||||
};
|
||||
environment.etc =
|
||||
{
|
||||
"alloy/prometheus.alloy" = {
|
||||
source = ./prometheus.alloy;
|
||||
};
|
||||
}
|
||||
// lib.listToAttrs (
|
||||
lib.lists.map (
|
||||
name:
|
||||
let
|
||||
value = config.services.prometheus.exporters.${name};
|
||||
in
|
||||
{
|
||||
name = "alloy/prometheus_${name}.alloy";
|
||||
value = {
|
||||
text = ''
|
||||
prometheus.scrape "exporter_${name}" {
|
||||
targets = [
|
||||
{"__address__" = "127.0.0.1:${toString value.port}", "instance" = constants.hostname, "job" = "${name}"},
|
||||
]
|
||||
|
||||
scrape_interval = "1m"
|
||||
|
||||
forward_to = [otelcol.receiver.prometheus.default.receiver]
|
||||
}
|
||||
'';
|
||||
};
|
||||
}
|
||||
) cfg.exporters.enabled
|
||||
);
|
||||
};
|
||||
}
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
{
|
||||
lib,
|
||||
config,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
let
|
||||
sql = pkgs.writeText "create-postgres-exporter.sql" ''
|
||||
-- To use IF statements, hence to be able to check if the user exists before
|
||||
-- attempting creation, we need to switch to procedural SQL (PL/pgSQL)
|
||||
-- instead of standard SQL.
|
||||
-- More: https://www.postgresql.org/docs/9.3/plpgsql-overview.html
|
||||
-- To preserve compatibility with <9.0, DO blocks are not used; instead,
|
||||
-- a function is created and dropped.
|
||||
CREATE OR REPLACE FUNCTION __tmp_create_user() returns void as $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT -- SELECT list can stay empty for this
|
||||
FROM pg_catalog.pg_user
|
||||
WHERE usename = 'postgres_exporter') THEN
|
||||
CREATE USER postgres_exporter;
|
||||
END IF;
|
||||
END;
|
||||
$$ language plpgsql;
|
||||
|
||||
SELECT __tmp_create_user();
|
||||
DROP FUNCTION __tmp_create_user();
|
||||
|
||||
ALTER USER postgres_exporter SET SEARCH_PATH TO postgres_exporter,pg_catalog;
|
||||
|
||||
-- If deploying as non-superuser (for example in AWS RDS), uncomment the GRANT
|
||||
-- line below and replace <MASTER_USER> with your root user.
|
||||
-- GRANT postgres_exporter TO <MASTER_USER>;
|
||||
|
||||
GRANT CONNECT ON DATABASE postgres TO postgres_exporter;
|
||||
GRANT pg_monitor to postgres_exporter;
|
||||
'';
|
||||
script = pkgs.writeShellApplication {
|
||||
name = "postgresql-create-postgres-exporter";
|
||||
runtimeInputs = [ config.services.postgresql.package ];
|
||||
text = ''
|
||||
PSQL="psql --port=${builtins.toString config.services.postgresql.settings.port}"
|
||||
|
||||
while ! $PSQL -d postgres -c "" 2> /dev/null; do
|
||||
sleep 0.1
|
||||
done
|
||||
|
||||
$PSQL -tAf ${sql}
|
||||
'';
|
||||
};
|
||||
in
|
||||
{
|
||||
config =
|
||||
lib.mkIf
|
||||
(config.khscodes.infrastructure.vault-prometheus-sender.enable && config.services.postgresql.enable)
|
||||
{
|
||||
khscodes.infrastructure.vault-prometheus-sender.exporters.enabled = [ "postgres" ];
|
||||
services.prometheus.exporters.postgres = {
|
||||
enable = true;
|
||||
dataSourceName = "user=postgres_exporter host=/run/postgresql database=postgres sslmode=disable";
|
||||
user = "postgres_exporter";
|
||||
group = "postgres_exporter";
|
||||
};
|
||||
users.users.postgres_exporter = {
|
||||
enable = true;
|
||||
group = "postgres_exporter";
|
||||
isSystemUser = true;
|
||||
};
|
||||
users.groups.postgres_exporter = { };
|
||||
systemd.services.postgresql-create-postgres-exporter = {
|
||||
enable = true;
|
||||
after = [ "postgresql.service" ];
|
||||
requires = [ "postgresql.service" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
before = [ "prometheus-postgres-exporter.service" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
ExecStart = [
|
||||
(lib.getExe script)
|
||||
];
|
||||
User = "postgres";
|
||||
Group = "postgres";
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
|
@ -50,7 +50,7 @@ prometheus.exporter.unix "integrations_node_exporter" {
|
|||
|
||||
// Define how to scrape metrics from the node_exporter
|
||||
prometheus.scrape "integrations_node_exporter" {
|
||||
scrape_interval = "1m"
|
||||
scrape_interval = "1m"
|
||||
// Use the targets with labels from the discovery.relabel component
|
||||
targets = discovery.relabel.integrations_node_exporter.output
|
||||
// Send the scraped metrics to the relabeling component
|
||||
|
|
|
@ -5,13 +5,44 @@
|
|||
modulesPath,
|
||||
...
|
||||
}:
|
||||
# TODO: Enable and configure prometheus-nginx-exporter and prometheus-nginxlog-exporter
|
||||
# to get some metrics into prometheus.
|
||||
let
|
||||
cfg = config.khscodes.services.nginx;
|
||||
locationOptions = import "${modulesPath}/services/web-servers/nginx/location-options.nix" {
|
||||
inherit lib config;
|
||||
};
|
||||
logfmt = ''
|
||||
log_format logfmt escape=default
|
||||
'ts="$time_iso8601" http_host="$http_host" request_method="$request_method" '
|
||||
'uri="$request_uri" ' # full path
|
||||
'status=$status ' # response status
|
||||
'remote_addr="$remote_addr" remote_port="$remote_port" ' # remote ip and port (port should be dynamic)
|
||||
'http_user_agent="$http_user_agent" '
|
||||
'http_referer="$http_referer" '
|
||||
'http_x_forwarded_for="$http_x_forwarded_for" '
|
||||
'server_name="$server_name" ' # the name of the vhost serving the request, may be different from http_host
|
||||
'scheme="$scheme" ' # http/https
|
||||
'body_bytes_sent=$body_bytes_sent ' # the number of body bytes exclude headers sent to a client
|
||||
'bytes_sent=$bytes_sent ' # the number of bytes sent to a client
|
||||
'ssl_protocol="$ssl_protocol" ' # TLS protocol
|
||||
'ssl_cipher="$ssl_cipher" ' # TLS cipher
|
||||
'server_protocol="$server_protocol" ' # request protocol, like HTTP/1.1 or HTTP/2.0
|
||||
'remote_user="$remote_user" ' # http user name (should be unused for this setup)
|
||||
'pid="$pid" ' # process pid
|
||||
'connection="$connection" ' # connection serial number
|
||||
'connection_requests="$connection_requests" ' # number of requests made in connection
|
||||
'request_id="$request_id" ' # unique id for the request
|
||||
'request_length=$request_length ' # request length (including headers and body)
|
||||
'gzip_ratio="$gzip_ratio" '
|
||||
'request_time=$request_time ' # request processing time in seconds with msec resolution
|
||||
'upstream="$upstream_addr" ' # upstream backend server for proxied requests
|
||||
'upstream_connect_time=$upstream_connect_time ' # upstream handshake time incl. TLS
|
||||
'upstream_header_time=$upstream_header_time ' # time spent receiving upstream headers
|
||||
'upstream_response_time=$upstream_response_time ' # time spent receiving upstream body
|
||||
'upstream_response_length=$upstream_response_length ' # upstream response length
|
||||
'upstream_cache_status="$upstream_cache_status" ' # cache HIT/MISS where applicable
|
||||
'msec=$msec' # unix timestamp with millis in fraction
|
||||
;
|
||||
'';
|
||||
vhostOption = lib.khscodes.mkSubmodule {
|
||||
description = "nginx vhost";
|
||||
options = {
|
||||
|
@ -133,12 +164,59 @@ in
|
|||
message = "Cannot use `config.khscodes.services.nginx.virtualHosts.<name>.acme = {}` without setting config.khscodes.security.acme.dns01Enabled";
|
||||
}
|
||||
];
|
||||
services.prometheus.exporters = {
|
||||
nginx = {
|
||||
enable = config.khscodes.infrastructure.vault-prometheus-sender.enable;
|
||||
};
|
||||
nginxlog = {
|
||||
enable = config.khscodes.infrastructure.vault-prometheus-sender.enable;
|
||||
settings = {
|
||||
namespaces = lib.lists.map (vhost: {
|
||||
name = vhost;
|
||||
format = "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$http_x_forwarded_for\" $upstream_response_time";
|
||||
metrics_override = {
|
||||
prefix = "";
|
||||
};
|
||||
namespace_label = "vhost";
|
||||
source = {
|
||||
files = [ "/var/log/nginx/access.${vhost}.log" ];
|
||||
};
|
||||
histogram_buckets = [
|
||||
0.005
|
||||
0.01
|
||||
0.025
|
||||
0.05
|
||||
0.1
|
||||
0.25
|
||||
0.5
|
||||
1
|
||||
2.5
|
||||
5
|
||||
10
|
||||
];
|
||||
}) (lib.attrsets.attrNames cfg.virtualHosts);
|
||||
};
|
||||
};
|
||||
};
|
||||
systemd.services.prometheus-nginxlog-exporter.serviceConfig.DynamicUser = false;
|
||||
khscodes.infrastructure.vault-prometheus-sender.exporters.enabled = [
|
||||
"nginx"
|
||||
"nginxlog"
|
||||
];
|
||||
users = {
|
||||
users.${config.services.prometheus.exporters.nginxlog.user} = {
|
||||
group = config.services.prometheus.exporters.nginxlog.group;
|
||||
extraGroups = [ config.services.nginx.group ];
|
||||
isSystemUser = true;
|
||||
};
|
||||
# Declare the group under the exporter's configured *group* name (the value the user account above is assigned to), not its user name, so the account always references a group that exists even if the two names differ.
groups.${config.services.prometheus.exporters.nginxlog.group} = { };
|
||||
};
|
||||
services.fail2ban.jails = {
|
||||
nginx-botsearch = {
|
||||
settings = {
|
||||
filter = "nginx-botsearch";
|
||||
port = "http,https";
|
||||
logpath = "/var/log/nginx/access.log";
|
||||
logpath = "/var/log/nginx/access.fail2ban.log";
|
||||
backend = "auto";
|
||||
findtime = 600;
|
||||
maxretry = 5;
|
||||
|
@ -148,10 +226,10 @@ in
|
|||
settings = {
|
||||
filter = "nginx-bad-request";
|
||||
port = "http,https";
|
||||
logpath = "/var/log/nginx/access.log";
|
||||
logpath = "/var/log/nginx/access.fail2ban.log";
|
||||
backend = "auto";
|
||||
findtime = 600;
|
||||
maxretry = 2;
|
||||
maxretry = 30;
|
||||
};
|
||||
};
|
||||
nginx-req-limit = {
|
||||
|
@ -160,7 +238,7 @@ in
|
|||
port = "http,https";
|
||||
backend = "systemd";
|
||||
findtime = 600;
|
||||
maxretry = 3;
|
||||
maxretry = 5;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
@ -190,6 +268,18 @@ in
|
|||
recommendedOptimisation = lib.mkDefault true;
|
||||
recommendedZstdSettings = lib.mkDefault true;
|
||||
recommendedProxySettings = lib.mkDefault true;
|
||||
commonHttpConfig = ''
|
||||
${logfmt}
|
||||
access_log /var/log/nginx/access.logfmt.log logfmt;
|
||||
|
||||
log_format fail2ban '$remote_addr - $remote_user [$time_local] "$request" '
|
||||
'$status $body_bytes_sent "$http_referer" '
|
||||
'"$http_user_agent" "$http_x_forwarded_for"';
|
||||
log_format nginx_exporter '$remote_addr - $remote_user [$time_local] "$request" '
|
||||
'$status $body_bytes_sent "$http_referer" '
|
||||
'"$http_user_agent" "$http_x_forwarded_for" $upstream_response_time';
|
||||
access_log /var/log/nginx/access.fail2ban.log fail2ban;
|
||||
'';
|
||||
appendHttpConfig = ''
|
||||
limit_req_zone $binary_remote_addr zone=nobots:10m rate=20r/s;
|
||||
map $scheme $hsts_header {
|
||||
|
@ -210,52 +300,6 @@ in
|
|||
"" "Unknown";
|
||||
}
|
||||
|
||||
log_format json_analytics escape=json '{'
|
||||
'"msec": "$msec", ' # request unixtime in seconds with a milliseconds resolution
|
||||
'"connection": "$connection", ' # connection serial number
|
||||
'"connection_requests": "$connection_requests", ' # number of requests made in connection
|
||||
'"pid": "$pid", ' # process pid
|
||||
'"request_id": "$request_id", ' # the unique request id
|
||||
'"request_length": "$request_length", ' # request length (including headers and body)
|
||||
'"remote_addr": "$remote_addr", ' # client IP
|
||||
'"remote_user": "$remote_user", ' # client HTTP username
|
||||
'"remote_port": "$remote_port", ' # client port
|
||||
'"time_local": "$time_local", '
|
||||
'"time_iso8601": "$time_iso8601", ' # local time in the ISO 8601 standard format
|
||||
'"request": "$request", ' # full path no arguments if the request
|
||||
'"request_uri": "$request_uri", ' # full path and arguments if the request
|
||||
'"args": "$args", ' # args
|
||||
'"status": "$status", ' # response status code
|
||||
'"body_bytes_sent": "$body_bytes_sent", ' # the number of body bytes exclude headers sent to a client
|
||||
'"bytes_sent": "$bytes_sent", ' # the number of bytes sent to a client
|
||||
'"http_referer": "$http_referer", ' # HTTP referer
|
||||
'"http_user_agent": "$http_user_agent", ' # user agent
|
||||
'"http_x_forwarded_for": "$http_x_forwarded_for", ' # http_x_forwarded_for
|
||||
'"http_host": "$http_host", ' # the request Host: header
|
||||
'"server_name": "$server_name", ' # the name of the vhost serving the request
|
||||
'"request_time": "$request_time", ' # request processing time in seconds with msec resolution
|
||||
'"upstream": "$upstream_addr", ' # upstream backend server for proxied requests
|
||||
'"upstream_connect_time": "$upstream_connect_time", ' # upstream handshake time incl. TLS
|
||||
'"upstream_header_time": "$upstream_header_time", ' # time spent receiving upstream headers
|
||||
'"upstream_response_time": "$upstream_response_time", ' # time spent receiving upstream body
|
||||
'"upstream_response_length": "$upstream_response_length", ' # upstream response length
|
||||
'"upstream_cache_status": "$upstream_cache_status", ' # cache HIT/MISS where applicable
|
||||
'"ssl_protocol": "$ssl_protocol", ' # TLS protocol
|
||||
'"ssl_cipher": "$ssl_cipher", ' # TLS cipher
|
||||
'"scheme": "$scheme", ' # http or https
|
||||
'"request_method": "$request_method", ' # request method
|
||||
'"server_protocol": "$server_protocol", ' # request protocol, like HTTP/1.1 or HTTP/2.0
|
||||
'"pipe": "$pipe", ' # "p" if request was pipelined, "." otherwise
|
||||
'"gzip_ratio": "$gzip_ratio"'
|
||||
'}';
|
||||
|
||||
access_log /var/log/nginx/access.json.log json_analytics;
|
||||
|
||||
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
|
||||
'$status $body_bytes_sent "$http_referer" '
|
||||
'"$http_user_agent" "$http_x_forwarded_for"';
|
||||
access_log /var/log/nginx/access.log main;
|
||||
|
||||
${modernSslAppendedHttpConfig}
|
||||
'';
|
||||
virtualHosts = lib.attrsets.mapAttrs (
|
||||
|
@ -275,6 +319,9 @@ in
|
|||
extraConfig = ''
|
||||
${mtls}
|
||||
${reqLimit}
|
||||
access_log /var/log/nginx/access.fail2ban.log fail2ban;
|
||||
access_log /var/log/nginx/access.logfmt.log logfmt;
|
||||
access_log /var/log/nginx/access.${name}.log nginx_exporter;
|
||||
${value.extraConfig}
|
||||
'';
|
||||
in
|
||||
|
|
|
@ -12,21 +12,33 @@ loki_send "nginx_stream_error" {
|
|||
}
|
||||
loki.source.file "nginx_access_logs" {
|
||||
targets = [{
|
||||
__path__ = "/var/log/nginx/access.json.log",
|
||||
__path__ = "/var/log/nginx/access.logfmt.log",
|
||||
}]
|
||||
forward_to = [loki.process.nginx_access_logs.receiver]
|
||||
}
|
||||
|
||||
loki.process "nginx_access_logs" {
|
||||
forward_to = [loki_send.nginx_access.receiver]
|
||||
stage.json {
|
||||
expressions = {
|
||||
timestamp = "time_iso8601",
|
||||
stage.logfmt {
|
||||
mapping = {
|
||||
timestamp = "msec",
|
||||
http_host = "http_host",
|
||||
level = "status",
|
||||
}
|
||||
}
|
||||
stage.timestamp {
|
||||
source = "timestamp"
|
||||
format = "RFC3339"
|
||||
format = "Unix"
|
||||
}
|
||||
stage.template {
|
||||
source = "level"
|
||||
template = "{{$v := atoi .Value}}{{if gt $v 499}}error{{else if gt $v 399}}warn{{ else if lt $v 200}}debug{{else}}info{{end}}"
|
||||
}
|
||||
stage.structured_metadata {
|
||||
values = {
|
||||
level = "",
|
||||
http_host = "",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
groups:
|
||||
- name: Http
|
||||
rules:
|
||||
- alert: NginxDown
|
||||
expr: >
|
||||
nginx_up{job="nginx"} == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Nginx on {{ $labels.instance }} is down"
|
||||
- alert: HighHttpErrorRate
|
||||
expr: >
|
||||
sum by(vhost, instance) (rate(http_response_count_total{status=~"5..",job="nginxlog"}[1m])) /
|
||||
sum by(vhost, instance) (rate(http_response_count_total{job="nginxlog"}[1m]))
|
||||
> 0
|
||||
for: 30m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
# The rule fires once the 5xx ratio has stayed above zero for the whole `for: 30m` window, so the message states that duration rather than "1 hour".
summary: "Nginx VHost {{ $labels.vhost }} on {{ $labels.instance }} has been returning internal server errors (5xx) for at least 30 minutes"
|
|
@ -1,12 +1,18 @@
|
|||
{
|
||||
inputs,
|
||||
config,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
let
|
||||
grafana = config.services.grafana;
|
||||
loki = config.services.loki;
|
||||
prometheus = config.services.prometheus;
|
||||
nginxExporterSrc = "${pkgs.prometheus-nginx-exporter.src}/grafana/dashboard.json";
|
||||
postgresqlDashboard = pkgs.fetchurl {
|
||||
url = "https://grafana.com/api/dashboards/9628/revisions/8/download";
|
||||
hash = "sha256-UhusNAZbyt7fJV/DhFUK4FKOmnTpG0R15YO2r+nDnMc=";
|
||||
};
|
||||
in
|
||||
{
|
||||
imports = [
|
||||
|
@ -54,16 +60,33 @@ in
|
|||
};
|
||||
provision = {
|
||||
enable = true;
|
||||
alerting = {
|
||||
rules = {
|
||||
settings = {
|
||||
deleteRules = [
|
||||
{
|
||||
uid = "desmw56u3jfgga";
|
||||
orgId = 1;
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
};
|
||||
datasources.settings.datasources = [
|
||||
{
|
||||
url = "http://${loki.configuration.server.http_listen_address}:${toString loki.configuration.server.http_listen_port}";
|
||||
type = "loki";
|
||||
name = "Logs";
|
||||
uid = "loki";
|
||||
}
|
||||
{
|
||||
url = "http://${prometheus.listenAddress}:${toString prometheus.port}";
|
||||
type = "prometheus";
|
||||
name = "Metrics";
|
||||
uid = "prometheus";
|
||||
jsonData = {
|
||||
manageAlerts = true;
|
||||
};
|
||||
}
|
||||
];
|
||||
dashboards.settings.providers = [
|
||||
|
@ -71,6 +94,14 @@ in
|
|||
name = "Node Exporter";
|
||||
options.path = ./grafana/dashboards/node_exporter;
|
||||
}
|
||||
{
|
||||
name = "Nginx";
|
||||
options.path = nginxExporterSrc;
|
||||
}
|
||||
{
|
||||
name = "Postgresql";
|
||||
options.path = postgresqlDashboard;
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
|
@ -78,6 +109,10 @@ in
|
|||
enable = true;
|
||||
listenAddress = "127.0.0.1";
|
||||
extraFlags = [ "--web.enable-otlp-receiver" ];
|
||||
# alertmanager.enable = true;
|
||||
rules = [
|
||||
(builtins.readFile ./alerts/http.yaml)
|
||||
];
|
||||
};
|
||||
services.loki = {
|
||||
enable = true;
|
||||
|
@ -130,16 +165,21 @@ in
|
|||
delete_request_store = "filesystem";
|
||||
working_directory = "${config.services.loki.dataDir}/retention";
|
||||
};
|
||||
limits_config = {
|
||||
allow_structured_metadata = true;
|
||||
discover_log_levels = true;
|
||||
};
|
||||
};
|
||||
};
|
||||
khscodes = {
|
||||
infrastructure.khs-openstack-instance = {
|
||||
enable = true;
|
||||
flavor = "m.medium";
|
||||
flavor = "m.large";
|
||||
};
|
||||
services.nginx = {
|
||||
enable = true;
|
||||
virtualHosts."monitoring.kaareskovgaard.net" = {
|
||||
rateLimit.enable = false;
|
||||
locations."/" = {
|
||||
proxyPass = "http://${grafana.settings.server.http_addr}:${toString grafana.settings.server.http_port}";
|
||||
proxyWebsockets = true;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue