From 9fbfd0ce02e3331859c765ca6df248651d789b16 Mon Sep 17 00:00:00 2001 From: Kaare Hoff Skovgaard Date: Sun, 10 Aug 2025 22:01:27 +0200 Subject: [PATCH] Attempt to improve zfs setup a bit --- nix/modules/nixos/fs/zfs/default.nix | 197 ++++++++++++++++----------- rust/program/zpool-setup/src/main.rs | 97 +++++++++---- rust/program/zpool-setup/src/zfs.rs | 19 ++- 3 files changed, 211 insertions(+), 102 deletions(-) diff --git a/nix/modules/nixos/fs/zfs/default.nix b/nix/modules/nixos/fs/zfs/default.nix index bf4e59d..1b519c0 100644 --- a/nix/modules/nixos/fs/zfs/default.nix +++ b/nix/modules/nixos/fs/zfs/default.nix @@ -120,85 +120,126 @@ in "${cfg.mainPoolName}" = { }; }; }; - }; - config = lib.mkIf cfg.enable { - # TODO: Verify that each member disk is uniquely named, and exists somewhere? - assertions = lib.lists.map ( - { name, value }: - { - assertion = (lib.lists.length value.vdevs) > 0; - message = "Zpool ${name} contains no vdevs"; - } - ) (lib.attrsToList cfg.zpools); - boot.supportedFilesystems = { - zfs = true; - }; - # On servers, we handle importing, creating and mounting of the pool manually. - boot.zfs = { - forceImportRoot = false; - requestEncryptionCredentials = false; - }; - services.zfs.autoScrub.enable = true; - systemd.services.zfs-mount.enable = false; - systemd.services.zfs-import-zroot.enable = false; - systemd.services.khscodes-zpool-setup = { - after = [ - "network-online.target" - ]; - wants = [ - "network-online.target" - ]; - wantedBy = [ - "multi-user.target" - ]; - environment = { - BAO_ADDR = config.khscodes.services.vault-agent.vault.address; - VAULT_ROLE_ID_FILE = "/var/lib/vault-agent/role-id"; - VAULT_SECRET_ID_FILE = "/var/lib/vault-agent/secret-id"; - DISK_MAPPING_FILE = "/run/secret/disk-mapping.json"; - LOGLEVEL = "trace"; - } - // (lib.attrsets.optionalAttrs isTest { - ZFS_TEST = "true"; - }); - unitConfig.ConditionPathExists = [ - "/run/secret/disk-mapping.json" - ] - ++ lib.lists.optionals (!isTest) [ - "/var/lib/vault-agent/role-id" - "/var/lib/vault-agent/secret-id" - ]; - serviceConfig = { - Type = "oneshot"; - RemainAfterExit = true; - ExecStart = '' - ${lib.strings.concatStringsSep "\n" setupZpools} - ''; + services = { + postgresql = { + enable = lib.option { + description = "Enables storing postgresql data on a zfs zpool"; + type = lib.types.bool; + default = cfg.enable && config.services.postgresql.enable; + }; + pool = lib.mkOption { + type = lib.types.str; + default = cfg.mainPoolName; + }; + datasetName = lib.mkOption { + type = lib.types.str; + default = "database/postgresql"; + }; + datasetConfig = lib.mkOption { + type = datasetModule; + default = { + mountpoint = config.services.postgresql.dataDir; + }; + }; }; }; - khscodes.infrastructure.vault-server-approle.policy = lib.mapAttrs' (name: value: { - name = "${value.encryptionKeyOpenbao.mount}/data/${value.encryptionKeyOpenbao.name}"; - value = { - capabilities = [ "read" ]; - }; - }) cfg.zpools; - # Reading the disk setup through anopenbao secret allows - # the service to be restarted when adding new disks, or resizing existing disks. 
- khscodes.services.vault-agent.templates = [ - { - contents = '' - {{- with secret "data-disks/data/${config.khscodes.networking.fqdn}" -}} - {{ .Data.data | toUnescapedJSON }} - {{- end -}} - ''; - destination = "/run/secret/disk-mapping.json"; - owner = "root"; - group = "root"; - perms = "0644"; - restartUnits = [ "khscodes-zpool-setup.service" ]; - } - ]; - services.prometheus.exporters.zfs.enable = true; - khscodes.infrastructure.vault-prometheus-sender.exporters.enabled = [ "zfs" ]; }; + config = lib.mkMerge [ + (lib.mkIf cfg.enable { + # TODO: Verify that each member disk is uniquely named, and exists somewhere? + assertions = lib.lists.map ( + { name, value }: + { + assertion = (lib.lists.length value.vdevs) > 0; + message = "Zpool ${name} contains no vdevs"; + } + ) (lib.attrsToList cfg.zpools); + boot.supportedFilesystems = { + zfs = true; + }; + # On servers, we handle importing, creating and mounting of the pool manually. + boot.zfs = { + forceImportRoot = false; + requestEncryptionCredentials = false; + }; + services.zfs.autoScrub.enable = true; + systemd.services.zfs-mount.enable = false; + systemd.services.zfs-import-zroot.enable = false; + systemd.services.khscodes-zpool-setup = { + after = [ + "network-online.target" + ]; + wants = [ + "network-online.target" + ]; + wantedBy = [ + "multi-user.target" + ]; + environment = { + BAO_ADDR = config.khscodes.services.vault-agent.vault.address; + VAULT_ROLE_ID_FILE = "/var/lib/vault-agent/role-id"; + VAULT_SECRET_ID_FILE = "/var/lib/vault-agent/secret-id"; + DISK_MAPPING_FILE = "/run/secret/disk-mapping.json"; + LOGLEVEL = "trace"; + } + // (lib.attrsets.optionalAttrs isTest { + ZFS_TEST = "true"; + }); + unitConfig.ConditionPathExists = [ + "/run/secret/disk-mapping.json" + ] + ++ lib.lists.optionals (!isTest) [ + "/var/lib/vault-agent/role-id" + "/var/lib/vault-agent/secret-id" + ]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + ExecStart = '' + ${lib.strings.concatStringsSep "\n" setupZpools} + ''; + }; + }; + khscodes.infrastructure.vault-server-approle.policy = lib.mapAttrs' (name: value: { + name = "${value.encryptionKeyOpenbao.mount}/data/${value.encryptionKeyOpenbao.name}"; + value = { + capabilities = [ "read" ]; + }; + }) cfg.zpools; + # Reading the disk setup through anopenbao secret allows + # the service to be restarted when adding new disks, or resizing existing disks. 
+ khscodes.services.vault-agent.templates = [ + { + contents = '' + {{- with secret "data-disks/data/${config.khscodes.networking.fqdn}" -}} + {{ .Data.data | toUnescapedJSON }} + {{- end -}} + ''; + destination = "/run/secret/disk-mapping.json"; + owner = "root"; + group = "root"; + perms = "0644"; + restartUnits = [ "khscodes-zpool-setup.service" ]; + } + ]; + services.prometheus.exporters.zfs.enable = true; + khscodes.infrastructure.vault-prometheus-sender.exporters.enabled = [ "zfs" ]; + }) + (lib.mkIf (cfg.enable && cfg.services.postgresql.enable) { + khscodes.fs.zfs.zpools."${cfg.services.postgresql.pool + }".datasets."${cfg.services.postgresql.datasetName}" = + cfg.services.postgresql.datasetConfig; + systemd.services.postgresql = { + after = [ "khscodes-zpool-setup.service" ]; + unitConfig = { + RequiresMountsFor = cfg.services.postgresql.datasetConfig.mountpoint; + }; + }; + systemd.services.khscodes-zpool-setup = { + ExecStartPost = '' + chown ${config.services.postgresql.user}:${config.services.postgresql.group} ${lib.escapeShellArg cfg.services.postgresql.datasetConfig.mountpoint} + ''; + }; + }) + ]; } diff --git a/rust/program/zpool-setup/src/main.rs b/rust/program/zpool-setup/src/main.rs index b7bdf0f..fea25f3 100644 --- a/rust/program/zpool-setup/src/main.rs +++ b/rust/program/zpool-setup/src/main.rs @@ -1,10 +1,13 @@ use serde::Deserialize; -use std::{collections::BTreeMap, path::PathBuf}; +use std::{ + collections::{BTreeMap, HashMap}, + path::{Path, PathBuf}, +}; use anyhow::Context as _; use clap::{Parser, Subcommand}; -use crate::cli::Dataset; +use crate::cli::{Dataset, VdevMode}; mod cli; mod disk_mapping; @@ -158,23 +161,52 @@ enum ZpoolState { #[serde(tag = "vdev_type")] enum ZpoolStatusVdev { #[serde(rename = "root")] - Root { - name: String, - state: ZpoolState, - vdevs: HashMap, - }, + Root(ZpoolStatusVdevRoot), #[serde(rename = "disk")] - Disk { - name: String, - state: ZpoolState, - path: PathBuf, - }, + Disk(ZpoolStatusVdevDisk), #[serde(rename = "mirror")] - Mirror { - name: String, - state: ZpoolState, - vdevs: HashMap, + #[allow(dead_code)] + Mirror(ZpoolStatusVdevMirror), +} + +impl ZpoolStatusVdev { + pub fn as_root(&self) -> anyhow::Result<&ZpoolStatusVdevRoot> { + match self { + Self::Root(root) => Ok(root), + _ => Err(anyhow::format_err!("VDev was not a root vdev")), + } } + pub fn is_vdev_for_disk(&self, disk_path: &Path) -> bool { + matches!(self, Self::Disk(disk) if disk.path == disk_path) + } +} + +#[derive(Deserialize)] +struct ZpoolStatusVdevRoot { + #[allow(dead_code)] + name: String, + #[allow(dead_code)] + state: ZpoolState, + vdevs: HashMap, +} + +#[derive(Deserialize)] +struct ZpoolStatusVdevDisk { + #[allow(dead_code)] + name: String, + #[allow(dead_code)] + state: ZpoolState, + path: PathBuf, +} + +#[derive(Deserialize)] +struct ZpoolStatusVdevMirror { + #[allow(dead_code)] + name: String, + #[allow(dead_code)] + state: ZpoolState, + #[allow(dead_code)] + vdevs: HashMap, } fn setup_zpool(p: SetupZpool) -> anyhow::Result<()> { @@ -208,15 +240,34 @@ fn setup_zpool(p: SetupZpool) -> anyhow::Result<()> { { return Err(anyhow::format_err!("Zpool {} is not online", p.pool_name)); } - - // TODO: Run through the existing VDevs and add any missing vdevs, and add any missing disks - // as needed to any vdevs. Not exactly sure how this should be coded, but I guess we can utilize - // the fact we cannot really change vdev type beyond turning a disk vdev into a mirror vdev, - // and any single disk can only belong to one vdev. 
So we can simply not support moving disks between vdevs. - // Also, to begin with, we can simply not support any vdev other than disk vdevs, as it doesn't make much - // sense for my use case. + + let root_vdev = pool + .vdevs + .get(&p.pool_name) + .ok_or_else(|| anyhow::format_err!("Root vdev of pool not found"))?; + let root_vdev = root_vdev.as_root()?; for vdev in p.vdevs.iter() { + if vdev.mode != VdevMode::Mirror { + return Err(anyhow::format_err!( + "Vdev contains non mirror mode vdev, this is currently not supported" + )); + } + if vdev.members.len() != 1 { + return Err(anyhow::format_err!( + "Vdev contains more than one member, this is currently not supported" + )); + } + let main_member = &vdev.members[0]; + let resolved_main_member = disk_mapping.resolve(main_member)?; + let resolved_main_member = PathBuf::from(resolved_main_member); + if !root_vdev + .vdevs + .iter() + .any(|(_, vdev)| vdev.is_vdev_for_disk(&resolved_main_member)) + { + zfs::add_vdev_to_pool(&p.pool_name, &disk_mapping, vdev)?; + } for member in vdev.members.iter() { let resolved = disk_mapping.resolve(member)?; zfs::resize_disk(&p.pool_name, &resolved)?; diff --git a/rust/program/zpool-setup/src/zfs.rs b/rust/program/zpool-setup/src/zfs.rs index 6689aae..1bb60ba 100644 --- a/rust/program/zpool-setup/src/zfs.rs +++ b/rust/program/zpool-setup/src/zfs.rs @@ -4,7 +4,11 @@ use anyhow::Context as _; use common::proc::Command; use serde::Deserialize; -use crate::{SetupZpool, cli::Dataset, disk_mapping::DiskMapping}; +use crate::{ + SetupZpool, + cli::{Dataset, Vdev}, + disk_mapping::DiskMapping, +}; #[derive(Debug, Deserialize, PartialEq)] enum ZpoolState { @@ -88,6 +92,19 @@ pub fn create_pool( Ok(()) } +pub fn add_vdev_to_pool( + pool_name: &str, + disk_mapping: &DiskMapping, + vdev: &Vdev, +) -> anyhow::Result<()> { + let mut proc = Command::new("zpool"); + proc.args(["add", pool_name]); + proc.args(vdev.cli_args(disk_mapping)?); + + proc.try_spawn_to_bytes()?; + Ok(()) +} + pub fn create_dataset_recursive( pool_name: &str, dataset_name: &str,
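
Note on the new idempotency logic in setup_zpool, for reviewers: it comes down to deserializing the pool's current vdev tree from zpool status and only adding a configured member whose resolved device path is not already backing a disk vdev under the root vdev. The standalone sketch below illustrates that idea; it is not code from this patch. It uses simplified local types (ZpoolState and the name/state fields are omitted because the membership check does not need them), assumes serde with the derive feature plus serde_json are available, and the hand-written JSON only mirrors the shape these structs expect rather than real "zpool status -j" output.

use std::collections::HashMap;
use std::path::{Path, PathBuf};

use serde::Deserialize;

// Simplified stand-in for ZpoolStatusVdev: internally tagged on "vdev_type",
// keeping only the fields the membership check needs.
#[derive(Deserialize)]
#[serde(tag = "vdev_type")]
enum VdevNode {
    #[serde(rename = "root")]
    Root { vdevs: HashMap<String, VdevNode> },
    #[serde(rename = "disk")]
    Disk { path: PathBuf },
    #[serde(rename = "mirror")]
    #[allow(dead_code)]
    Mirror { vdevs: HashMap<String, VdevNode> },
}

impl VdevNode {
    // Same idea as is_vdev_for_disk: a disk node already owns a device when
    // its recorded path equals the resolved member path.
    fn is_vdev_for_disk(&self, disk_path: &Path) -> bool {
        matches!(self, VdevNode::Disk { path } if path == disk_path)
    }
}

fn main() {
    // Hand-written JSON matching the structs above; real zpool output carries
    // extra fields (name, state, ...) which serde ignores here because the
    // structs do not declare them.
    let root: VdevNode = serde_json::from_str(
        r#"{
            "vdev_type": "root",
            "vdevs": {
                "disk-a": { "vdev_type": "disk", "path": "/dev/disk/by-id/disk-a" }
            }
        }"#,
    )
    .expect("vdev tree should deserialize");

    let wanted = PathBuf::from("/dev/disk/by-id/disk-b");
    let already_present = match &root {
        VdevNode::Root { vdevs } => vdevs.values().any(|v| v.is_vdev_for_disk(&wanted)),
        _ => false,
    };
    // A member that is not present yet is what triggers add_vdev_to_pool.
    println!("needs zpool add: {}", !already_present);
}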
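
Since setup_zpool now rejects anything other than a mirror-mode vdev with exactly one member, the effect of add_vdev_to_pool is essentially appending one resolved device to the pool as a new top-level vdev. Vdev::cli_args is not part of this diff, so treating its output as a bare device path is an assumption; the sketch below only shows the corresponding plain zpool invocation through std::process::Command rather than the common::proc wrapper used in the patch.

use std::process::Command;

// Hypothetical helper showing the shell-level effect of add_vdev_to_pool for a
// single resolved member, i.e. roughly: zpool add <pool> <device>.
fn add_single_disk_vdev(pool: &str, device: &str) -> std::io::Result<()> {
    let status = Command::new("zpool").args(["add", pool, device]).status()?;
    if !status.success() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("zpool add {pool} {device} exited with {status}"),
        ));
    }
    Ok(())
}

fn main() -> std::io::Result<()> {
    // Example values only; in the patch the device comes from DiskMapping::resolve
    // and the pool name from the setup arguments.
    add_single_disk_vdev("zroot", "/dev/disk/by-id/example-disk")
}

Because zpool add grows the pool with another top-level vdev rather than attaching to an existing one, the membership check against the deserialized vdev tree is what keeps repeated runs of the oneshot service idempotent.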