{ pkgs, config, lib, ... }: with lib; let cfg = config.myModules.railbird-k3s; mount-path = "/var/lib/railbird/bucket"; bucket-name = "railbird-dev-videos"; plugins-path = pkgs.buildEnv { name = "combined-cni-plugins"; paths = [ pkgs.cni-plugins pkgs.calico-cni-plugin pkgs.calico-kube-controllers pkgs.cni-plugin-flannel ]; }; nvidia-device-plugin-version = "v0.19.1"; nvidia-device-plugin-manifest = pkgs.writeText "nvidia-device-plugin.yaml" '' apiVersion: node.k8s.io/v1 handler: nvidia kind: RuntimeClass metadata: name: nvidia labels: app.kubernetes.io/component: gpu-operator --- apiVersion: apps/v1 kind: DaemonSet metadata: name: nvidia-device-plugin-daemonset namespace: kube-system labels: app.kubernetes.io/name: nvidia-device-plugin spec: selector: matchLabels: app.kubernetes.io/name: nvidia-device-plugin updateStrategy: type: RollingUpdate template: metadata: labels: app.kubernetes.io/name: nvidia-device-plugin spec: runtimeClassName: nvidia priorityClassName: system-node-critical nodeSelector: nvidia.com/gpu.present: "true" tolerations: - key: nvidia.com/gpu operator: Exists effect: NoSchedule containers: - name: nvidia-device-plugin-ctr image: nvcr.io/nvidia/k8s-device-plugin:${nvidia-device-plugin-version} imagePullPolicy: IfNotPresent command: ["nvidia-device-plugin"] env: - name: DEVICE_ID_STRATEGY value: uuid - name: NVIDIA_VISIBLE_DEVICES value: all - name: NVIDIA_DRIVER_CAPABILITIES value: compute,utility securityContext: allowPrivilegeEscalation: false capabilities: drop: ["ALL"] volumeMounts: - name: kubelet-device-plugins-dir mountPath: /var/lib/kubelet/device-plugins - name: cdi-specs mountPath: /var/run/cdi readOnly: true volumes: - name: kubelet-device-plugins-dir hostPath: path: /var/lib/kubelet/device-plugins type: Directory - name: cdi-specs hostPath: path: /var/run/cdi type: DirectoryOrCreate ''; gpu-test-pod = pkgs.writeText "gpu-test-pod.yaml" '' apiVersion: v1 kind: Pod metadata: name: gpu-test namespace: default spec: restartPolicy: Never runtimeClassName: nvidia containers: - name: cuda-test image: nvcr.io/nvidia/cuda:12.6.3-base-ubuntu24.04 command: ["nvidia-smi"] resources: limits: nvidia.com/gpu: 1 ''; in { options = { myModules.railbird-k3s = { enable = mkEnableOption "railbird k3s"; serverAddr = mkOption { type = lib.types.str; default = ""; description = '' Existing k3s server URL to join. Leave empty for the first or only server; that enables cluster initialization with no peers. ''; }; extraFlags = mkOption { type = lib.types.listOf lib.types.str; default = []; }; }; }; config = mkIf cfg.enable { users.users.railbird = { isSystemUser = true; group = "users"; home = "/var/lib/railbird"; createHome = true; }; age.secrets."1896Folsom-k3s-token.age".file = ./secrets/1896Folsom-k3s-token.age; age.secrets."k3s-registry.yaml.age".file = ./secrets/k3s-registry.yaml.age; age.secrets.api-service-key = { file = ./secrets/api_service_account_key.json.age; owner = "railbird"; group = "users"; }; environment.etc."rancher/k3s/registries.yaml".source = config.age.secrets."k3s-registry.yaml.age".path; services.dockerRegistry = { enable = true; listenAddress = "0.0.0.0"; port = 5279; enableDelete = true; enableGarbageCollect = true; }; virtualisation.containerd = { enable = true; settings = { plugins."io.containerd.cri.v1.runtime" = { enable_cdi = true; cdi_spec_dirs = ["/var/run/cdi"]; }; plugins."io.containerd.grpc.v1.cri" = { enable_cdi = true; cdi_spec_dirs = ["/var/run/cdi"]; cni.bin_dir = "${plugins-path}/bin"; }; }; }; hardware.nvidia-container-toolkit = { enable = true; device-name-strategy = "uuid"; mount-nvidia-executables = true; }; virtualisation.containers = { containersConf.cniPlugins = [ pkgs.cni-plugins pkgs.calico-cni-plugin pkgs.calico-kube-controllers pkgs.cni-plugin-flannel ]; }; systemd.services = { # k3s can sit in sd_notify startup indefinitely while waiting for remote # etcd peers. Treat it as a long-running service so nixos-rebuild switch # does not block on cluster readiness. k3s.serviceConfig = { Type = mkForce "simple"; }; nvidia-container-toolkit-cdi-generator = { # Even with `--library-search-path`, `nvidia-ctk` won't find the libs # unless I bodge their path into the environment. environment.LD_LIBRARY_PATH = "${config.hardware.nvidia.package}/lib"; }; }; systemd.services.mount-railbird-bucket = { after = ["agenix.service"]; wantedBy = ["multi-user.target"]; description = "Mount railbird bucket"; serviceConfig = { Type = "simple"; RemainAfterExit = true; Restart = "on-failure"; # Restart the service on failure RestartSec = 5; # Wait 5 seconds before restarti TimeoutStopSec = 2; ExecStartPre = [ "-${pkgs.util-linux}/bin/umount -f ${mount-path}" "${pkgs.coreutils}/bin/mkdir -p ${mount-path}" "${pkgs.coreutils}/bin/chown railbird:users ${mount-path}" "${pkgs.coreutils}/bin/chmod 0775 ${mount-path}" ]; ExecStart = let key-file = config.age.secrets.api-service-key.path; in pkgs.writeShellScript "mount-railbird-bucket" '' while true; do if ${pkgs.util-linux}/bin/mount | grep -q "${mount-path}" && [ -d "${mount-path}/dev" ]; then echo "Mount path ${mount-path} is mounted and valid (contains directory 'dev')." else echo "Mount path is not valid or not mounted, attempting remount." ${pkgs.util-linux}/bin/umount -f "${mount-path}" || true ${pkgs.gcsfuse}/bin/gcsfuse --implicit-dirs --key-file "${key-file}" "${bucket-name}" "${mount-path}" fi echo "Sleeping" sleep 30 done ''; User = "root"; }; }; services.k3s = { enable = true; clusterInit = cfg.serverAddr == ""; serverAddr = cfg.serverAddr; configPath = pkgs.writeTextFile { name = "k3s-config.yaml"; text = '' kubelet-arg: - "eviction-hard=nodefs.available<2Gi" - "eviction-soft=nodefs.available<5Gi" - "eviction-soft-grace-period=nodefs.available=5m" ''; }; tokenFile = config.age.secrets."1896Folsom-k3s-token.age".path; extraFlags = [ "--tls-san ryzen-shine.local" "--tls-san nixquick.local" "--tls-san biskcomp.local" "--tls-san jimi-hendnix.local" "--tls-san dev.railbird.ai" "--disable=traefik" "--disable=servicelb" "--node-label nixos-nvidia-cdi=enabled" "--node-label nvidia.com/gpu.present=true" "--etcd-arg=quota-backend-bytes=8589934592" ] ++ cfg.extraFlags; containerdConfigTemplate = '' {{ template "base" . }} plugins."io.containerd.grpc.v1.cri".cdi_spec_dirs = [ "/var/run/cdi" ] plugins."io.containerd.grpc.v1.cri".enable_cdi = true [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia] privileged_without_host_devices = false runtime_engine = "" runtime_root = "" runtime_type = "io.containerd.runc.v2" [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options] BinaryName = "${lib.getOutput "tools" config.hardware.nvidia-container-toolkit.package}/bin/nvidia-container-runtime.cdi" [debug] level = "trace" ''; gracefulNodeShutdown = { enable = true; }; }; environment.systemPackages = with pkgs; [ kubectl kubernetes-helm nvidia-container-toolkit nvidia-container-toolkit.tools ]; environment.etc."k3s/gpu-test-pod.yaml".source = gpu-test-pod; environment.etc."k3s/nvidia-device-plugin.yaml".source = nvidia-device-plugin-manifest; systemd.services.k3s-gpu-plugin-deploy = { description = "Deploy NVIDIA device plugin to k3s"; after = ["k3s.service"]; wants = ["k3s.service"]; wantedBy = ["multi-user.target"]; path = [pkgs.kubectl pkgs.coreutils]; serviceConfig = { Type = "oneshot"; RemainAfterExit = true; ExecStart = pkgs.writeShellScript "deploy-nvidia-device-plugin" '' export KUBECONFIG=/etc/rancher/k3s/k3s.yaml echo "Waiting for k3s API server..." for i in $(seq 1 60); do if kubectl get nodes &>/dev/null; then echo "k3s API server is ready" break fi sleep 5 done kubectl delete daemonset -n kube-system generic-cdi-plugin --ignore-not-found=true kubectl apply -f ${nvidia-device-plugin-manifest} kubectl rollout status daemonset/nvidia-device-plugin-daemonset -n kube-system --timeout=120s || true ''; }; }; }; }