diff --git a/nixos/configuration.nix b/nixos/configuration.nix
index 2c56f95e..5ff89f23 100644
--- a/nixos/configuration.nix
+++ b/nixos/configuration.nix
@@ -21,6 +21,7 @@
     ./imalison.nix
     ./internet-computer.nix
     ./k3s.nix
+    ./k3s-single-node.nix
     ./kat.nix
     ./keybase.nix
     ./kubelet.nix
diff --git a/nixos/k3s-single-node.nix b/nixos/k3s-single-node.nix
new file mode 100644
index 00000000..2630ccea
--- /dev/null
+++ b/nixos/k3s-single-node.nix
@@ -0,0 +1,205 @@
+{
+  pkgs,
+  config,
+  lib,
+  ...
+}:
+with lib; let
+  cfg = config.myModules.k3s-single-node;
+  plugins-path = pkgs.buildEnv { # NOTE(review): unreferenced in this module — wire into k3s CNI flags or drop
+    name = "combined-cni-plugins";
+    paths = [
+      pkgs.cni-plugins
+      pkgs.cni-plugin-flannel
+    ];
+  };
+
+  # Generic CDI Plugin DaemonSet for GPU resource allocation
+  generic-cdi-plugin-manifest = pkgs.writeText "generic-cdi-plugin.yaml" ''
+    apiVersion: apps/v1
+    kind: DaemonSet
+    metadata:
+      name: generic-cdi-plugin
+      namespace: kube-system
+      labels:
+        app: generic-cdi-plugin
+    spec:
+      selector:
+        matchLabels:
+          app: generic-cdi-plugin
+      template:
+        metadata:
+          labels:
+            app: generic-cdi-plugin
+        spec:
+          nodeSelector:
+            nixos-nvidia-cdi: "enabled"
+          tolerations:
+            - key: nvidia.com/gpu
+              operator: Exists
+              effect: NoSchedule
+          containers:
+            - name: generic-cdi-plugin
+              image: ghcr.io/olfillasodikno/generic-cdi-plugin:main
+              imagePullPolicy: Always
+              args:
+                - "/var/run/cdi/nvidia-container-toolkit.json"
+              securityContext:
+                privileged: true
+              volumeMounts:
+                - name: device-plugin
+                  mountPath: /var/lib/kubelet/device-plugins
+                - name: pod-resources
+                  mountPath: /var/lib/kubelet/pod-resources
+                - name: cdi-specs
+                  mountPath: /var/run/cdi
+                  readOnly: true
+          volumes:
+            - name: device-plugin
+              hostPath:
+                path: /var/lib/kubelet/device-plugins
+            - name: pod-resources
+              hostPath:
+                path: /var/lib/kubelet/pod-resources
+            - name: cdi-specs
+              hostPath:
+                path: /var/run/cdi
+  '';
+
+  # Test pod to verify GPU access
+  gpu-test-pod = pkgs.writeText "gpu-test-pod.yaml" ''
+    apiVersion: v1
+    kind: Pod
+    metadata:
+      name: gpu-test
+      namespace: default
+    spec:
+      restartPolicy: Never
+      containers:
+        - name: cuda-test
+          image: nvidia/cuda:12.6.3-base-ubuntu24.04
+          command: ["nvidia-smi"]
+          resources:
+            limits:
+              nvidia.com/gpu-all: 1
+  '';
+in {
+  options = {
+    myModules.k3s-single-node = {
+      enable = mkEnableOption "single-node k3s with GPU/CDI support";
+      extraFlags = mkOption {
+        type = types.listOf types.str;
+        default = [];
+        description = "Extra flags to pass to k3s";
+      };
+    };
+  };
+
+  config = mkIf cfg.enable {
+    # NVIDIA container toolkit for CDI spec generation
+    hardware.nvidia-container-toolkit.enable = true;
+
+    # Ensure CDI generator has access to nvidia libs
+    systemd.services.nvidia-container-toolkit-cdi-generator = {
+      environment.LD_LIBRARY_PATH = "${config.hardware.nvidia.package}/lib";
+    };
+
+    # k3s configuration
+    services.k3s = {
+      enable = true;
+      role = "server";
+      clusterInit = true;
+
+      configPath = pkgs.writeTextFile {
+        name = "k3s-config.yaml";
+        text = ''
+          # Disable servicelb and traefik for a minimal single-node setup
+          disable:
+            - servicelb
+            - traefik
+          kubelet-arg:
+            - "eviction-hard=nodefs.available<2Gi"
+            - "eviction-soft=nodefs.available<5Gi"
+            - "eviction-soft-grace-period=nodefs.available=5m"
+        '';
+      };
+
+      extraFlags = [
+        "--node-label=nixos-nvidia-cdi=enabled"
+        "--tls-san=${config.networking.hostName}"
+        "--tls-san=${config.networking.hostName}.local"
+        "--tls-san=localhost"
+      ] ++ cfg.extraFlags;
+
+      # Containerd config with CDI support
+      # k3s 1.31+ with containerd 2.0 has CDI enabled by default
+      # We only need to add the nvidia runtime configuration
+      containerdConfigTemplate = ''
+        {{ template "base" . }}
+
+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
+          runtime_type = "io.containerd.runc.v2"
+
+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
+          BinaryName = "/run/current-system/sw/bin/nvidia-container-runtime.cdi"
+      '';
+
+      gracefulNodeShutdown.enable = true;
+    };
+
+    # Make nvidia-container-toolkit available in system PATH
+    environment.systemPackages = with pkgs; [
+      nvidia-container-toolkit
+      nvidia-container-toolkit.tools
+      kubectl
+      kubernetes-helm
+    ];
+
+    # Point kubectl/helm at the k3s kubeconfig in interactive shells
+    environment.etc."profile.d/k3s-gpu.sh".text = ''
+      export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
+    '';
+
+    # Create systemd service to deploy the generic-cdi-plugin after k3s is ready
+    systemd.services.k3s-gpu-plugin-deploy = {
+      description = "Deploy generic-cdi-plugin to k3s";
+      after = [ "k3s.service" ];
+      wants = [ "k3s.service" ];
+      wantedBy = [ "multi-user.target" ];
+      path = [ pkgs.kubectl pkgs.coreutils ];
+      serviceConfig = {
+        Type = "oneshot";
+        RemainAfterExit = true;
+        ExecStart = pkgs.writeShellScript "deploy-cdi-plugin" ''
+          export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
+
+          # Wait for k3s API to be ready
+          echo "Waiting for k3s API server..."
+          for i in $(seq 1 60); do
+            if kubectl get nodes &>/dev/null; then
+              echo "k3s API server is ready"
+              break
+            fi
+            sleep 5
+          done
+
+          # kubectl apply is idempotent, so a single invocation handles both
+          # the first deploy and subsequent updates of the DaemonSet.
+          if kubectl get daemonset -n kube-system generic-cdi-plugin &>/dev/null; then
+            echo "generic-cdi-plugin already deployed, updating..."
+          else
+            echo "Deploying generic-cdi-plugin..."
+          fi
+          kubectl apply -f ${generic-cdi-plugin-manifest}
+
+          echo "Waiting for generic-cdi-plugin to be ready..."
+          kubectl rollout status daemonset/generic-cdi-plugin -n kube-system --timeout=120s || true
+        '';
+      };
+    };
+
+    # Store test manifests in /etc for easy access
+    environment.etc."k3s/gpu-test-pod.yaml".source = gpu-test-pod;
+    environment.etc."k3s/generic-cdi-plugin.yaml".source = generic-cdi-plugin-manifest;
+  };
+}