nixos: use nvidia device plugin for k3s gpu

This commit is contained in:
2026-05-07 14:54:21 -07:00
parent 1ae061da47
commit 291e497d63

View File

@@ -14,56 +14,74 @@ with lib; let
]; ];
}; };
# Generic CDI Plugin DaemonSet for GPU resource allocation nvidia-device-plugin-version = "v0.19.1";
generic-cdi-plugin-manifest = pkgs.writeText "generic-cdi-plugin.yaml" ''
nvidia-device-plugin-manifest = pkgs.writeText "nvidia-device-plugin.yaml" ''
apiVersion: node.k8s.io/v1
handler: nvidia
kind: RuntimeClass
metadata:
name: nvidia
labels:
app.kubernetes.io/component: gpu-operator
---
apiVersion: apps/v1 apiVersion: apps/v1
kind: DaemonSet kind: DaemonSet
metadata: metadata:
name: generic-cdi-plugin name: nvidia-device-plugin-daemonset
namespace: kube-system namespace: kube-system
labels: labels:
app: generic-cdi-plugin app.kubernetes.io/name: nvidia-device-plugin
spec: spec:
selector: selector:
matchLabels: matchLabels:
app: generic-cdi-plugin app.kubernetes.io/name: nvidia-device-plugin
updateStrategy:
type: RollingUpdate
template: template:
metadata: metadata:
labels: labels:
app: generic-cdi-plugin app.kubernetes.io/name: nvidia-device-plugin
spec: spec:
runtimeClassName: nvidia
priorityClassName: system-node-critical
nodeSelector: nodeSelector:
nixos-nvidia-cdi: "enabled" nvidia.com/gpu.present: "true"
tolerations: tolerations:
- key: nvidia.com/gpu - key: nvidia.com/gpu
operator: Exists operator: Exists
effect: NoSchedule effect: NoSchedule
containers: containers:
- name: generic-cdi-plugin - name: nvidia-device-plugin-ctr
image: ghcr.io/olfillasodikno/generic-cdi-plugin:main image: nvcr.io/nvidia/k8s-device-plugin:${nvidia-device-plugin-version}
imagePullPolicy: Always imagePullPolicy: IfNotPresent
args: command: ["nvidia-device-plugin"]
- "/var/run/cdi/nvidia-container-toolkit.json" env:
- name: DEVICE_ID_STRATEGY
value: uuid
- name: NVIDIA_VISIBLE_DEVICES
value: all
- name: NVIDIA_DRIVER_CAPABILITIES
value: compute,utility
securityContext: securityContext:
privileged: true allowPrivilegeEscalation: false
capabilities:
drop: ["ALL"]
volumeMounts: volumeMounts:
- name: device-plugin - name: kubelet-device-plugins-dir
mountPath: /var/lib/kubelet/device-plugins mountPath: /var/lib/kubelet/device-plugins
- name: pod-resources
mountPath: /var/lib/kubelet/pod-resources
- name: cdi-specs - name: cdi-specs
mountPath: /var/run/cdi mountPath: /var/run/cdi
readOnly: true readOnly: true
volumes: volumes:
- name: device-plugin - name: kubelet-device-plugins-dir
hostPath: hostPath:
path: /var/lib/kubelet/device-plugins path: /var/lib/kubelet/device-plugins
- name: pod-resources type: Directory
hostPath:
path: /var/lib/kubelet/pod-resources
- name: cdi-specs - name: cdi-specs
hostPath: hostPath:
path: /var/run/cdi path: /var/run/cdi
type: DirectoryOrCreate
''; '';
# Test pod to verify GPU access # Test pod to verify GPU access
@@ -75,13 +93,14 @@ with lib; let
namespace: default namespace: default
spec: spec:
restartPolicy: Never restartPolicy: Never
runtimeClassName: nvidia
containers: containers:
- name: cuda-test - name: cuda-test
image: nvidia/cuda:12.6.3-base-ubuntu24.04 image: nvidia/cuda:12.6.3-base-ubuntu24.04
command: ["nvidia-smi"] command: ["nvidia-smi"]
resources: resources:
limits: limits:
nvidia.com/gpu-all: 1 nvidia.com/gpu: 1
''; '';
in { in {
options = { options = {
@@ -97,7 +116,11 @@ in {
config = mkIf cfg.enable { config = mkIf cfg.enable {
# NVIDIA container toolkit for CDI spec generation # NVIDIA container toolkit for CDI spec generation
hardware.nvidia-container-toolkit.enable = true; hardware.nvidia-container-toolkit = {
enable = true;
device-name-strategy = "uuid";
mount-nvidia-executables = true;
};
# Ensure CDI generator has access to nvidia libs # Ensure CDI generator has access to nvidia libs
systemd.services.nvidia-container-toolkit-cdi-generator = { systemd.services.nvidia-container-toolkit-cdi-generator = {
@@ -124,12 +147,15 @@ in {
''; '';
}; };
extraFlags = [ extraFlags =
[
"--node-label=nixos-nvidia-cdi=enabled" "--node-label=nixos-nvidia-cdi=enabled"
"--node-label=nvidia.com/gpu.present=true"
"--tls-san=${config.networking.hostName}" "--tls-san=${config.networking.hostName}"
"--tls-san=${config.networking.hostName}.local" "--tls-san=${config.networking.hostName}.local"
"--tls-san=localhost" "--tls-san=localhost"
] ++ cfg.extraFlags; ]
++ cfg.extraFlags;
# Containerd config with CDI support # Containerd config with CDI support
# k3s 1.31+ with containerd 2.0 has CDI enabled by default # k3s 1.31+ with containerd 2.0 has CDI enabled by default
@@ -138,10 +164,13 @@ in {
{{ template "base" . }} {{ template "base" . }}
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia] [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2" runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options] [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
BinaryName = "/run/current-system/sw/bin/nvidia-container-runtime.cdi" BinaryName = "${lib.getOutput "tools" config.hardware.nvidia-container-toolkit.package}/bin/nvidia-container-runtime.cdi"
''; '';
gracefulNodeShutdown.enable = true; gracefulNodeShutdown.enable = true;
@@ -160,13 +189,13 @@ in {
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
''; '';
# Create systemd service to deploy the generic-cdi-plugin after k3s is ready # Create systemd service to deploy the NVIDIA device plugin after k3s is ready
systemd.services.k3s-gpu-plugin-deploy = { systemd.services.k3s-gpu-plugin-deploy = {
description = "Deploy generic-cdi-plugin to k3s"; description = "Deploy NVIDIA device plugin to k3s";
after = [ "k3s.service" ]; after = ["k3s.service"];
wants = [ "k3s.service" ]; wants = ["k3s.service"];
wantedBy = [ "multi-user.target" ]; wantedBy = ["multi-user.target"];
path = [ pkgs.kubectl pkgs.coreutils ]; path = [pkgs.kubectl pkgs.coreutils];
serviceConfig = { serviceConfig = {
Type = "oneshot"; Type = "oneshot";
RemainAfterExit = true; RemainAfterExit = true;
@@ -183,23 +212,22 @@ in {
sleep 5 sleep 5
done done
# Check if plugin already exists
if kubectl get daemonset -n kube-system generic-cdi-plugin &>/dev/null; then if kubectl get daemonset -n kube-system generic-cdi-plugin &>/dev/null; then
echo "generic-cdi-plugin already deployed, updating..." echo "Removing old generic-cdi-plugin deployment..."
kubectl apply -f ${generic-cdi-plugin-manifest} kubectl delete daemonset -n kube-system generic-cdi-plugin --ignore-not-found=true
else
echo "Deploying generic-cdi-plugin..."
kubectl apply -f ${generic-cdi-plugin-manifest}
fi fi
echo "Waiting for generic-cdi-plugin to be ready..." echo "Deploying NVIDIA device plugin..."
kubectl rollout status daemonset/generic-cdi-plugin -n kube-system --timeout=120s || true kubectl apply -f ${nvidia-device-plugin-manifest}
echo "Waiting for NVIDIA device plugin to be ready..."
kubectl rollout status daemonset/nvidia-device-plugin-daemonset -n kube-system --timeout=120s || true
''; '';
}; };
}; };
# Store test manifests in /etc for easy access # Store test manifests in /etc for easy access
environment.etc."k3s/gpu-test-pod.yaml".source = gpu-test-pod; environment.etc."k3s/gpu-test-pod.yaml".source = gpu-test-pod;
environment.etc."k3s/generic-cdi-plugin.yaml".source = generic-cdi-plugin-manifest; environment.etc."k3s/nvidia-device-plugin.yaml".source = nvidia-device-plugin-manifest;
}; };
} }