Files
dotfiles/nixos/k3s.nix

166 lines
5.1 KiB
Nix
Raw Normal View History

2024-10-07 15:00:14 -06:00
{
pkgs,
config,
lib,
...
}:
with lib; let
# Shorthand for the values of this module's own options.
cfg = config.myModules.railbird-k3s;

# Bucket mounted by the mount-railbird-bucket service, and the local
# directory it is mounted on.
bucket-name = "railbird-dev-videos";
mount-path = "/var/lib/railbird/bucket";

# One environment combining all CNI-related packages, so that containerd
# can be pointed at a single `bin` directory (see cni.bin_dir).
plugins-path = pkgs.buildEnv {
  name = "combined-cni-plugins";
  paths = with pkgs; [
    cni-plugins
    calico-cni-plugin
    calico-kube-controllers
    cni-plugin-flannel
  ];
};
in {
# Options this module exposes under `myModules.railbird-k3s`.
options.myModules.railbird-k3s = {
  # Master switch: the whole `config` section is gated on this flag.
  enable = mkEnableOption "railbird k3s";

  # Forwarded to services.k3s.serverAddr. When left empty, the node is
  # configured with clusterInit = true instead.
  serverAddr = mkOption {
    default = "";
    type = types.str;
  };

  # Extra k3s command-line flags, appended after the flags this module
  # always sets.
  extraFlags = mkOption {
    default = [];
    type = types.listOf types.str;
  };
};
2024-09-30 16:42:58 -06:00
config = mkIf cfg.enable {
# Encrypted secrets used by the rest of this module.
age.secrets = {
  # Cluster join token, consumed by services.k3s.tokenFile.
  "1896Folsom-k3s-token.age".file = ./secrets/1896Folsom-k3s-token.age;

  # Registry configuration, linked into /etc below.
  "k3s-registry.yaml.age".file = ./secrets/k3s-registry.yaml.age;

  # Service-account key passed to gcsfuse; owned by railbird:users.
  api-service-key = {
    file = ./secrets/api_service_account_key.json.age;
    owner = "railbird";
    group = "users";
  };
};

# Make the decrypted registry config available at
# /etc/rancher/k3s/registries.yaml.
environment.etc."rancher/k3s/registries.yaml".source =
  config.age.secrets."k3s-registry.yaml.age".path;
# Local Docker registry listening on every interface, port 5279.
# NOTE(review): deletion is enabled and the listener is not limited to
# localhost — confirm the port is firewalled as intended.
services.dockerRegistry = {
  enable = true;
  port = 5279;
  listenAddress = "0.0.0.0";
  # Allow deleting images and enable the garbage-collection option.
  enableGarbageCollect = true;
  enableDelete = true;
};
2024-12-30 15:20:54 -07:00
# Standalone containerd with CDI enabled under both CRI plugin section
# names that this module configures.
virtualisation.containerd = let
  # CDI settings shared verbatim by both plugin sections.
  cdi = {
    enable_cdi = true;
    cdi_spec_dirs = ["/var/run/cdi"];
  };
in {
  enable = true;
  settings.plugins = {
    "io.containerd.cri.v1.runtime" = cdi;
    "io.containerd.grpc.v1.cri" =
      cdi
      // {
        # CNI binaries are taken from the combined plugin environment.
        cni.bin_dir = "${plugins-path}/bin";
      };
  };
};
2024-11-11 18:58:48 -07:00
# Enable the NVIDIA container toolkit module.
hardware.nvidia-container-toolkit.enable = true;

# CNI plugin packages passed to containers.conf; same set as the
# combined plugin environment used for containerd.
virtualisation.containers.containersConf.cniPlugins = with pkgs; [
  cni-plugins
  calico-cni-plugin
  calico-kube-controllers
  cni-plugin-flannel
];
2024-12-30 18:33:23 -07:00
# Workaround: `nvidia-ctk` does not find the driver libraries through
# `--library-search-path` alone, so the driver package's lib directory is
# injected into the CDI generator unit's environment.
systemd.services.nvidia-container-toolkit-cdi-generator.environment.LD_LIBRARY_PATH =
  "${config.hardware.nvidia.package}/lib";
2024-10-07 15:00:14 -06:00
# Keeps the bucket mounted at mount-path with gcsfuse.
#
# The unit first prepares the mount directory, then runs a watchdog loop
# that re-checks the mount every 30 seconds and remounts when the
# directory is not a mount point or its `dev` directory is missing.
systemd.services.mount-railbird-bucket = {
  description = "Mount railbird bucket";
  after = ["agenix.service"];
  wantedBy = ["multi-user.target"];
  serviceConfig = {
    Type = "simple";
    RemainAfterExit = true;
    Restart = "on-failure"; # Restart the service on failure
    RestartSec = 5; # Wait 5 seconds before restarting
    TimeoutStopSec = 2;
    User = "root";
    ExecStartPre = [
      # The leading "-" makes systemd ignore a failing umount, e.g. when
      # nothing is mounted yet.
      "-${pkgs.util-linux}/bin/umount -f ${mount-path}"
      "${pkgs.coreutils}/bin/mkdir -p ${mount-path}"
      "${pkgs.coreutils}/bin/chown railbird:users ${mount-path}"
      "${pkgs.coreutils}/bin/chmod 0775 ${mount-path}"
    ];
    ExecStart = let
      key-file = config.age.secrets.api-service-key.path;
    in
      pkgs.writeShellScript "mount-railbird-bucket" ''
        while true; do
          # `mountpoint -q` checks exactly this directory. The previous
          # `mount | grep -q <path>` used the path as a regex and matched
          # any mount line that merely contained it.
          if ${pkgs.util-linux}/bin/mountpoint -q "${mount-path}" && [ -d "${mount-path}/dev" ]; then
            echo "Mount path ${mount-path} is mounted and valid (contains directory 'dev')."
          else
            echo "Mount path is not valid or not mounted, attempting remount."
            # Clear any stale mount first; failure here is expected when
            # nothing is mounted.
            ${pkgs.util-linux}/bin/umount -f "${mount-path}" || true
            ${pkgs.gcsfuse}/bin/gcsfuse --implicit-dirs --key-file "${key-file}" "${bucket-name}" "${mount-path}"
          fi
          echo "Sleeping"
          sleep 30
        done
      '';
  };
};
# k3s node configuration: cluster membership, kubelet eviction limits,
# API-server certificate names, and an NVIDIA/CDI containerd runtime.
services.k3s = {
  enable = true;

  # With no server address configured, this node initialises the cluster;
  # otherwise the address is passed through as the server to use.
  clusterInit = cfg.serverAddr == "";
  inherit (cfg) serverAddr;

  tokenFile = config.age.secrets."1896Folsom-k3s-token.age".path;

  gracefulNodeShutdown.enable = true;

  # Kubelet eviction thresholds, written out as the k3s config file.
  configPath = pkgs.writeTextFile {
    name = "k3s-config.yaml";
    text = ''
      kubelet-arg:
      - "eviction-hard=nodefs.available<2Gi"
      - "eviction-soft=nodefs.available<5Gi"
      - "eviction-soft-grace-period=nodefs.available=5m"
    '';
  };

  # Fixed flags first (one --tls-san per host name, then the node label
  # and etcd quota), followed by whatever the option user supplied.
  extraFlags =
    map (san: "--tls-san ${san}") [
      "ryzen-shine.local"
      "nixquick.local"
      "biskcomp.local"
      "jimi-hendnix.local"
      "dev.railbird.ai"
    ]
    ++ [
      "--node-label nixos-nvidia-cdi=enabled"
      "--etcd-arg=quota-backend-bytes=8589934592"
    ]
    ++ cfg.extraFlags;

  # Extends the stock containerd template with CDI settings and an
  # `nvidia` runtime entry.
  # NOTE(review): debug level is "trace" — confirm this verbosity is
  # still wanted.
  containerdConfigTemplate = ''
    {{ template "base" . }}

    plugins."io.containerd.grpc.v1.cri".cdi_spec_dirs = [ "/var/run/cdi" ]
    plugins."io.containerd.grpc.v1.cri".enable_cdi = true
    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
    runtime_type = "io.containerd.runc.v2"
    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
    BinaryName = "/run/current-system/sw/bin/nvidia-container-runtime.cdi"
    [debug]
    level = "trace"
  '';
};
};
}