Debug nvidia-container-toolkit commands

This commit is contained in:
2024-12-30 18:11:01 -07:00
parent 626d719e16
commit 8fd220c919
2 changed files with 129 additions and 29 deletions

View File

@@ -72,14 +72,6 @@ in {
]; ];
}; };
# `nvidia-ctk` fails to locate the driver libraries even when it is given
# `--library-search-path`, so the driver package's lib directory is injected
# into the CDI generator unit's environment as a workaround.
systemd.services.nvidia-container-toolkit-cdi-generator.environment.LD_LIBRARY_PATH =
  "${config.hardware.nvidia.package}/lib";
systemd.services.mount-railbird-bucket = { systemd.services.mount-railbird-bucket = {
after = ["agenix.service"]; after = ["agenix.service"];
wantedBy = [ "multi-user.target" ]; wantedBy = [ "multi-user.target" ];

View File

@@ -1,4 +1,112 @@
final: prev: { final: prev:
{
nvidia-container-toolkit = final.stdenv.mkDerivation {
  # Debug shim that replaces `nvidia-container-toolkit` in the overlay.
  #
  # For every executable in the wrapped package's default output and in its
  # `tools` output, a Bash wrapper of the same name is generated. Each wrapper
  # appends invocation details and the tool's output to files under
  # /var/log/nvidia-container-toolkit and then runs the real binary from
  # `prev.nvidia-container-toolkit`.
  #
  # The wrappers are meant to be transparent to callers: stdout and stderr stay
  # on separate streams and the real tool's exit status is returned.
  pname = "nvidia-container-toolkit-debug";
  version = prev.nvidia-container-toolkit.version;

  # Two outputs, matching the two outputs of the wrapped package that are
  # referenced below:
  #   $out   -> wrappers for ${prev.nvidia-container-toolkit}/bin
  #   $tools -> wrappers for ${prev.nvidia-container-toolkit.tools}/bin
  outputs = [ "out" "tools" ];

  # Nothing is fetched or compiled; the build only writes wrapper scripts.
  src = null;
  dontUnpack = true;
  # The generated shebangs already point at an absolute store path, so the
  # fixup-phase shebang rewriting is unnecessary. (`dontPatchShell`, used
  # previously, is not a variable stdenv looks at.)
  dontPatchShebangs = true;
  buildInputs = [ ];

  # Generate the wrapper scripts.
  #
  # Escaping rules inside the heredocs (they are unquoted, so the build shell
  # expands them):
  #   $exe   -> expanded at build time to the name of the wrapped executable
  #   \$...  -> written out as a literal `$...`, evaluated when the wrapper runs
  # The shebang line and the `EOF` terminator must stay at the string's base
  # indentation so that they end up in column 0 after Nix strips the common
  # indent.
  buildPhase = ''
    # --- Wrap binaries from the main output of the original toolkit ---
    mkdir -p wrapper-out
    if [ -d "${prev.nvidia-container-toolkit}/bin" ]; then
      for exe in $(ls ${prev.nvidia-container-toolkit}/bin); do
        cat > wrapper-out/$exe <<EOF
    #!${final.bash}/bin/bash
    # When running as root, make sure the shared log directory exists and is
    # writable by everyone, so later non-root invocations can log as well.
    if [ "\$(id -u)" -eq 0 ]; then
      mkdir -p /var/log/nvidia-container-toolkit
      chown root:users /var/log/nvidia-container-toolkit
      chmod 2777 /var/log/nvidia-container-toolkit
    fi
    # --- STARTUP LOG ---
    echo "\$(date '+%Y-%m-%d %H:%M:%S') - NVIDIA Container Toolkit (main) started" >> /var/log/nvidia-container-toolkit/$exe.startup.log 2>/dev/null || true
    # --- COMMAND INVOCATION LOG ---
    echo "\$(date '+%Y-%m-%d %H:%M:%S') - Executing $exe with args: \$@" >> /var/log/nvidia-container-toolkit/$exe.log 2>/dev/null || true
    # --- Run the real tool, copying stdout and stderr to log files ---
    # Process substitution (instead of a pipeline) keeps \$? bound to the real
    # tool rather than to tee.
    ${prev.nvidia-container-toolkit}/bin/$exe "\$@" > >(tee -a /var/log/nvidia-container-toolkit/$exe.stdout.log) 2> >(tee -a /var/log/nvidia-container-toolkit/$exe.stderr.log >&2)
    exit_code=\$?
    # --- FINISHED LOG ---
    echo "\$(date '+%Y-%m-%d %H:%M:%S') - Finished $exe with exit code: \$exit_code" >> /var/log/nvidia-container-toolkit/$exe.log 2>/dev/null || true
    # Hand the real tool's status back to the caller; without this the wrapper
    # would always report success because of the "|| true" above.
    exit "\$exit_code"
    EOF
        chmod +x wrapper-out/$exe
      done
    fi

    # --- Wrap binaries from the 'tools' output of the original toolkit ---
    mkdir -p wrapper-tools
    if [ -d "${prev.nvidia-container-toolkit.tools}/bin" ]; then
      for exe in $(ls ${prev.nvidia-container-toolkit.tools}/bin); do
        cat > wrapper-tools/$exe <<EOF
    #!${final.bash}/bin/bash
    # --- STARTUP LOG ---
    echo "\$(date '+%Y-%m-%d %H:%M:%S') - NVIDIA Container Toolkit (tools) started" >> /var/log/nvidia-container-toolkit/startup.log 2>/dev/null || true
    # --- COMMAND INVOCATION LOG ---
    echo "\$(date '+%Y-%m-%d %H:%M:%S') - Executing $exe with args: \$@" >> /var/log/nvidia-container-toolkit/tools.log 2>/dev/null || true
    # --- Run the real tool, copying stdout and stderr to tools.log ---
    # Both streams are logged to the same file but stay separate for the
    # caller, and \$? is the real tool's status (a "cmd | tee" pipeline would
    # report tee's status and merge stderr into stdout).
    ${prev.nvidia-container-toolkit.tools}/bin/$exe "\$@" > >(tee -a /var/log/nvidia-container-toolkit/tools.log) 2> >(tee -a /var/log/nvidia-container-toolkit/tools.log >&2)
    exit_code=\$?
    # --- FINISHED LOG ---
    echo "\$(date '+%Y-%m-%d %H:%M:%S') - Finished $exe with exit code: \$exit_code" >> /var/log/nvidia-container-toolkit/tools.log 2>/dev/null || true
    exit "\$exit_code"
    EOF
        chmod +x wrapper-tools/$exe
      done
    fi
  '';

  # Copy the generated wrappers into their outputs. `cp -r DIR/.` is used
  # instead of `DIR/*` so an empty wrapper directory does not fail the build
  # on an unmatched glob.
  installPhase = ''
    # For the main output of our wrapper derivation
    mkdir -p "$out/bin"
    if [ -d wrapper-out ]; then
      cp -r wrapper-out/. "$out/bin/"
    fi
    # For the 'tools' output of our wrapper derivation
    mkdir -p "$tools/bin"
    if [ -d wrapper-tools ]; then
      cp -r wrapper-tools/. "$tools/bin/"
    fi
  '';

  # Carry over any propagated dependencies declared by the original package.
  propagatedBuildInputs = prev.nvidia-container-toolkit.propagatedBuildInputs or [];
  propagatedUserEnvPkgs = prev.nvidia-container-toolkit.propagatedUserEnvPkgs or [];

  meta = {
    description = "Debug-wrapped NVIDIA Container Toolkit with separate tools output.";
    homepage = prev.nvidia-container-toolkit.meta.homepage;
    license = prev.nvidia-container-toolkit.meta.license;
    maintainers = prev.nvidia-container-toolkit.meta.maintainers;
    platforms = prev.nvidia-container-toolkit.meta.platforms;
  };
};
rofi-systemd = prev.rofi-systemd.overrideAttrs (_: { rofi-systemd = prev.rofi-systemd.overrideAttrs (_: {
src = prev.fetchFromGitHub { src = prev.fetchFromGitHub {
repo = "rofi-systemd"; repo = "rofi-systemd";
@@ -55,27 +163,27 @@ final: prev: {
python-with-my-packages = let python-with-my-packages = let
my-python-packages = python-packages: my-python-packages = python-packages:
with python-packages; [ with python-packages; [
argcomplete argcomplete
appdirs appdirs
ipdb ipdb
ipython ipython
numpy numpy
openpyxl openpyxl
pip pip
requests requests
tox tox
]; ];
in in
final.python311.withPackages my-python-packages; final.python311.withPackages my-python-packages;
# gitea = prev.gitea.overrideAttrs(_: { # gitea = prev.gitea.overrideAttrs(_: {
# src = prev.fetchFromGitHub { # src = prev.fetchFromGitHub {
# repo = "gitea"; # repo = "gitea";
# owner = "colonelpanic8"; # owner = "colonelpanic8";
# rev = "40e15b12bf104f8018f56e5b826d8a2f8e2587ea"; # rev = "40e15b12bf104f8018f56e5b826d8a2f8e2587ea";
# sha256 = "sha256-VXP8Ga681rcKn548rOZq9I19abY0GzXRpdiYGpwyMJ4="; # sha256 = "sha256-VXP8Ga681rcKn548rOZq9I19abY0GzXRpdiYGpwyMJ4=";
# }; # };
# go = final.buildPackages.go_1_21; # go = final.buildPackages.go_1_21;
# }); # });
} }