"""
|
|
Package the PyInstaller --onedir CUDA build into two archives.
|
|
|
|
Takes the PyInstaller --onedir output directory and splits it into:
|
|
1. voicebox-server-cuda.tar.gz — server core (exe + non-NVIDIA deps)
|
|
2. cuda-libs-cu128.tar.gz — NVIDIA runtime libraries only
|
|
3. cuda-libs.json — version manifest for the CUDA libs
|
|
|
|
Usage:
|
|
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/
|
|
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --output release-assets/
|
|
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --cuda-libs-version cu128-v1
|
|
"""
|
|
|
|
import argparse
|
|
import hashlib
|
|
import json
|
|
import sys
|
|
import tarfile
|
|
from pathlib import Path
|
|
|
|
# DLL name prefixes that identify NVIDIA CUDA runtime libraries.
|
|
# These DLLs may appear in different locations depending on the torch
|
|
# and PyInstaller version:
|
|
# - nvidia/ subdirectories (older torch with separate nvidia-* packages)
|
|
# - _internal/torch/lib/ (torch 2.10+ bundles NVIDIA DLLs directly)
|
|
# - Top-level directory (some PyInstaller versions)
|
|
NVIDIA_DLL_PREFIXES = (
|
|
"cublas",
|
|
"cublaslt",
|
|
"cudart",
|
|
"cudnn",
|
|
"cufft",
|
|
"cufftw",
|
|
"curand",
|
|
"cusolver",
|
|
"cusolvermg",
|
|
"cusparse",
|
|
"nvjitlink",
|
|
"nvrtc",
|
|
"nccl",
|
|
"caffe2_nvrtc",
|
|
)
|
|
|
|
# Files to keep in the server core even if they match NVIDIA prefixes.
|
|
# These are small Python modules or stubs, not the large runtime DLLs.
|
|
NVIDIA_KEEP_IN_CORE = {
|
|
"torch/cuda/nccl.py",
|
|
"torch/_inductor/codegen/cuda/cutlass_lib_extensions/cutlass_mock_imports/cuda/cudart.py",
|
|
}
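
# Illustrative classification by is_nvidia_file() below; the file names here are
# hypothetical examples, not taken from a real build:
#   nvidia/cublas/bin/cublas64_12.dll  -> CUDA libs archive (nvidia/ tree)
#   _internal/torch/lib/cudnn64_9.dll  -> CUDA libs archive (DLL prefix match)
#   torch/cuda/nccl.py                 -> server core (keep-list entry)
#   voicebox-server-cuda.exe           -> server core (not a CUDA library)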


def is_nvidia_file(rel_path: str) -> bool:
    """Check if a relative path belongs to the NVIDIA CUDA libs.

    Identifies large NVIDIA runtime DLLs (.dll/.so) regardless of where
    PyInstaller placed them. Excludes small Python stubs that happen to
    share NVIDIA-related names.
    """
    rel_lower = rel_path.lower().replace("\\", "/")

    # Never split out Python source files or small stubs
    if rel_lower in NVIDIA_KEEP_IN_CORE:
        return False

    # Files under nvidia/ subdirectory tree (older torch layout)
    if rel_lower.startswith("nvidia/") or "/nvidia/" in rel_lower:
        # Only DLLs/shared objects — not .py, .dist-info, etc.
        if rel_lower.endswith((".dll", ".so")):
            return True
        # Include entire nvidia/ namespace package tree
        for part in rel_lower.split("/"):
            if part == "nvidia":
                return True

    # NVIDIA DLLs anywhere in the tree (e.g. _internal/torch/lib/cublas64_12.dll)
    name = rel_lower.rsplit("/", 1)[-1]
    if name.endswith(".dll") or name.endswith(".so"):
        name_no_ext = name.rsplit(".", 1)[0]
        for prefix in NVIDIA_DLL_PREFIXES:
            if name_no_ext.startswith(prefix):
                return True

    return False


def sha256_file(path: Path) -> str:
    """Compute SHA-256 hex digest of a file."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while True:
            chunk = f.read(1024 * 1024)
            if not chunk:
                break
            h.update(chunk)
    return h.hexdigest()


def package(
    onedir_path: Path,
    output_dir: Path,
    cuda_libs_version: str,
    torch_compat: str,
):
    output_dir.mkdir(parents=True, exist_ok=True)

    # Collect all files in the onedir output, split into core vs nvidia
    core_files = []
    nvidia_files = []

    for item in sorted(onedir_path.rglob("*")):
        if item.is_dir():
            continue
        rel = item.relative_to(onedir_path)
        # Use POSIX-style separators so tar entry names stay portable across platforms
        rel_str = rel.as_posix()
        if is_nvidia_file(rel_str):
            nvidia_files.append((rel_str, item))
        else:
            core_files.append((rel_str, item))

    core_size = sum(f.stat().st_size for _, f in core_files)
    nvidia_size = sum(f.stat().st_size for _, f in nvidia_files)

    print(f"Input directory: {onedir_path}")
    print(f"Core files: {len(core_files)} ({core_size / (1024**2):.1f} MB)")
    print(f"NVIDIA files: {len(nvidia_files)} ({nvidia_size / (1024**2):.1f} MB)")

    if not nvidia_files:
        print(
            f"ERROR: No NVIDIA files found in {onedir_path}. "
            "Refusing to create an empty CUDA libs archive.",
            file=sys.stderr,
        )
        print(
            "Make sure you built with --cuda and the NVIDIA packages are present.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Create server core archive
    # Files are stored relative to the archive root (no parent directory prefix)
    # so extracting to backends/cuda/ puts everything at the right level.
    server_archive = output_dir / "voicebox-server-cuda.tar.gz"
    print(f"\nCreating server core archive: {server_archive.name}")
    with tarfile.open(server_archive, "w:gz") as tar:
        for rel_str, full_path in core_files:
            tar.add(full_path, arcname=rel_str)
    server_sha = sha256_file(server_archive)
    (output_dir / "voicebox-server-cuda.tar.gz.sha256").write_text(
        f"{server_sha} voicebox-server-cuda.tar.gz\n"
    )
    print(f" Size: {server_archive.stat().st_size / (1024**2):.1f} MB")
    print(f" SHA-256: {server_sha[:16]}...")

    # Create CUDA libs archive
    cuda_libs_archive = output_dir / f"cuda-libs-{cuda_libs_version}.tar.gz"
    print(f"\nCreating CUDA libs archive: {cuda_libs_archive.name}")
    with tarfile.open(cuda_libs_archive, "w:gz") as tar:
        for rel_str, full_path in nvidia_files:
            tar.add(full_path, arcname=rel_str)
    cuda_sha = sha256_file(cuda_libs_archive)
    (output_dir / f"cuda-libs-{cuda_libs_version}.tar.gz.sha256").write_text(
        f"{cuda_sha} cuda-libs-{cuda_libs_version}.tar.gz\n"
    )
    print(f" Size: {cuda_libs_archive.stat().st_size / (1024**2):.1f} MB")
    print(f" SHA-256: {cuda_sha[:16]}...")

    # Write cuda-libs.json manifest
    manifest = {
        "version": cuda_libs_version,
        "torch_compat": torch_compat,
        "archive": cuda_libs_archive.name,
        "sha256": cuda_sha,
    }
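    # The resulting cuda-libs.json looks like this (illustrative values based on
    # the CLI defaults; the sha256 is a placeholder, not a real digest):
    # {
    #   "version": "cu128-v1",
    #   "torch_compat": ">=2.7.0,<2.11.0",
    #   "archive": "cuda-libs-cu128-v1.tar.gz",
    #   "sha256": "<hex digest of cuda-libs-cu128-v1.tar.gz>"
    # }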
    manifest_path = output_dir / "cuda-libs.json"
    manifest_path.write_text(json.dumps(manifest, indent=2) + "\n")
    print(f"\nManifest: {manifest_path.name}")
    print(json.dumps(manifest, indent=2))

    # Summary
    total_input = core_size + nvidia_size
    total_output = server_archive.stat().st_size + cuda_libs_archive.stat().st_size
    print(f"\nTotal input: {total_input / (1024**3):.2f} GB")
    print(f"Total output: {total_output / (1024**3):.2f} GB (compressed)")
    print(
        f"Server core: {server_archive.stat().st_size / (1024**2):.1f} MB (redownloaded on app update)"
    )
    print(
        f"CUDA libs: {cuda_libs_archive.stat().st_size / (1024**2):.1f} MB (cached until CUDA toolkit bump)"
    )
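

# Consumer-side sketch (not executed by this script): because both archives store
# files relative to the archive root, a downloader is assumed to extract them into
# the same directory, e.g. backends/cuda/ as noted above. Paths and archive names
# here are illustrative.
#
#   import tarfile
#   from pathlib import Path
#
#   backend_dir = Path("backends/cuda")
#   backend_dir.mkdir(parents=True, exist_ok=True)
#   for archive in ("voicebox-server-cuda.tar.gz", "cuda-libs-cu128-v1.tar.gz"):
#       with tarfile.open(archive, "r:gz") as tar:
#           tar.extractall(backend_dir)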


def main():
    parser = argparse.ArgumentParser(
        description="Package PyInstaller --onedir CUDA build into server + CUDA libs archives"
    )
    parser.add_argument(
        "input",
        type=Path,
        help="Path to PyInstaller --onedir output directory (e.g. backend/dist/voicebox-server-cuda/)",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Output directory for archives (default: same as input parent)",
    )
    parser.add_argument(
        "--cuda-libs-version",
        type=str,
        default="cu128-v1",
        help="Version string for the CUDA libs archive (default: cu128-v1)",
    )
    parser.add_argument(
        "--torch-compat",
        type=str,
        default=">=2.7.0,<2.11.0",
        help="Torch version compatibility range (default: >=2.7.0,<2.11.0)",
    )
    args = parser.parse_args()

    if not args.input.is_dir():
        print(f"Error: {args.input} is not a directory", file=sys.stderr)
        print("Expected a PyInstaller --onedir output directory.", file=sys.stderr)
        sys.exit(1)

    output_dir = args.output or args.input.parent
    package(args.input, output_dir, args.cuda_libs_version, args.torch_compat)


if __name__ == "__main__":
    main()