Initial commit
This commit is contained in:
232
scripts/package_cuda.py
Normal file
232
scripts/package_cuda.py
Normal file
@@ -0,0 +1,232 @@
|
||||
"""
|
||||
Package the PyInstaller --onedir CUDA build into two archives plus a version manifest.
|
||||
|
||||
Takes the PyInstaller --onedir output directory and splits it into:
|
||||
1. voicebox-server-cuda.tar.gz — server core (exe + non-NVIDIA deps)
|
||||
2. cuda-libs-cu128.tar.gz — NVIDIA runtime libraries only
|
||||
3. cuda-libs.json — version manifest for the CUDA libs
|
||||
|
||||
Usage:
|
||||
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/
|
||||
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --output release-assets/
|
||||
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --cuda-libs-version cu128-v1
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import sys
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
|
||||
# DLL name prefixes that identify NVIDIA CUDA runtime libraries.
# These DLLs may appear in different locations depending on the torch
# and PyInstaller version:
#   - nvidia/ subdirectories (older torch with separate nvidia-* packages)
#   - _internal/torch/lib/ (torch 2.10+ bundles NVIDIA DLLs directly)
#   - Top-level directory (some PyInstaller versions)
NVIDIA_DLL_PREFIXES = (
    "cublas",
    "cublaslt",
    "cudart",
    "cudnn",
    "cufft",
    "cufftw",
    "curand",
    "cusolver",
    "cusolvermg",
    "cusparse",
    "nvjitlink",
    "nvrtc",
    "nccl",
    "caffe2_nvrtc",
)

# Files to keep in the server core even if they match NVIDIA prefixes.
# These are small Python modules or stubs, not the large runtime DLLs.
# Entries are lowercase, forward-slash relative paths (the same
# normalization is_nvidia_file applies before the lookup).
NVIDIA_KEEP_IN_CORE = {
    "torch/cuda/nccl.py",
    "torch/_inductor/codegen/cuda/cutlass_lib_extensions/cutlass_mock_imports/cuda/cudart.py",
}


def is_nvidia_file(rel_path: str) -> bool:
    """Check if a relative path belongs to the NVIDIA CUDA libs.

    Identifies NVIDIA runtime shared libraries regardless of where
    PyInstaller placed them, plus everything under an ``nvidia/`` package
    directory. Matches both Windows DLL names (``cublas64_12.dll``) and
    Linux soname conventions, including versioned sonames such as
    ``libcublas.so.12`` (previously missed because only a bare ``.so``
    suffix was checked, and the ``lib`` prefix defeated the prefix match).
    Excludes small Python stubs that happen to share NVIDIA-related names.

    Args:
        rel_path: Path relative to the --onedir root; either slash style
            and any letter case are accepted.

    Returns:
        True if the file belongs in the CUDA libs archive.
    """
    rel_lower = rel_path.lower().replace("\\", "/")

    # Never split out Python source files or small stubs.
    if rel_lower in NVIDIA_KEEP_IN_CORE:
        return False

    # Anything inside an nvidia/ directory component (older torch layout):
    # the entire namespace package tree goes to the CUDA archive, Python
    # glue and DLLs alike, so the split stays import-consistent.
    parts = rel_lower.split("/")
    if "nvidia" in parts[:-1]:
        return True

    # NVIDIA shared libraries anywhere else in the tree
    # (e.g. _internal/torch/lib/cublas64_12.dll or .../libcublas.so.12).
    name = parts[-1]
    if name.endswith((".dll", ".so")) or ".so." in name:
        # Base name up to the first dot; drop a Linux-style "lib" prefix
        # so "libcublas" matches the "cublas" prefix as well.
        base = name.split(".", 1)[0].removeprefix("lib")
        if base.startswith(NVIDIA_DLL_PREFIXES):
            return True

    return False
|
||||
|
||||
|
||||
def sha256_file(path: Path) -> str:
    """Return the SHA-256 hex digest of *path*.

    Reads the file in 1 MiB chunks so arbitrarily large archives can be
    hashed without loading them into memory.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        while block := fh.read(1024 * 1024):
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def package(
    onedir_path: Path,
    output_dir: Path,
    cuda_libs_version: str,
    torch_compat: str,
) -> None:
    """Split a PyInstaller --onedir tree into server-core and CUDA-libs archives.

    Writes into *output_dir*:
      - voicebox-server-cuda.tar.gz (+ .sha256 sidecar): every file that
        is_nvidia_file() classifies as non-NVIDIA.
      - cuda-libs-<cuda_libs_version>.tar.gz (+ .sha256 sidecar): the
        NVIDIA runtime libraries.
      - cuda-libs.json: manifest recording version, torch compat range,
        archive name, and archive SHA-256.

    Exits the process with status 1 (after printing to stderr) if no
    NVIDIA files are found, rather than producing an empty CUDA archive.

    Args:
        onedir_path: PyInstaller --onedir output directory to package.
        output_dir: Destination for archives; created if missing.
        cuda_libs_version: Version tag embedded in the CUDA archive name
            and manifest (e.g. "cu128-v1").
        torch_compat: Torch compatibility range recorded in the manifest.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Collect all files in the onedir output, split into core vs nvidia
    core_files = []
    nvidia_files = []

    # sorted() makes archive member order deterministic, so rebuilding an
    # unchanged input yields a stable member order across runs.
    for item in sorted(onedir_path.rglob("*")):
        if item.is_dir():
            continue
        rel = item.relative_to(onedir_path)
        rel_str = str(rel)
        if is_nvidia_file(rel_str):
            nvidia_files.append((rel_str, item))
        else:
            core_files.append((rel_str, item))

    # Uncompressed byte totals, reported below for sanity-checking the split.
    core_size = sum(f.stat().st_size for _, f in core_files)
    nvidia_size = sum(f.stat().st_size for _, f in nvidia_files)

    print(f"Input directory: {onedir_path}")
    print(f"Core files: {len(core_files)} ({core_size / (1024**2):.1f} MB)")
    print(f"NVIDIA files: {len(nvidia_files)} ({nvidia_size / (1024**2):.1f} MB)")

    # A CPU-only build (or a broken CUDA build) has nothing to split out;
    # fail loudly instead of shipping an empty CUDA libs archive.
    if not nvidia_files:
        print(
            f"ERROR: No NVIDIA files found in {onedir_path}. "
            "Refusing to create an empty CUDA libs archive.",
            file=sys.stderr,
        )
        print(
            "Make sure you built with --cuda and the NVIDIA packages are present.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Create server core archive
    # Files are stored relative to the archive root (no parent directory prefix)
    # so extracting to backends/cuda/ puts everything at the right level.
    server_archive = output_dir / "voicebox-server-cuda.tar.gz"
    print(f"\nCreating server core archive: {server_archive.name}")
    with tarfile.open(server_archive, "w:gz") as tar:
        for rel_str, full_path in core_files:
            tar.add(full_path, arcname=rel_str)
    server_sha = sha256_file(server_archive)
    # Sidecar checksum file so downloads can be verified out-of-band.
    (output_dir / "voicebox-server-cuda.tar.gz.sha256").write_text(
        f"{server_sha} voicebox-server-cuda.tar.gz\n"
    )
    print(f" Size: {server_archive.stat().st_size / (1024**2):.1f} MB")
    print(f" SHA-256: {server_sha[:16]}...")

    # Create CUDA libs archive
    cuda_libs_archive = output_dir / f"cuda-libs-{cuda_libs_version}.tar.gz"
    print(f"\nCreating CUDA libs archive: {cuda_libs_archive.name}")
    with tarfile.open(cuda_libs_archive, "w:gz") as tar:
        for rel_str, full_path in nvidia_files:
            tar.add(full_path, arcname=rel_str)
    cuda_sha = sha256_file(cuda_libs_archive)
    (output_dir / f"cuda-libs-{cuda_libs_version}.tar.gz.sha256").write_text(
        f"{cuda_sha} cuda-libs-{cuda_libs_version}.tar.gz\n"
    )
    print(f" Size: {cuda_libs_archive.stat().st_size / (1024**2):.1f} MB")
    print(f" SHA-256: {cuda_sha[:16]}...")

    # Write cuda-libs.json manifest — consumed by the installer/updater to
    # decide whether its cached CUDA libs archive is still current.
    manifest = {
        "version": cuda_libs_version,
        "torch_compat": torch_compat,
        "archive": cuda_libs_archive.name,
        "sha256": cuda_sha,
    }
    manifest_path = output_dir / "cuda-libs.json"
    manifest_path.write_text(json.dumps(manifest, indent=2) + "\n")
    print(f"\nManifest: {manifest_path.name}")
    print(json.dumps(manifest, indent=2))

    # Summary
    total_input = core_size + nvidia_size
    total_output = server_archive.stat().st_size + cuda_libs_archive.stat().st_size
    print(f"\nTotal input: {total_input / (1024**3):.2f} GB")
    print(f"Total output: {total_output / (1024**3):.2f} GB (compressed)")
    print(
        f"Server core: {server_archive.stat().st_size / (1024**2):.1f} MB (redownloaded on app update)"
    )
    print(
        f"CUDA libs: {cuda_libs_archive.stat().st_size / (1024**2):.1f} MB (cached until CUDA toolkit bump)"
    )
|
||||
|
||||
def main():
    """CLI entry point: validate arguments and run package().

    Exits with status 1 (via argparse for parse errors, or explicitly)
    when the input path is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Package PyInstaller --onedir CUDA build into server + CUDA libs archives"
    )
    parser.add_argument(
        "input",
        type=Path,
        help="Path to PyInstaller --onedir output directory (e.g. backend/dist/voicebox-server-cuda/)",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Output directory for archives (default: same as input parent)",
    )
    parser.add_argument(
        "--cuda-libs-version",
        type=str,
        default="cu128-v1",
        help="Version string for the CUDA libs archive (default: cu128-v1)",
    )
    parser.add_argument(
        "--torch-compat",
        type=str,
        default=">=2.7.0,<2.11.0",
        # Fix: help text previously advertised ">=2.6.0,<2.11.0" while the
        # actual default was ">=2.7.0,<2.11.0"; keep the two in sync.
        help="Torch version compatibility range (default: >=2.7.0,<2.11.0)",
    )
    args = parser.parse_args()

    if not args.input.is_dir():
        print(f"Error: {args.input} is not a directory", file=sys.stderr)
        print("Expected a PyInstaller --onedir output directory.", file=sys.stderr)
        sys.exit(1)

    output_dir = args.output or args.input.parent
    package(args.input, output_dir, args.cuda_libs_version, args.torch_compat)
|
||||
|
||||
|
||||
# Guard so the module can be imported (e.g. by tests) without packaging
# anything as a side effect.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user