"""
|
|
Package the PyInstaller --onedir CUDA build into two archives.
|
|
|
|
Takes the PyInstaller --onedir output directory and splits it into:
|
|
1. voicebox-server-cuda.tar.gz — server core (exe + non-NVIDIA deps)
|
|
2. cuda-libs-cu128.tar.gz — NVIDIA runtime libraries only
|
|
3. cuda-libs.json — version manifest for the CUDA libs
|
|
|
|
Usage:
|
|
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/
|
|
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --output release-assets/
|
|
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --cuda-libs-version cu128-v1
|
|
"""
|
|
|
|
import argparse
|
|
import hashlib
|
|
import json
|
|
import sys
|
|
import tarfile
|
|
from pathlib import Path
|
|
|
|
# DLL name prefixes that identify NVIDIA CUDA runtime libraries.
|
|
# These DLLs may appear in different locations depending on the torch
|
|
# and PyInstaller version:
|
|
# - nvidia/ subdirectories (older torch with separate nvidia-* packages)
|
|
# - _internal/torch/lib/ (torch 2.10+ bundles NVIDIA DLLs directly)
|
|
# - Top-level directory (some PyInstaller versions)
|
|
NVIDIA_DLL_PREFIXES = (
|
|
"cublas",
|
|
"cublaslt",
|
|
"cudart",
|
|
"cudnn",
|
|
"cufft",
|
|
"cufftw",
|
|
"curand",
|
|
"cusolver",
|
|
"cusolvermg",
|
|
"cusparse",
|
|
"nvjitlink",
|
|
"nvrtc",
|
|
"nccl",
|
|
"caffe2_nvrtc",
|
|
)
|
|
|
|
# Files to keep in the server core even if they match NVIDIA prefixes.
|
|
# These are small Python modules or stubs, not the large runtime DLLs.
|
|
NVIDIA_KEEP_IN_CORE = {
|
|
"torch/cuda/nccl.py",
|
|
"torch/_inductor/codegen/cuda/cutlass_lib_extensions/cutlass_mock_imports/cuda/cudart.py",
|
|
}
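
# Illustrative classification by is_nvidia_file() below; the file names here are
# hypothetical examples, not taken from a real build:
#   nvidia/cublas/bin/cublas64_12.dll  -> CUDA libs archive (nvidia/ tree)
#   _internal/torch/lib/cudnn64_9.dll  -> CUDA libs archive (DLL prefix match)
#   torch/cuda/nccl.py                 -> server core (keep-list entry)
#   voicebox-server-cuda.exe           -> server core (not a CUDA library)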


def is_nvidia_file(rel_path: str) -> bool:
    """Check if a relative path belongs to the NVIDIA CUDA libs.

    Identifies large NVIDIA runtime DLLs (.dll/.so) regardless of where
    PyInstaller placed them. Excludes small Python stubs that happen to
    share NVIDIA-related names.
    """
    rel_lower = rel_path.lower().replace("\\", "/")

    # Never split out Python source files or small stubs
    if rel_lower in NVIDIA_KEEP_IN_CORE:
        return False

    # Files under nvidia/ subdirectory tree (older torch layout)
    if rel_lower.startswith("nvidia/") or "/nvidia/" in rel_lower:
        # Only DLLs/shared objects — not .py, .dist-info, etc.
        if rel_lower.endswith((".dll", ".so")):
            return True
        # Include entire nvidia/ namespace package tree
        for part in rel_lower.split("/"):
            if part == "nvidia":
                return True

    # NVIDIA DLLs anywhere in the tree (e.g. _internal/torch/lib/cublas64_12.dll)
    name = rel_lower.rsplit("/", 1)[-1]
    if name.endswith(".dll") or name.endswith(".so"):
        name_no_ext = name.rsplit(".", 1)[0]
        for prefix in NVIDIA_DLL_PREFIXES:
            if name_no_ext.startswith(prefix):
                return True

    return False


def sha256_file(path: Path) -> str:
    """Compute SHA-256 hex digest of a file."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while True:
            chunk = f.read(1024 * 1024)
            if not chunk:
                break
            h.update(chunk)
    return h.hexdigest()


def package(
    onedir_path: Path,
    output_dir: Path,
    cuda_libs_version: str,
    torch_compat: str,
):
    output_dir.mkdir(parents=True, exist_ok=True)

    # Collect all files in the onedir output, split into core vs nvidia
    core_files = []
    nvidia_files = []

    for item in sorted(onedir_path.rglob("*")):
        if item.is_dir():
            continue
        rel = item.relative_to(onedir_path)
        # Use POSIX-style separators so tar entry names stay portable across platforms
        rel_str = rel.as_posix()
        if is_nvidia_file(rel_str):
            nvidia_files.append((rel_str, item))
        else:
            core_files.append((rel_str, item))

    core_size = sum(f.stat().st_size for _, f in core_files)
    nvidia_size = sum(f.stat().st_size for _, f in nvidia_files)

    print(f"Input directory: {onedir_path}")
    print(f"Core files: {len(core_files)} ({core_size / (1024**2):.1f} MB)")
    print(f"NVIDIA files: {len(nvidia_files)} ({nvidia_size / (1024**2):.1f} MB)")

    if not nvidia_files:
        print(
            f"ERROR: No NVIDIA files found in {onedir_path}. "
            "Refusing to create an empty CUDA libs archive.",
            file=sys.stderr,
        )
        print(
            "Make sure you built with --cuda and the NVIDIA packages are present.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Create server core archive
    # Files are stored relative to the archive root (no parent directory prefix)
    # so extracting to backends/cuda/ puts everything at the right level.
    server_archive = output_dir / "voicebox-server-cuda.tar.gz"
    print(f"\nCreating server core archive: {server_archive.name}")
    with tarfile.open(server_archive, "w:gz") as tar:
        for rel_str, full_path in core_files:
            tar.add(full_path, arcname=rel_str)
    server_sha = sha256_file(server_archive)
    (output_dir / "voicebox-server-cuda.tar.gz.sha256").write_text(
        f"{server_sha} voicebox-server-cuda.tar.gz\n"
    )
    print(f" Size: {server_archive.stat().st_size / (1024**2):.1f} MB")
    print(f" SHA-256: {server_sha[:16]}...")

    # Create CUDA libs archive
    cuda_libs_archive = output_dir / f"cuda-libs-{cuda_libs_version}.tar.gz"
    print(f"\nCreating CUDA libs archive: {cuda_libs_archive.name}")
    with tarfile.open(cuda_libs_archive, "w:gz") as tar:
        for rel_str, full_path in nvidia_files:
            tar.add(full_path, arcname=rel_str)
    cuda_sha = sha256_file(cuda_libs_archive)
    (output_dir / f"cuda-libs-{cuda_libs_version}.tar.gz.sha256").write_text(
        f"{cuda_sha} cuda-libs-{cuda_libs_version}.tar.gz\n"
    )
    print(f" Size: {cuda_libs_archive.stat().st_size / (1024**2):.1f} MB")
    print(f" SHA-256: {cuda_sha[:16]}...")

    # Write cuda-libs.json manifest
    manifest = {
        "version": cuda_libs_version,
        "torch_compat": torch_compat,
        "archive": cuda_libs_archive.name,
        "sha256": cuda_sha,
    }
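    # The resulting cuda-libs.json looks like this (illustrative values based on
    # the CLI defaults; the sha256 is a placeholder, not a real digest):
    # {
    #   "version": "cu128-v1",
    #   "torch_compat": ">=2.7.0,<2.11.0",
    #   "archive": "cuda-libs-cu128-v1.tar.gz",
    #   "sha256": "<hex digest of cuda-libs-cu128-v1.tar.gz>"
    # }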
    manifest_path = output_dir / "cuda-libs.json"
    manifest_path.write_text(json.dumps(manifest, indent=2) + "\n")
    print(f"\nManifest: {manifest_path.name}")
    print(json.dumps(manifest, indent=2))

    # Summary
    total_input = core_size + nvidia_size
    total_output = server_archive.stat().st_size + cuda_libs_archive.stat().st_size
    print(f"\nTotal input: {total_input / (1024**3):.2f} GB")
    print(f"Total output: {total_output / (1024**3):.2f} GB (compressed)")
    print(
        f"Server core: {server_archive.stat().st_size / (1024**2):.1f} MB (redownloaded on app update)"
    )
    print(
        f"CUDA libs: {cuda_libs_archive.stat().st_size / (1024**2):.1f} MB (cached until CUDA toolkit bump)"
    )
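

# Consumer-side sketch (not executed by this script): because both archives store
# files relative to the archive root, a downloader is assumed to extract them into
# the same directory, e.g. backends/cuda/ as noted above. Paths and archive names
# here are illustrative.
#
#   import tarfile
#   from pathlib import Path
#
#   backend_dir = Path("backends/cuda")
#   backend_dir.mkdir(parents=True, exist_ok=True)
#   for archive in ("voicebox-server-cuda.tar.gz", "cuda-libs-cu128-v1.tar.gz"):
#       with tarfile.open(archive, "r:gz") as tar:
#           tar.extractall(backend_dir)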


def main():
    parser = argparse.ArgumentParser(
        description="Package PyInstaller --onedir CUDA build into server + CUDA libs archives"
    )
    parser.add_argument(
        "input",
        type=Path,
        help="Path to PyInstaller --onedir output directory (e.g. backend/dist/voicebox-server-cuda/)",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Output directory for archives (default: same as input parent)",
    )
    parser.add_argument(
        "--cuda-libs-version",
        type=str,
        default="cu128-v1",
        help="Version string for the CUDA libs archive (default: cu128-v1)",
    )
    parser.add_argument(
        "--torch-compat",
        type=str,
        default=">=2.7.0,<2.11.0",
        help="Torch version compatibility range (default: >=2.7.0,<2.11.0)",
    )
    args = parser.parse_args()

    if not args.input.is_dir():
        print(f"Error: {args.input} is not a directory", file=sys.stderr)
        print("Expected a PyInstaller --onedir output directory.", file=sys.stderr)
        sys.exit(1)

    output_dir = args.output or args.input.parent
    package(args.input, output_dir, args.cuda_libs_version, args.torch_compat)


if __name__ == "__main__":
    main()