459 lines
16 KiB
Python
459 lines
16 KiB
Python
"""
|
|
PyInstaller build script for creating standalone Python server binary.
|
|
|
|
Usage:
|
|
python build_binary.py # Build default (CPU) server binary
|
|
python build_binary.py --cuda # Build CUDA-enabled server binary
|
|
"""
|
|
|
|
import PyInstaller.__main__
|
|
import argparse
|
|
import logging
|
|
import os
|
|
import platform
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def is_apple_silicon():
|
|
"""Check if running on Apple Silicon."""
|
|
return platform.system() == "Darwin" and platform.machine() == "arm64"
|
|
|
|
|
|
def build_server(cuda=False):
|
|
"""Build Python server as standalone binary.
|
|
|
|
Args:
|
|
cuda: If True, build with CUDA support and name the binary
|
|
voicebox-server-cuda instead of voicebox-server.
|
|
"""
|
|
backend_dir = Path(__file__).parent
|
|
|
|
binary_name = "voicebox-server-cuda" if cuda else "voicebox-server"
|
|
|
|
# PyInstaller arguments
|
|
# CUDA builds use --onedir so we can split the output into two archives:
|
|
# 1. Server core (~200-400MB) — versioned with the app
|
|
# 2. CUDA libs (~2GB) — versioned independently (only redownloaded on
|
|
# CUDA toolkit / torch major version changes)
|
|
# CPU builds remain --onefile for simplicity.
|
|
pack_mode = "--onedir" if cuda else "--onefile"
|
|
args = [
|
|
"server.py", # Use server.py as entry point instead of main.py
|
|
pack_mode,
|
|
"--name",
|
|
binary_name,
|
|
]
|
|
|
|
# Hide console window on Windows only. On macOS/Linux the sidecar needs
|
|
# stdout/stderr for Tauri to capture logs.
|
|
if platform.system() == "Windows":
|
|
args.append("--noconsole")
|
|
|
|
# numpy 2.x / torch ABI mismatch fix: install memmove fallback for
|
|
# torch.from_numpy() before the app starts. Runtime hooks run after
|
|
# FrozenImporter is registered so frozen torch/numpy are importable.
|
|
# Paths are passed relative to backend_dir because os.chdir(backend_dir)
|
|
# runs before PyInstaller. Absolute paths would get baked into the
|
|
# generated .spec, breaking reproducible builds on other machines / CI.
|
|
args.extend(
|
|
[
|
|
"--runtime-hook",
|
|
"pyi_rth_numpy_compat.py",
|
|
# Stub torch.compiler.disable before transformers imports
|
|
# flex_attention, which otherwise triggers torch._dynamo →
|
|
# torch._numpy._ufuncs and crashes at module load under
|
|
# PyInstaller. See pyi_rth_torch_compiler_disable.py.
|
|
"--runtime-hook",
|
|
"pyi_rth_torch_compiler_disable.py",
|
|
# Per-module collection overrides (e.g. forcing scipy.stats._distn_infrastructure
|
|
# to bundle .py source alongside .pyc so the runtime hook can source-patch it).
|
|
"--additional-hooks-dir",
|
|
"pyi_hooks",
|
|
]
|
|
)
|
|
|
|
# Add local qwen_tts path if specified (for editable installs)
|
|
qwen_tts_path = os.getenv("QWEN_TTS_PATH")
|
|
if qwen_tts_path and Path(qwen_tts_path).exists():
|
|
args.extend(["--paths", str(qwen_tts_path)])
|
|
logger.info("Using local qwen_tts source from: %s", qwen_tts_path)
|
|
|
|
# Add common hidden imports
|
|
args.extend(
|
|
[
|
|
"--hidden-import",
|
|
"backend",
|
|
"--hidden-import",
|
|
"backend.main",
|
|
"--hidden-import",
|
|
"backend.config",
|
|
"--hidden-import",
|
|
"backend.database",
|
|
"--hidden-import",
|
|
"backend.models",
|
|
"--hidden-import",
|
|
"backend.services.profiles",
|
|
"--hidden-import",
|
|
"backend.services.history",
|
|
"--hidden-import",
|
|
"backend.services.tts",
|
|
"--hidden-import",
|
|
"backend.services.transcribe",
|
|
"--hidden-import",
|
|
"backend.utils.platform_detect",
|
|
"--hidden-import",
|
|
"backend.backends",
|
|
"--hidden-import",
|
|
"backend.backends.pytorch_backend",
|
|
"--hidden-import",
|
|
"backend.backends.qwen_custom_voice_backend",
|
|
"--hidden-import",
|
|
"backend.utils.audio",
|
|
"--hidden-import",
|
|
"backend.utils.cache",
|
|
"--hidden-import",
|
|
"backend.utils.progress",
|
|
"--hidden-import",
|
|
"backend.utils.hf_progress",
|
|
"--hidden-import",
|
|
"backend.services.cuda",
|
|
"--hidden-import",
|
|
"backend.services.effects",
|
|
"--hidden-import",
|
|
"backend.utils.effects",
|
|
"--hidden-import",
|
|
"backend.services.versions",
|
|
"--hidden-import",
|
|
"pedalboard",
|
|
"--hidden-import",
|
|
"chatterbox",
|
|
"--hidden-import",
|
|
"chatterbox.tts_turbo",
|
|
"--hidden-import",
|
|
"chatterbox.mtl_tts",
|
|
"--hidden-import",
|
|
"backend.backends.chatterbox_backend",
|
|
"--hidden-import",
|
|
"backend.backends.chatterbox_turbo_backend",
|
|
# chatterbox multilingual uses spacy_pkuseg for Chinese word
|
|
# segmentation, which ships pickled dict files (dicts/default.pkl)
|
|
# and native .so extensions that --hidden-import alone won't bundle.
|
|
"--collect-all",
|
|
"spacy_pkuseg",
|
|
"--hidden-import",
|
|
"backend.backends.luxtts_backend",
|
|
"--hidden-import",
|
|
"zipvoice",
|
|
"--hidden-import",
|
|
"zipvoice.luxvoice",
|
|
"--collect-all",
|
|
"zipvoice",
|
|
"--collect-all",
|
|
"linacodec",
|
|
"--hidden-import",
|
|
"torch",
|
|
"--hidden-import",
|
|
"transformers",
|
|
"--hidden-import",
|
|
"fastapi",
|
|
"--hidden-import",
|
|
"uvicorn",
|
|
"--hidden-import",
|
|
"sqlalchemy",
|
|
# librosa uses lazy_loader which generates .pyi stub files at
|
|
# install time and reads them at runtime to discover submodules.
|
|
# --hidden-import alone doesn't bundle the stubs, causing
|
|
# "Cannot load imports from non-existent stub" at runtime.
|
|
"--collect-all",
|
|
"lazy_loader",
|
|
"--collect-all",
|
|
"librosa",
|
|
"--hidden-import",
|
|
"soundfile",
|
|
"--hidden-import",
|
|
"qwen_tts",
|
|
"--hidden-import",
|
|
"qwen_tts.inference",
|
|
"--hidden-import",
|
|
"qwen_tts.inference.qwen3_tts_model",
|
|
"--hidden-import",
|
|
"qwen_tts.inference.qwen3_tts_tokenizer",
|
|
"--hidden-import",
|
|
"qwen_tts.core",
|
|
"--hidden-import",
|
|
"qwen_tts.cli",
|
|
"--copy-metadata",
|
|
"qwen-tts",
|
|
"--copy-metadata",
|
|
"requests",
|
|
"--copy-metadata",
|
|
"transformers",
|
|
"--copy-metadata",
|
|
"huggingface-hub",
|
|
"--copy-metadata",
|
|
"tokenizers",
|
|
"--copy-metadata",
|
|
"safetensors",
|
|
"--copy-metadata",
|
|
"tqdm",
|
|
"--hidden-import",
|
|
"requests",
|
|
# qwen_tts uses inspect.getsource() at runtime to locate
|
|
# modeling_qwen3_tts.py — needs physical .py source files bundled
|
|
"--collect-all",
|
|
"qwen_tts",
|
|
# Fix for pkg_resources and jaraco namespace packages
|
|
"--hidden-import",
|
|
"pkg_resources.extern",
|
|
"--collect-submodules",
|
|
"jaraco",
|
|
# inflect uses typeguard @typechecked which calls inspect.getsource()
|
|
# at import time — needs .py source files, not just .pyc bytecode
|
|
"--collect-all",
|
|
"inflect",
|
|
# perth ships pretrained watermark model files (hparams.yaml, .pth.tar)
|
|
# in perth/perth_net/pretrained/ — needed by chatterbox at runtime
|
|
"--collect-all",
|
|
"perth",
|
|
# piper_phonemize ships espeak-ng-data/ (phoneme tables, language dicts)
|
|
# needed by LuxTTS for text-to-phoneme conversion
|
|
"--collect-all",
|
|
"piper_phonemize",
|
|
# HumeAI TADA — speech-language model using Llama + flow matching
|
|
"--hidden-import",
|
|
"backend.backends.hume_backend",
|
|
"--hidden-import",
|
|
"tada",
|
|
"--hidden-import",
|
|
"tada.modules",
|
|
"--hidden-import",
|
|
"tada.modules.tada",
|
|
"--hidden-import",
|
|
"tada.modules.encoder",
|
|
"--hidden-import",
|
|
"tada.modules.decoder",
|
|
"--hidden-import",
|
|
"tada.modules.aligner",
|
|
"--hidden-import",
|
|
"tada.modules.acoustic_spkr_verf",
|
|
"--hidden-import",
|
|
"tada.nn",
|
|
"--hidden-import",
|
|
"tada.nn.vibevoice",
|
|
"--hidden-import",
|
|
"tada.utils",
|
|
"--hidden-import",
|
|
"tada.utils.gray_code",
|
|
"--hidden-import",
|
|
"tada.utils.text",
|
|
# DAC shim — provides dac.nn.layers.Snake1d without the real
|
|
# descript-audio-codec package (which pulls onnx/tensorboard via
|
|
# descript-audiotools). The shim is in backend/utils/dac_shim.py.
|
|
"--hidden-import",
|
|
"backend.utils.dac_shim",
|
|
"--hidden-import",
|
|
"torchaudio",
|
|
"--collect-submodules",
|
|
"tada",
|
|
# Kokoro 82M — lightweight TTS engine using misaki G2P
|
|
# collect-all is required because transformers introspects .py source
|
|
# files at runtime (e.g. _can_set_attn_implementation opens the class
|
|
# file); hidden-import alone only bundles bytecode.
|
|
"--hidden-import",
|
|
"backend.backends.kokoro_backend",
|
|
"--collect-all",
|
|
"kokoro",
|
|
# misaki ships G2P data files (dictionaries, phoneme tables)
|
|
# that must be bundled for espeak/en/ja/zh G2P to work
|
|
"--collect-all",
|
|
"misaki",
|
|
# language_tags ships JSON data files (index.json etc.) loaded at
|
|
# runtime via: misaki → phonemizer → segments → csvw → language_tags
|
|
"--collect-all",
|
|
"language_tags",
|
|
# espeakng_loader ships the entire espeak-ng-data directory (369 files)
|
|
# loaded at import time by misaki.espeak via get_data_path()
|
|
"--collect-all",
|
|
"espeakng_loader",
|
|
# spacy en_core_web_sm model — misaki.en tries to spacy.cli.download()
|
|
# at runtime if not found, which calls pip as a subprocess and crashes
|
|
# the frozen binary. Bundle the model so spacy.util.is_package() passes.
|
|
"--collect-all",
|
|
"en_core_web_sm",
|
|
"--copy-metadata",
|
|
"en_core_web_sm",
|
|
"--hidden-import",
|
|
"en_core_web_sm",
|
|
# unidic-lite ships the MeCab dictionary used by fugashi (pulled in
|
|
# by misaki[ja]). The dict lives in unidic_lite/dicdir/ and is
|
|
# discovered via the package's DICDIR constant, so the data files
|
|
# must be collected or Japanese Kokoro voices crash at runtime.
|
|
"--collect-all",
|
|
"unidic_lite",
|
|
"--hidden-import",
|
|
"loguru",
|
|
]
|
|
)
|
|
|
|
# Add CUDA-specific hidden imports
|
|
if cuda:
|
|
logger.info("Building with CUDA support")
|
|
args.extend(
|
|
[
|
|
"--hidden-import",
|
|
"torch.cuda",
|
|
"--hidden-import",
|
|
"torch.backends.cudnn",
|
|
]
|
|
)
|
|
else:
|
|
# Exclude NVIDIA CUDA packages from CPU-only builds to keep binary small.
|
|
# When building from a venv with CUDA torch installed, PyInstaller would
|
|
# bundle ~3GB of NVIDIA shared libraries. We exclude both the Python
|
|
# modules and the binary DLLs.
|
|
nvidia_packages = [
|
|
"nvidia",
|
|
"nvidia.cublas",
|
|
"nvidia.cuda_cupti",
|
|
"nvidia.cuda_nvrtc",
|
|
"nvidia.cuda_runtime",
|
|
"nvidia.cudnn",
|
|
"nvidia.cufft",
|
|
"nvidia.curand",
|
|
"nvidia.cusolver",
|
|
"nvidia.cusparse",
|
|
"nvidia.nccl",
|
|
"nvidia.nvjitlink",
|
|
"nvidia.nvtx",
|
|
]
|
|
for pkg in nvidia_packages:
|
|
args.extend(["--exclude-module", pkg])
|
|
|
|
# Add MLX-specific imports if building on Apple Silicon (never for CUDA builds)
|
|
if is_apple_silicon() and not cuda:
|
|
logger.info("Building for Apple Silicon - including MLX dependencies")
|
|
args.extend(
|
|
[
|
|
"--hidden-import",
|
|
"backend.backends.mlx_backend",
|
|
"--hidden-import",
|
|
"mlx",
|
|
"--hidden-import",
|
|
"mlx.core",
|
|
"--hidden-import",
|
|
"mlx.nn",
|
|
"--hidden-import",
|
|
"mlx_audio",
|
|
"--hidden-import",
|
|
"mlx_audio.tts",
|
|
"--hidden-import",
|
|
"mlx_audio.stt",
|
|
"--collect-submodules",
|
|
"mlx",
|
|
"--collect-submodules",
|
|
"mlx_audio",
|
|
# Use --collect-all so PyInstaller bundles both data files AND
|
|
# native shared libraries (.dylib, .metallib) for MLX.
|
|
# Previously only --collect-data was used, which caused MLX to
|
|
# raise OSError at runtime inside the bundled binary because
|
|
# the Metal shader libraries were missing.
|
|
"--collect-all",
|
|
"mlx",
|
|
"--collect-all",
|
|
"mlx_audio",
|
|
]
|
|
)
|
|
elif not cuda:
|
|
logger.info("Building for non-Apple Silicon platform - PyTorch only")
|
|
|
|
dist_dir = str(backend_dir / "dist")
|
|
build_dir = str(backend_dir / "build")
|
|
|
|
args.extend(
|
|
[
|
|
"--distpath",
|
|
dist_dir,
|
|
"--workpath",
|
|
build_dir,
|
|
"--noconfirm",
|
|
"--clean",
|
|
]
|
|
)
|
|
|
|
# Change to backend directory
|
|
os.chdir(backend_dir)
|
|
|
|
# For CPU builds on Windows, ensure we're using CPU-only torch.
|
|
# If CUDA torch is installed (local dev), swap to CPU torch before building,
|
|
# then restore CUDA torch after. This prevents PyInstaller from bundling
|
|
# ~3GB of CUDA DLLs into the CPU binary.
|
|
restore_cuda = False
|
|
if not cuda and platform.system() == "Windows":
|
|
import subprocess
|
|
|
|
result = subprocess.run(
|
|
[sys.executable, "-c", "import torch; print(torch.version.cuda or '')"], capture_output=True, text=True
|
|
)
|
|
has_cuda_torch = bool(result.stdout.strip())
|
|
if has_cuda_torch:
|
|
logger.info("CUDA torch detected — installing CPU torch for CPU build...")
|
|
subprocess.run(
|
|
[
|
|
sys.executable,
|
|
"-m",
|
|
"pip",
|
|
"install",
|
|
"torch",
|
|
"torchvision",
|
|
"torchaudio",
|
|
"--index-url",
|
|
"https://download.pytorch.org/whl/cpu",
|
|
"--force-reinstall",
|
|
"-q",
|
|
],
|
|
check=True,
|
|
)
|
|
restore_cuda = True
|
|
|
|
# Run PyInstaller
|
|
try:
|
|
PyInstaller.__main__.run(args)
|
|
finally:
|
|
# Restore CUDA torch if we swapped it out (even on build failure)
|
|
if restore_cuda:
|
|
logger.info("Restoring CUDA torch...")
|
|
import subprocess
|
|
|
|
subprocess.run(
|
|
[
|
|
sys.executable,
|
|
"-m",
|
|
"pip",
|
|
"install",
|
|
"torch",
|
|
"torchvision",
|
|
"torchaudio",
|
|
"--index-url",
|
|
"https://download.pytorch.org/whl/cu128",
|
|
"--force-reinstall",
|
|
"-q",
|
|
],
|
|
check=True,
|
|
)
|
|
|
|
logger.info("Binary built in %s", backend_dir / "dist" / binary_name)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
parser = argparse.ArgumentParser(description="Build voicebox-server binary")
|
|
parser.add_argument(
|
|
"--cuda",
|
|
action="store_true",
|
|
help="Build CUDA-enabled binary (voicebox-server-cuda)",
|
|
)
|
|
cli_args = parser.parse_args()
|
|
build_server(cuda=cli_args.cuda)
|