"""
PyInstaller build script for creating standalone Python server binary.

Usage:
    python build_binary.py           # Build default (CPU) server binary
    python build_binary.py --cuda    # Build CUDA-enabled server binary
"""

import argparse
import logging
import os
import platform
import subprocess
import sys
from pathlib import Path

import PyInstaller.__main__

logger = logging.getLogger(__name__)

# Packages and wheel indexes used when temporarily swapping torch builds
# around a CPU build on Windows (see build_server).
_TORCH_PACKAGES = ("torch", "torchvision", "torchaudio")
_TORCH_CPU_INDEX_URL = "https://download.pytorch.org/whl/cpu"
_TORCH_CUDA_INDEX_URL = "https://download.pytorch.org/whl/cu128"


def is_apple_silicon():
    """Check if running on Apple Silicon."""
    return platform.system() == "Darwin" and platform.machine() == "arm64"


def _has_cuda_torch():
    """Return True if the installed torch reports a CUDA version.

    The check runs in a child interpreter so torch is never imported into
    this build process. If the child prints nothing (for example because
    torch is missing or CPU-only), the result is False.
    """
    result = subprocess.run(
        [sys.executable, "-c", "import torch; print(torch.version.cuda or '')"],
        capture_output=True,
        text=True,
    )
    return bool(result.stdout.strip())


def _reinstall_torch(index_url):
    """Force-reinstall torch, torchvision and torchaudio from ``index_url``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            *_TORCH_PACKAGES,
            "--index-url",
            index_url,
            "--force-reinstall",
            "-q",
        ],
        check=True,
    )


def _pyinstaller_args(binary_name, cuda, backend_dir):
    """Assemble the PyInstaller command-line arguments for one build.

    Args:
        binary_name: Value passed to PyInstaller's ``--name``.
        cuda: If True, use ``--onedir`` and add the CUDA hidden imports;
            otherwise use ``--onefile`` and exclude the nvidia packages.
        backend_dir: Directory that receives the ``dist`` and ``build``
            output folders.

    Returns:
        list[str]: Arguments suitable for ``PyInstaller.__main__.run``.
    """
    # PyInstaller arguments
    # CUDA builds use --onedir so we can split the output into two archives:
    #   1. Server core (~200-400MB) — versioned with the app
    #   2. CUDA libs (~2GB) — versioned independently (only redownloaded on
    #      CUDA toolkit / torch major version changes)
    # CPU builds remain --onefile for simplicity.
    pack_mode = "--onedir" if cuda else "--onefile"
    args = [
        "server.py",  # Use server.py as entry point instead of main.py
        pack_mode,
        "--name",
        binary_name,
    ]

    # Hide console window on Windows only. On macOS/Linux the sidecar needs
    # stdout/stderr for Tauri to capture logs.
    if platform.system() == "Windows":
        args.append("--noconsole")

    # numpy 2.x / torch ABI mismatch fix: install memmove fallback for
    # torch.from_numpy() before the app starts. Runtime hooks run after
    # FrozenImporter is registered so frozen torch/numpy are importable.
    # Paths are passed relative to backend_dir because os.chdir(backend_dir)
    # runs before PyInstaller. Absolute paths would get baked into the
    # generated .spec, breaking reproducible builds on other machines / CI.
    args.extend(
        [
            "--runtime-hook", "pyi_rth_numpy_compat.py",
            # Stub torch.compiler.disable before transformers imports
            # flex_attention, which otherwise triggers torch._dynamo →
            # torch._numpy._ufuncs and crashes at module load under
            # PyInstaller. See pyi_rth_torch_compiler_disable.py.
            "--runtime-hook", "pyi_rth_torch_compiler_disable.py",
            # Per-module collection overrides (e.g. forcing scipy.stats._distn_infrastructure
            # to bundle .py source alongside .pyc so the runtime hook can source-patch it).
            "--additional-hooks-dir", "pyi_hooks",
        ]
    )

    # Add local qwen_tts path if specified (for editable installs)
    qwen_tts_path = os.getenv("QWEN_TTS_PATH")
    if qwen_tts_path and Path(qwen_tts_path).exists():
        args.extend(["--paths", qwen_tts_path])
        logger.info("Using local qwen_tts source from: %s", qwen_tts_path)

    # Add common hidden imports
    args.extend(
        [
            "--hidden-import", "backend",
            "--hidden-import", "backend.main",
            "--hidden-import", "backend.config",
            "--hidden-import", "backend.database",
            "--hidden-import", "backend.models",
            "--hidden-import", "backend.services.profiles",
            "--hidden-import", "backend.services.history",
            "--hidden-import", "backend.services.tts",
            "--hidden-import", "backend.services.transcribe",
            "--hidden-import", "backend.utils.platform_detect",
            "--hidden-import", "backend.backends",
            "--hidden-import", "backend.backends.pytorch_backend",
            "--hidden-import", "backend.backends.qwen_custom_voice_backend",
            "--hidden-import", "backend.utils.audio",
            "--hidden-import", "backend.utils.cache",
            "--hidden-import", "backend.utils.progress",
            "--hidden-import", "backend.utils.hf_progress",
            "--hidden-import", "backend.services.cuda",
            "--hidden-import", "backend.services.effects",
            "--hidden-import", "backend.utils.effects",
            "--hidden-import", "backend.services.versions",
            "--hidden-import", "pedalboard",
            "--hidden-import", "chatterbox",
            "--hidden-import", "chatterbox.tts_turbo",
            "--hidden-import", "chatterbox.mtl_tts",
            "--hidden-import", "backend.backends.chatterbox_backend",
            "--hidden-import", "backend.backends.chatterbox_turbo_backend",
            # chatterbox multilingual uses spacy_pkuseg for Chinese word
            # segmentation, which ships pickled dict files (dicts/default.pkl)
            # and native .so extensions that --hidden-import alone won't bundle.
            "--collect-all", "spacy_pkuseg",
            "--hidden-import", "backend.backends.luxtts_backend",
            "--hidden-import", "zipvoice",
            "--hidden-import", "zipvoice.luxvoice",
            "--collect-all", "zipvoice",
            "--collect-all", "linacodec",
            "--hidden-import", "torch",
            "--hidden-import", "transformers",
            "--hidden-import", "fastapi",
            "--hidden-import", "uvicorn",
            "--hidden-import", "sqlalchemy",
            # librosa uses lazy_loader which generates .pyi stub files at
            # install time and reads them at runtime to discover submodules.
            # --hidden-import alone doesn't bundle the stubs, causing
            # "Cannot load imports from non-existent stub" at runtime.
            "--collect-all", "lazy_loader",
            "--collect-all", "librosa",
            "--hidden-import", "soundfile",
            "--hidden-import", "qwen_tts",
            "--hidden-import", "qwen_tts.inference",
            "--hidden-import", "qwen_tts.inference.qwen3_tts_model",
            "--hidden-import", "qwen_tts.inference.qwen3_tts_tokenizer",
            "--hidden-import", "qwen_tts.core",
            "--hidden-import", "qwen_tts.cli",
            "--copy-metadata", "qwen-tts",
            "--copy-metadata", "requests",
            "--copy-metadata", "transformers",
            "--copy-metadata", "huggingface-hub",
            "--copy-metadata", "tokenizers",
            "--copy-metadata", "safetensors",
            "--copy-metadata", "tqdm",
            "--hidden-import", "requests",
            # qwen_tts uses inspect.getsource() at runtime to locate
            # modeling_qwen3_tts.py — needs physical .py source files bundled
            "--collect-all", "qwen_tts",
            # Fix for pkg_resources and jaraco namespace packages
            "--hidden-import", "pkg_resources.extern",
            "--collect-submodules", "jaraco",
            # inflect uses typeguard @typechecked which calls inspect.getsource()
            # at import time — needs .py source files, not just .pyc bytecode
            "--collect-all", "inflect",
            # perth ships pretrained watermark model files (hparams.yaml, .pth.tar)
            # in perth/perth_net/pretrained/ — needed by chatterbox at runtime
            "--collect-all", "perth",
            # piper_phonemize ships espeak-ng-data/ (phoneme tables, language dicts)
            # needed by LuxTTS for text-to-phoneme conversion
            "--collect-all", "piper_phonemize",
            # HumeAI TADA — speech-language model using Llama + flow matching
            "--hidden-import", "backend.backends.hume_backend",
            "--hidden-import", "tada",
            "--hidden-import", "tada.modules",
            "--hidden-import", "tada.modules.tada",
            "--hidden-import", "tada.modules.encoder",
            "--hidden-import", "tada.modules.decoder",
            "--hidden-import", "tada.modules.aligner",
            "--hidden-import", "tada.modules.acoustic_spkr_verf",
            "--hidden-import", "tada.nn",
            "--hidden-import", "tada.nn.vibevoice",
            "--hidden-import", "tada.utils",
            "--hidden-import", "tada.utils.gray_code",
            "--hidden-import", "tada.utils.text",
            # DAC shim — provides dac.nn.layers.Snake1d without the real
            # descript-audio-codec package (which pulls onnx/tensorboard via
            # descript-audiotools). The shim is in backend/utils/dac_shim.py.
            "--hidden-import", "backend.utils.dac_shim",
            "--hidden-import", "torchaudio",
            "--collect-submodules", "tada",
            # Kokoro 82M — lightweight TTS engine using misaki G2P
            # collect-all is required because transformers introspects .py source
            # files at runtime (e.g. _can_set_attn_implementation opens the class
            # file); hidden-import alone only bundles bytecode.
            "--hidden-import", "backend.backends.kokoro_backend",
            "--collect-all", "kokoro",
            # misaki ships G2P data files (dictionaries, phoneme tables)
            # that must be bundled for espeak/en/ja/zh G2P to work
            "--collect-all", "misaki",
            # language_tags ships JSON data files (index.json etc.) loaded at
            # runtime via: misaki → phonemizer → segments → csvw → language_tags
            "--collect-all", "language_tags",
            # espeakng_loader ships the entire espeak-ng-data directory (369 files)
            # loaded at import time by misaki.espeak via get_data_path()
            "--collect-all", "espeakng_loader",
            # spacy en_core_web_sm model — misaki.en tries to spacy.cli.download()
            # at runtime if not found, which calls pip as a subprocess and crashes
            # the frozen binary. Bundle the model so spacy.util.is_package() passes.
            "--collect-all", "en_core_web_sm",
            "--copy-metadata", "en_core_web_sm",
            "--hidden-import", "en_core_web_sm",
            # unidic-lite ships the MeCab dictionary used by fugashi (pulled in
            # by misaki[ja]). The dict lives in unidic_lite/dicdir/ and is
            # discovered via the package's DICDIR constant, so the data files
            # must be collected or Japanese Kokoro voices crash at runtime.
            "--collect-all", "unidic_lite",
            "--hidden-import", "loguru",
        ]
    )

    # Add CUDA-specific hidden imports
    if cuda:
        logger.info("Building with CUDA support")
        args.extend(
            [
                "--hidden-import", "torch.cuda",
                "--hidden-import", "torch.backends.cudnn",
            ]
        )
    else:
        # Exclude NVIDIA CUDA packages from CPU-only builds to keep binary small.
        # When building from a venv with CUDA torch installed, PyInstaller would
        # bundle ~3GB of NVIDIA shared libraries. Each nvidia package is passed
        # to --exclude-module.
        nvidia_packages = [
            "nvidia",
            "nvidia.cublas",
            "nvidia.cuda_cupti",
            "nvidia.cuda_nvrtc",
            "nvidia.cuda_runtime",
            "nvidia.cudnn",
            "nvidia.cufft",
            "nvidia.curand",
            "nvidia.cusolver",
            "nvidia.cusparse",
            "nvidia.nccl",
            "nvidia.nvjitlink",
            "nvidia.nvtx",
        ]
        for pkg in nvidia_packages:
            args.extend(["--exclude-module", pkg])

    # Add MLX-specific imports if building on Apple Silicon (never for CUDA builds)
    if not cuda:
        if is_apple_silicon():
            logger.info("Building for Apple Silicon - including MLX dependencies")
            args.extend(
                [
                    "--hidden-import", "backend.backends.mlx_backend",
                    "--hidden-import", "mlx",
                    "--hidden-import", "mlx.core",
                    "--hidden-import", "mlx.nn",
                    "--hidden-import", "mlx_audio",
                    "--hidden-import", "mlx_audio.tts",
                    "--hidden-import", "mlx_audio.stt",
                    "--collect-submodules", "mlx",
                    "--collect-submodules", "mlx_audio",
                    # Use --collect-all so PyInstaller bundles both data files AND
                    # native shared libraries (.dylib, .metallib) for MLX.
                    # Previously only --collect-data was used, which caused MLX to
                    # raise OSError at runtime inside the bundled binary because
                    # the Metal shader libraries were missing.
                    "--collect-all", "mlx",
                    "--collect-all", "mlx_audio",
                ]
            )
        else:
            logger.info("Building for non-Apple Silicon platform - PyTorch only")

    args.extend(
        [
            "--distpath", str(backend_dir / "dist"),
            "--workpath", str(backend_dir / "build"),
            "--noconfirm",
            "--clean",
        ]
    )
    return args


def build_server(cuda=False):
    """Build Python server as standalone binary.

    Side effects: changes the process working directory to the directory
    containing this script, and on Windows CPU builds may reinstall torch
    via pip before and after the PyInstaller run.

    Args:
        cuda: If True, build with CUDA support and name the binary
              voicebox-server-cuda instead of voicebox-server.

    Raises:
        subprocess.CalledProcessError: If a pip torch swap/restore fails.
    """
    # Resolve to an absolute directory up front: the output paths below are
    # derived from it *before* os.chdir(), so a relative __file__ would
    # otherwise make --distpath/--workpath point at the wrong location.
    backend_dir = Path(__file__).parent.resolve()
    binary_name = "voicebox-server-cuda" if cuda else "voicebox-server"

    args = _pyinstaller_args(binary_name, cuda, backend_dir)

    # Change to backend directory (entry point and hook paths are relative).
    os.chdir(backend_dir)

    # For CPU builds on Windows, ensure we're using CPU-only torch.
    # If CUDA torch is installed (local dev), swap to CPU torch before building,
    # then restore CUDA torch after. This prevents PyInstaller from bundling
    # ~3GB of CUDA DLLs into the CPU binary.
    restore_cuda = False
    if not cuda and platform.system() == "Windows" and _has_cuda_torch():
        logger.info("CUDA torch detected — installing CPU torch for CPU build...")
        _reinstall_torch(_TORCH_CPU_INDEX_URL)
        restore_cuda = True

    # Run PyInstaller
    try:
        PyInstaller.__main__.run(args)
    finally:
        # Restore CUDA torch if we swapped it out (even on build failure)
        if restore_cuda:
            logger.info("Restoring CUDA torch...")
            _reinstall_torch(_TORCH_CUDA_INDEX_URL)

    logger.info("Binary built in %s", backend_dir / "dist" / binary_name)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Build voicebox-server binary")
    parser.add_argument(
        "--cuda",
        action="store_true",
        help="Build CUDA-enabled binary (voicebox-server-cuda)",
    )
    cli_args = parser.parse_args()
    build_server(cuda=cli_args.cuda)