"""Health and infrastructure endpoints."""
|
|
|
|
import asyncio
|
|
import os
|
|
import signal
|
|
from pathlib import Path
|
|
|
|
import torch
|
|
from fastapi import APIRouter, Depends
|
|
from fastapi.responses import FileResponse
|
|
from sqlalchemy.orm import Session
|
|
|
|
from .. import config, models
|
|
from ..services import tts
|
|
from ..database import get_db
|
|
from ..utils.platform_detect import get_backend_type
|
|
|
|
router = APIRouter()
|
|
|
|
# Frontend build directory — present in Docker, absent in dev/API-only mode
|
|
_frontend_dir = Path(__file__).resolve().parent.parent.parent / "frontend"


@router.get("/")
async def root():
    """Root endpoint: serves the SPA index.html in Docker, JSON otherwise."""
    from .. import __version__

    index = _frontend_dir / "index.html"
    if index.is_file():
        return FileResponse(index, media_type="text/html")
    return {"message": "voicebox API", "version": __version__}


# Fire-and-forget tasks are kept here so the event loop's weak references
# don't let them be garbage-collected before they run.
_background_tasks: set[asyncio.Task] = set()


@router.post("/shutdown")
async def shutdown():
    """Gracefully shut down the server."""

    async def shutdown_async():
        # Brief delay so the HTTP response can be flushed before SIGTERM.
        await asyncio.sleep(0.1)
        os.kill(os.getpid(), signal.SIGTERM)

    task = asyncio.create_task(shutdown_async())
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
    return {"message": "Shutting down..."}


@router.post("/watchdog/disable")
async def watchdog_disable():
    """Disable the parent process watchdog so the server keeps running."""
    from backend.server import disable_watchdog

    disable_watchdog()
    return {"message": "Watchdog disabled"}


@router.get("/health", response_model=models.HealthResponse)
async def health():
    """Health check endpoint."""
    from huggingface_hub import constants as hf_constants

    tts_model = tts.get_tts_model()
    backend_type = get_backend_type()

    has_cuda = torch.cuda.is_available()
    has_mps = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()

    has_xpu = False
    xpu_name = None
    try:
        import intel_extension_for_pytorch as ipex  # noqa: F401 -- side-effect import enables XPU

        if hasattr(torch, "xpu") and torch.xpu.is_available():
            has_xpu = True
            try:
                xpu_name = torch.xpu.get_device_name(0)
            except Exception:
                xpu_name = "Intel GPU"
    except ImportError:
        pass

    has_directml = False
    directml_name = None
    try:
        import torch_directml

        if torch_directml.device_count() > 0:
            has_directml = True
            try:
                directml_name = torch_directml.device_name(0)
            except Exception:
                directml_name = "DirectML GPU"
    except ImportError:
        pass

    gpu_compat_warning = None
    if has_cuda:
        from ..backends.base import check_cuda_compatibility

        _compatible, gpu_compat_warning = check_cuda_compatibility()

    gpu_available = has_cuda or has_mps or has_xpu or has_directml or backend_type == "mlx"

    gpu_type = None
    if has_cuda:
        gpu_type = f"CUDA ({torch.cuda.get_device_name(0)})"
    elif has_mps:
        gpu_type = "MPS (Apple Silicon)"
    elif backend_type == "mlx":
        gpu_type = "Metal (Apple Silicon via MLX)"
    elif has_xpu:
        gpu_type = f"XPU ({xpu_name})"
    elif has_directml:
        gpu_type = f"DirectML ({directml_name})"

    vram_used = None
    if has_cuda:
        vram_used = torch.cuda.memory_allocated() / 1024 / 1024
    elif has_xpu:
        try:
            vram_used = torch.xpu.memory_allocated() / 1024 / 1024
        except Exception:
            pass  # memory_allocated() may not be available on all IPEX versions

    model_loaded = False
    model_size = None
    try:
        if tts_model.is_loaded():
            model_loaded = True
            model_size = getattr(tts_model, "_current_model_size", None)
            if not model_size:
                model_size = getattr(tts_model, "model_size", None)
    except Exception:
        model_loaded = False
        model_size = None

    model_downloaded = None  # None means "could not determine"
    try:
        from ..backends import get_model_config

        default_config = get_model_config("qwen-tts-1.7B")
        default_model_id = default_config.hf_repo_id if default_config else "Qwen/Qwen3-TTS-12Hz-1.7B-Base"

        try:
            from huggingface_hub import scan_cache_dir

            cache_info = scan_cache_dir()
            model_downloaded = any(repo.repo_id == default_model_id for repo in cache_info.repos)
        except Exception:
            # scan_cache_dir unavailable or failed; probe the HF cache layout directly
            # and look for any weight files in the repo's cache directory.
            cache_dir = hf_constants.HF_HUB_CACHE
            repo_cache = Path(cache_dir) / ("models--" + default_model_id.replace("/", "--"))
            if repo_cache.exists():
                model_downloaded = any(
                    any(repo_cache.rglob(pattern))
                    for pattern in ("*.bin", "*.safetensors", "*.pt", "*.pth", "*.npz")
                )
    except Exception:
        pass

    return models.HealthResponse(
        status="healthy",
        model_loaded=model_loaded,
        model_downloaded=model_downloaded,
        model_size=model_size,
        gpu_available=gpu_available,
        gpu_type=gpu_type,
        vram_used_mb=vram_used,
        backend_type=backend_type,
        backend_variant=os.environ.get(
            "VOICEBOX_BACKEND_VARIANT",
            "cuda" if has_cuda else ("xpu" if has_xpu else "cpu"),
        ),
        gpu_compatibility_warning=gpu_compat_warning,
    )
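
# Illustrative /health payload on a machine with an NVIDIA GPU. Every value
# below is an example only; backend_type is whatever get_backend_type() reports
# and gpu_type/model_size depend on the detected hardware and loaded model:
#
#   {
#     "status": "healthy",
#     "model_loaded": true,
#     "model_downloaded": true,
#     "model_size": "1.7B",
#     "gpu_available": true,
#     "gpu_type": "CUDA (NVIDIA GeForce RTX 4090)",
#     "vram_used_mb": 3412.5,
#     "backend_type": "cuda",
#     "backend_variant": "cuda",
#     "gpu_compatibility_warning": null
#   }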


@router.get("/health/filesystem", response_model=models.FilesystemHealthResponse)
async def filesystem_health():
    """Check filesystem health: directory existence, write permissions, and disk space."""
    import shutil

    dirs_to_check = {
        "generations": config.get_generations_dir(),
        "profiles": config.get_profiles_dir(),
        "data": config.get_data_dir(),
    }

    checks: list[models.DirectoryCheck] = []
    all_ok = True

    for _label, dir_path in dirs_to_check.items():
        exists = dir_path.exists()
        writable = False
        error = None
        if exists:
            # Probe writability by round-tripping a throwaway file.
            probe = dir_path / ".voicebox_probe"
            try:
                probe.write_text("ok")
                probe.unlink()
                writable = True
            except PermissionError:
                error = "Permission denied"
            except OSError as e:
                error = str(e)
            finally:
                try:
                    probe.unlink(missing_ok=True)
                except Exception:
                    pass
        else:
            error = "Directory does not exist"

        if not exists or not writable:
            all_ok = False

        checks.append(
            models.DirectoryCheck(
                path=str(dir_path.resolve()),
                exists=exists,
                writable=writable,
                error=error,
            )
        )

    disk_free_mb = None
    disk_total_mb = None
    try:
        usage = shutil.disk_usage(str(config.get_data_dir()))
        disk_free_mb = round(usage.free / (1024 * 1024), 1)
        disk_total_mb = round(usage.total / (1024 * 1024), 1)
        if disk_free_mb < 500:
            # Flag unhealthy when less than ~500 MB of free disk remain.
            all_ok = False
    except OSError:
        all_ok = False

    return models.FilesystemHealthResponse(
        healthy=all_ok,
        disk_free_mb=disk_free_mb,
        disk_total_mb=disk_total_mb,
        directories=checks,
    )
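
# Minimal smoke-test sketch using FastAPI's TestClient. The app import path is
# an assumption for illustration; substitute wherever this router is mounted:
#
#   from fastapi.testclient import TestClient
#   from backend.server import app  # hypothetical: this module may not export `app`
#
#   client = TestClient(app)
#   body = client.get("/health/filesystem").json()
#   assert isinstance(body["healthy"], bool)
#   assert all("path" in d for d in body["directories"])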