Initial commit

This commit is contained in:
2026-04-24 19:18:15 +08:00
commit fbcbe08696
555 changed files with 96692 additions and 0 deletions

View File

@@ -0,0 +1,44 @@
"""Database package — ORM models, session management, and migrations.
Re-exports all public symbols so that ``from .database import get_db``
and ``from .database import Generation as DBGeneration`` continue to work
without changing any importers.
"""
from .models import (
Base,
AudioChannel,
ChannelDeviceMapping,
EffectPreset,
Generation,
GenerationVersion,
ProfileChannelMapping,
ProfileSample,
Project,
Story,
StoryItem,
VoiceProfile,
)
from .session import engine, SessionLocal, _db_path, init_db, get_db
# Public API of the package — mirrors the pre-split monolithic database
# module so every existing ``from .database import X`` keeps working.
__all__ = [
    # Models
    "Base",
    "AudioChannel",
    "ChannelDeviceMapping",
    "EffectPreset",
    "Generation",
    "GenerationVersion",
    "ProfileChannelMapping",
    "ProfileSample",
    "Project",
    "Story",
    "StoryItem",
    "VoiceProfile",
    # Session
    "engine",
    "SessionLocal",
    # Private-looking but intentionally exported — existing importers rely
    # on it (see module docstring).
    "_db_path",
    "init_db",
    "get_db",
]

View File

@@ -0,0 +1,226 @@
"""Column-level migrations for the voicebox SQLite database.
Why not Alembic? voicebox is a single-user desktop app shipping as a
PyInstaller binary. Every user has exactly one SQLite file. Alembic's
strengths -- migration tracking across environments, rollback, team
coordination -- don't apply here and would add bundling complexity
(alembic.ini, env.py, versions/ directory all need to survive
PyInstaller). The column-existence checks below are idempotent, run in
<50 ms on startup, and have worked reliably across 12 schema changes.
If the project ever moves to a server-based deployment or Postgres, this
decision should be revisited.
Adding a new migration:
1. Append a new ``_migrate_*`` helper at the bottom of this file.
2. Call it from ``run_migrations()`` in the appropriate spot.
3. The helper should check column/table existence before acting
(idempotent) and print a short message when it does real work.
"""
import logging
from sqlalchemy import inspect, text
logger = logging.getLogger(__name__)
def run_migrations(engine) -> None:
    """Run all schema migrations. Safe to call on every startup.

    Builds one inspector and the set of existing table names up front and
    hands both to each per-table helper. Every ``_migrate_*`` helper checks
    column/table existence before acting, so repeated calls are idempotent.
    Path normalization runs last, after all column additions.
    """
    inspector = inspect(engine)
    tables = set(inspector.get_table_names())
    _migrate_story_items(engine, inspector, tables)
    _migrate_profiles(engine, inspector, tables)
    _migrate_generations(engine, inspector, tables)
    _migrate_effect_presets(engine, inspector, tables)
    _migrate_generation_versions(engine, inspector, tables)
    _normalize_storage_paths(engine, tables)
# -- helpers ---------------------------------------------------------------
def _get_columns(inspector, table: str) -> set[str]:
return {col["name"] for col in inspector.get_columns(table)}
def _add_column(engine, table: str, column_sql: str, label: str) -> None:
    """Unconditionally ``ALTER TABLE`` *table* to add a column.

    Note: despite earlier wording, this helper does NOT check existence
    itself — the ``_migrate_*`` callers check ``_get_columns`` first and
    only call this when the column is missing.

    :param column_sql: column definition as raw SQL, e.g. ``"track INTEGER
        NOT NULL DEFAULT 0"`` — interpolated directly into the statement.
    :param label: short name used only for the log message.
    """
    with engine.connect() as conn:
        conn.execute(text(f"ALTER TABLE {table} ADD COLUMN {column_sql}"))
        conn.commit()
    logger.info("Added %s column to %s", label, table)
# -- per-table migrations --------------------------------------------------
def _migrate_story_items(engine, inspector, tables: set[str]) -> None:
    """Migrate the ``story_items`` table.

    Two generations of changes:
    1. Replace position-based ordering with absolute ``start_time_ms``
       timecodes; this recreates the table because SQLite lacks DROP COLUMN.
    2. Purely additive columns: track, trim_start_ms, trim_end_ms,
       version_id.
    """
    if "story_items" not in tables:
        return
    columns = _get_columns(inspector, "story_items")
    # Replace position-based ordering with absolute timecodes
    if "position" in columns:
        logger.info("Migrating story_items: removing position column, using start_time_ms")
        with engine.connect() as conn:
            if "start_time_ms" not in columns:
                conn.execute(text(
                    "ALTER TABLE story_items ADD COLUMN start_time_ms INTEGER DEFAULT 0"
                ))
            # Lay items out sequentially per story: each item starts where
            # the previous one ended (duration is stored in seconds), plus a
            # 200 ms gap. Items whose generation row is missing keep the
            # column default of 0.
            result = conn.execute(text("""
                SELECT si.id, si.story_id, si.position, g.duration
                FROM story_items si
                JOIN generations g ON si.generation_id = g.id
                ORDER BY si.story_id, si.position
            """))
            current_story_id = None
            current_time_ms = 0
            for item_id, story_id, _position, duration in result.fetchall():
                if story_id != current_story_id:
                    # New story: restart the running clock.
                    current_story_id = story_id
                    current_time_ms = 0
                conn.execute(
                    text("UPDATE story_items SET start_time_ms = :time WHERE id = :id"),
                    {"time": current_time_ms, "id": item_id},
                )
                current_time_ms += int((duration or 0) * 1000) + 200
            conn.commit()
            # Recreate table without the position column (SQLite lacks DROP COLUMN)
            conn.execute(text("""
                CREATE TABLE story_items_new (
                    id VARCHAR PRIMARY KEY,
                    story_id VARCHAR NOT NULL,
                    generation_id VARCHAR NOT NULL,
                    start_time_ms INTEGER NOT NULL DEFAULT 0,
                    track INTEGER NOT NULL DEFAULT 0,
                    trim_start_ms INTEGER NOT NULL DEFAULT 0,
                    trim_end_ms INTEGER NOT NULL DEFAULT 0,
                    version_id VARCHAR,
                    created_at DATETIME,
                    FOREIGN KEY (story_id) REFERENCES stories(id),
                    FOREIGN KEY (generation_id) REFERENCES generations(id)
                )
            """))
            conn.execute(text("""
                INSERT INTO story_items_new (id, story_id, generation_id, start_time_ms, track, trim_start_ms, trim_end_ms, version_id, created_at)
                SELECT id, story_id, generation_id, start_time_ms,
                       COALESCE(track, 0), COALESCE(trim_start_ms, 0), COALESCE(trim_end_ms, 0), version_id, created_at
                FROM story_items
            """))
            conn.execute(text("DROP TABLE story_items"))
            conn.execute(text("ALTER TABLE story_items_new RENAME TO story_items"))
            conn.commit()
        # FIX: re-read through a *fresh* inspector. The old code reused the
        # inspector passed in, but SQLAlchemy 2.0's Inspector caches
        # reflection results, so it would still describe the pre-migration
        # table and the checks below could see stale columns.
        inspector = inspect(engine)
        columns = _get_columns(inspector, "story_items")
    if "track" not in columns:
        _add_column(engine, "story_items", "track INTEGER NOT NULL DEFAULT 0", "track")
        # Fresh inspector again: the ALTER above invalidates cached reflection.
        columns = _get_columns(inspect(engine), "story_items")
    if "trim_start_ms" not in columns:
        _add_column(engine, "story_items", "trim_start_ms INTEGER NOT NULL DEFAULT 0", "trim_start_ms")
    if "trim_end_ms" not in columns:
        _add_column(engine, "story_items", "trim_end_ms INTEGER NOT NULL DEFAULT 0", "trim_end_ms")
    if "version_id" not in columns:
        _add_column(engine, "story_items", "version_id VARCHAR", "version_id")
def _migrate_profiles(engine, inspector, tables: set[str]) -> None:
    """Add any missing columns to the ``profiles`` table (additive only)."""
    if "profiles" not in tables:
        return
    present = _get_columns(inspector, "profiles")
    wanted = [
        ("avatar_path", "avatar_path VARCHAR"),
        ("effects_chain", "effects_chain TEXT"),
        # Voice type system — v0.3.x
        ("voice_type", "voice_type VARCHAR DEFAULT 'cloned'"),
        ("preset_engine", "preset_engine VARCHAR"),
        ("preset_voice_id", "preset_voice_id VARCHAR"),
        ("design_prompt", "design_prompt TEXT"),
        ("default_engine", "default_engine VARCHAR"),
    ]
    for column_name, column_ddl in wanted:
        if column_name not in present:
            _add_column(engine, "profiles", column_ddl, column_name)
def _migrate_generations(engine, inspector, tables: set[str]) -> None:
    """Add any missing columns to the ``generations`` table (additive only)."""
    if "generations" not in tables:
        return
    present = _get_columns(inspector, "generations")
    first_batch = [
        ("status", "status VARCHAR DEFAULT 'completed'"),
        ("error", "error TEXT"),
        ("engine", "engine VARCHAR DEFAULT 'qwen'"),
    ]
    for column_name, column_ddl in first_batch:
        if column_name not in present:
            _add_column(engine, "generations", column_ddl, column_name)
    # Re-read after the engine column (variable name shadowed outer scope in
    # old code) before checking the remaining columns.
    present = _get_columns(inspector, "generations")
    second_batch = [
        ("model_size", "model_size VARCHAR"),
        ("is_favorited", "is_favorited BOOLEAN DEFAULT 0"),
    ]
    for column_name, column_ddl in second_batch:
        if column_name not in present:
            _add_column(engine, "generations", column_ddl, column_name)
def _migrate_effect_presets(engine, inspector, tables: set[str]) -> None:
    """Add ``sort_order`` to ``effect_presets`` when it is missing."""
    if "effect_presets" not in tables:
        return
    if "sort_order" not in _get_columns(inspector, "effect_presets"):
        _add_column(engine, "effect_presets", "sort_order INTEGER DEFAULT 100", "sort_order")
def _migrate_generation_versions(engine, inspector, tables: set[str]) -> None:
    """Add ``source_version_id`` to ``generation_versions`` when missing."""
    if "generation_versions" not in tables:
        return
    if "source_version_id" not in _get_columns(inspector, "generation_versions"):
        _add_column(engine, "generation_versions", "source_version_id VARCHAR", "source_version_id")
def _normalize_storage_paths(engine, tables: set[str]) -> None:
    """Normalize stored file paths to be relative to the configured data dir.

    For each path-bearing column, resolve the stored value against the data
    dir and rewrite it in canonical storage form. Rows whose path cannot be
    resolved (``resolve_storage_path`` returns None) are left untouched.
    """
    from pathlib import Path
    from ..config import get_data_dir, to_storage_path, resolve_storage_path
    # NOTE(review): data_dir is never used below — get_data_dir() may be
    # called for side effects (e.g. ensuring the directory exists); confirm
    # before removing.
    data_dir = get_data_dir()
    # (table, column) pairs whose values are file paths.
    path_columns = [
        ("generations", "audio_path"),
        ("generation_versions", "audio_path"),
        ("profile_samples", "audio_path"),
        ("profiles", "avatar_path"),
    ]
    total_fixed = 0
    with engine.connect() as conn:
        for table, column in path_columns:
            if table not in tables:
                continue
            rows = conn.execute(
                text(f"SELECT id, {column} FROM {table} WHERE {column} IS NOT NULL")
            ).fetchall()
            for row_id, path_val in rows:
                if not path_val:
                    # Empty string: nothing to normalize.
                    continue
                p = Path(path_val)
                resolved = resolve_storage_path(p)
                if resolved is None:
                    continue
                normalized = to_storage_path(resolved)
                # Only write back when the stored value actually changes.
                if normalized != path_val:
                    conn.execute(
                        text(f"UPDATE {table} SET {column} = :path WHERE id = :id"),
                        {"path": normalized, "id": row_id},
                    )
                    total_fixed += 1
        # Single commit covers all updates; skipped when nothing changed.
        if total_fixed > 0:
            conn.commit()
            logger.info("Normalized %d stored file paths", total_fixed)

169
backend/database/models.py Normal file
View File

@@ -0,0 +1,169 @@
"""ORM model definitions for the voicebox SQLite database."""
from datetime import datetime
import uuid
from sqlalchemy import Column, String, Integer, Float, DateTime, Text, ForeignKey, Boolean
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()  # shared declarative base for all ORM models below
class VoiceProfile(Base):
    """Voice profile.

    voice_type discriminates three flavours:
    - "cloned" — traditional reference-audio profiles (all cloning engines)
    - "preset" — engine-specific pre-built voice (e.g. Kokoro voices)
    - "designed" — text-described voice (e.g. Qwen CustomVoice, future)
    """
    __tablename__ = "profiles"
    # Client-side UUID string primary key.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String, unique=True, nullable=False)  # display name, unique
    description = Column(Text)
    language = Column(String, default="en")
    # Path to an avatar image, normalized by migrations._normalize_storage_paths.
    avatar_path = Column(String, nullable=True)
    # Serialized effect chain — presumably JSON; confirm against writers.
    effects_chain = Column(Text, nullable=True)
    # Voice type system — added v0.3.x
    voice_type = Column(String, default="cloned")  # "cloned" | "preset" | "designed"
    preset_engine = Column(String, nullable=True)  # e.g. "kokoro" — only for preset
    preset_voice_id = Column(String, nullable=True)  # e.g. "am_adam" — only for preset
    design_prompt = Column(Text, nullable=True)  # text description — only for designed
    default_engine = Column(String, nullable=True)  # auto-selected engine, locked for preset
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
class ProfileSample(Base):
    """Audio sample attached to a voice profile.

    Pairs a reference recording with its transcript — the inputs a cloning
    engine needs for a "cloned" VoiceProfile.
    """
    __tablename__ = "profile_samples"
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    profile_id = Column(String, ForeignKey("profiles.id"), nullable=False)
    # Storage path to the sample audio (normalized at startup by migrations).
    audio_path = Column(String, nullable=False)
    # Transcript of the sample audio.
    reference_text = Column(Text, nullable=False)
class Generation(Base):
    """A single TTS generation."""
    __tablename__ = "generations"
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    profile_id = Column(String, ForeignKey("profiles.id"), nullable=False)
    # Input text that was (or will be) synthesized.
    text = Column(Text, nullable=False)
    language = Column(String, default="en")
    # Output audio path; NULL until/unless audio exists.
    audio_path = Column(String, nullable=True)
    # Duration in seconds (migrations convert it to ms via * 1000).
    duration = Column(Float, nullable=True)
    seed = Column(Integer)  # RNG seed used for the generation
    instruct = Column(Text)  # optional instruction/style prompt
    engine = Column(String, default="qwen")  # TTS engine identifier
    model_size = Column(String, nullable=True)
    # Lifecycle state; "completed" by default — other values not visible here.
    status = Column(String, default="completed")
    error = Column(Text, nullable=True)  # failure detail when status reflects an error
    is_favorited = Column(Boolean, default=False)
    created_at = Column(DateTime, default=datetime.utcnow)
class Story(Base):
    """A story that sequences multiple generations.

    Items are attached via StoryItem rows keyed by story_id.
    """
    __tablename__ = "stories"
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String, nullable=False)
    description = Column(Text)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
class StoryItem(Base):
    """Links a generation to a story at a specific timecode."""
    __tablename__ = "story_items"
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    story_id = Column(String, ForeignKey("stories.id"), nullable=False)
    generation_id = Column(String, ForeignKey("generations.id"), nullable=False)
    # Specific audio version to use; NULL presumably means the default
    # version — confirm against playback code.
    version_id = Column(String, ForeignKey("generation_versions.id"), nullable=True)
    # Absolute placement on the story timeline, in milliseconds.
    start_time_ms = Column(Integer, nullable=False, default=0)
    track = Column(Integer, nullable=False, default=0)  # timeline track/lane index
    # Trim amounts in milliseconds from each end of the clip.
    trim_start_ms = Column(Integer, nullable=False, default=0)
    trim_end_ms = Column(Integer, nullable=False, default=0)
    created_at = Column(DateTime, default=datetime.utcnow)
class Project(Base):
    """Audio studio project (JSON blob).

    The project state is opaque to the database layer and stored whole in
    ``data``.
    """
    __tablename__ = "projects"
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String, nullable=False)
    data = Column(Text)  # serialized project state (JSON per the docstring)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
class GenerationVersion(Base):
    """A version of a generation's audio (original, processed, alternate takes)."""
    __tablename__ = "generation_versions"
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    generation_id = Column(String, ForeignKey("generations.id"), nullable=False)
    # Human-readable tag, e.g. "clean" (seed.backfill_generation_versions).
    label = Column(String, nullable=False)
    audio_path = Column(String, nullable=False)
    # Effect chain applied to produce this version; NULL for unprocessed audio.
    effects_chain = Column(Text, nullable=True)
    # Self-referential link to the version this one was derived from.
    source_version_id = Column(String, ForeignKey("generation_versions.id"), nullable=True)
    is_default = Column(Boolean, default=False)
    created_at = Column(DateTime, default=datetime.utcnow)
class EffectPreset(Base):
    """Saved effect chain preset."""
    __tablename__ = "effect_presets"
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String, unique=True, nullable=False)
    description = Column(Text, nullable=True)
    # JSON-encoded effect chain (seed.seed_builtin_presets uses json.dumps).
    effects_chain = Column(Text, nullable=False)
    # True for presets shipped with the app and seeded at startup.
    is_builtin = Column(Boolean, default=False)
    sort_order = Column(Integer, default=100)  # display ordering; lower sorts first — presumably
    created_at = Column(DateTime, default=datetime.utcnow)
class AudioChannel(Base):
    """Audio output channel (bus).

    A "Default" channel is created at startup by session.init_db().
    """
    __tablename__ = "audio_channels"
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String, nullable=False)
    is_default = Column(Boolean, default=False)  # exactly one expected — not enforced here
    created_at = Column(DateTime, default=datetime.utcnow)
class ChannelDeviceMapping(Base):
    """Mapping between a channel and an OS audio device."""
    __tablename__ = "channel_device_mappings"
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    channel_id = Column(String, ForeignKey("audio_channels.id"), nullable=False)
    # OS-level device identifier; format is platform-specific — confirm
    # against the audio layer.
    device_id = Column(String, nullable=False)
class ProfileChannelMapping(Base):
    """Many-to-many mapping between voice profiles and audio channels."""
    __tablename__ = "profile_channel_mappings"
    # Composite primary key: one row per (profile, channel) pair.
    profile_id = Column(String, ForeignKey("profiles.id"), primary_key=True)
    channel_id = Column(String, ForeignKey("audio_channels.id"), primary_key=True)

73
backend/database/seed.py Normal file
View File

@@ -0,0 +1,73 @@
"""Post-migration data seeding and backfills."""
import json
import logging
import uuid
from .. import config
logger = logging.getLogger(__name__)
def backfill_generation_versions(SessionLocal, Generation, GenerationVersion) -> None:
    """Create 'clean' version entries for generations that predate the versions feature."""
    session = SessionLocal()
    try:
        # Generations that already have at least one version row.
        already_versioned = {
            gen_id for (gen_id,) in session.query(GenerationVersion.generation_id).all()
        }
        candidates = (
            session.query(Generation)
            .filter(
                Generation.status == "completed",
                Generation.audio_path.isnot(None),
                Generation.audio_path != "",
            )
            .all()
        )
        created = 0
        for gen in candidates:
            if gen.id in already_versioned:
                continue
            # Skip rows whose audio file is missing or unresolvable.
            resolved = config.resolve_storage_path(gen.audio_path)
            if resolved is None or not resolved.exists():
                continue
            session.add(GenerationVersion(
                id=str(uuid.uuid4()),
                generation_id=gen.id,
                label="clean",
                audio_path=gen.audio_path,
                effects_chain=None,
                is_default=True,
            ))
            created += 1
        if created > 0:
            session.commit()
            logger.info("Backfilled %d generation version entries", created)
    finally:
        session.close()
def seed_builtin_presets(SessionLocal, EffectPreset) -> None:
    """Ensure built-in effect presets exist in the database."""
    from ..utils.effects import BUILTIN_PRESETS
    session = SessionLocal()
    try:
        for position, (_key, data) in enumerate(BUILTIN_PRESETS.items()):
            # Explicit sort_order wins; otherwise fall back to dict order.
            order = data.get("sort_order", position)
            row = session.query(EffectPreset).filter_by(name=data["name"]).first()
            if not row:
                session.add(EffectPreset(
                    id=str(uuid.uuid4()),
                    name=data["name"],
                    description=data.get("description"),
                    effects_chain=json.dumps(data["effects_chain"]),
                    is_builtin=True,
                    sort_order=order,
                ))
            elif row.sort_order != order:
                # Keep existing rows' ordering in sync with the shipped data.
                row.sort_order = order
        session.commit()
    finally:
        session.close()

View File

@@ -0,0 +1,78 @@
"""Engine creation, initialization, and session management."""
import logging
import uuid
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from .. import config
from .models import (
Base,
AudioChannel,
EffectPreset,
Generation,
GenerationVersion,
ProfileChannelMapping,
VoiceProfile,
)
from .migrations import run_migrations
from .seed import backfill_generation_versions, seed_builtin_presets
logger = logging.getLogger(__name__)
# Initialized by init_db() — module-level so importers can do
# ``from .session import engine`` after startup.
engine = None  # SQLAlchemy Engine once init_db() runs
SessionLocal = None  # sessionmaker factory once init_db() runs
_db_path = None  # path to the SQLite file (config.get_db_path())
def init_db() -> None:
    """Initialize the database engine, run migrations, create tables, and seed data.

    Must be called once at startup before ``get_db``/``SessionLocal`` are
    used; populates the module-level ``engine``, ``SessionLocal`` and
    ``_db_path`` globals.
    """
    global engine, SessionLocal, _db_path
    _db_path = config.get_db_path()
    _db_path.parent.mkdir(parents=True, exist_ok=True)
    engine = create_engine(
        f"sqlite:///{_db_path}",
        # Sessions may be touched from multiple threads (FastAPI); disable
        # SQLite's same-thread check.
        connect_args={"check_same_thread": False},
    )
    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    # Migrations first (they may recreate tables), then create anything new.
    run_migrations(engine)
    Base.metadata.create_all(bind=engine)
    # Create default audio channel if it doesn't exist
    db = SessionLocal()
    try:
        # FIX: use .is_(True) instead of ``== True`` — the idiomatic
        # SQLAlchemy boolean comparison (and lint-clean, E712); equivalent
        # result for SQLite's 0/1 booleans.
        default_channel = db.query(AudioChannel).filter(AudioChannel.is_default.is_(True)).first()
        if not default_channel:
            default_channel = AudioChannel(
                id=str(uuid.uuid4()),
                name="Default",
                is_default=True,
            )
            db.add(default_channel)
            # Map every existing profile onto the newly created channel.
            for profile in db.query(VoiceProfile).all():
                db.add(ProfileChannelMapping(
                    profile_id=profile.id,
                    channel_id=default_channel.id,
                ))
            db.commit()
    finally:
        db.close()
    backfill_generation_versions(SessionLocal, Generation, GenerationVersion)
    seed_builtin_presets(SessionLocal, EffectPreset)
def get_db():
    """Yield a database session (FastAPI dependency).

    The session is always closed after the request, whether or not the
    handler raised.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()