Initial commit

This commit is contained in:
2026-04-24 19:18:15 +08:00
commit fbcbe08696
555 changed files with 96692 additions and 0 deletions

6085
tauri/src-tauri/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,49 @@
# Cargo manifest for the voicebox Tauri backend (src-tauri).
[package]
name = "voicebox"
version = "0.4.5"
description = "A production-quality desktop app for Qwen3-TTS voice cloning and generation"
authors = ["you"]
license = ""
repository = ""
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

# Compile-time dependencies used by build.rs and Tauri's code generation.
[build-dependencies]
tauri-build = { version = "2.0", features = [] }

# Runtime dependencies shared by all platforms.
[dependencies]
tauri = { version = "2.0", features = [] }
tauri-plugin-dialog = "2.0"
tauri-plugin-fs = "2.0"
tauri-plugin-shell = "2.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.12", features = ["blocking", "json"] }
# Audio stack: hound writes WAV files, cpal handles audio device I/O,
# base64 encodes captured audio for transport to the frontend.
hound = "3.5"
base64 = "0.22"
cpal = "0.15"
symphonia = { version = "0.5", features = ["all"] }
scopeguard = "1.2.0"

# macOS-only: system-audio capture (ScreenCaptureKit) and native interop.
[target.'cfg(target_os = "macos")'.dependencies]
screencapturekit = { version = "1", features = ["async"] }
coreaudio-sys = "0.2"
objc = "0.2"
core-foundation-sys = "0.8"

# Windows-only: loopback capture via WASAPI + COM initialization.
[target.'cfg(target_os = "windows")'.dependencies]
wasapi = "0.22"
windows = { version = "0.62", features = ["Win32_Foundation", "Win32_UI_WindowsAndMessaging", "Win32_System_Com"] }

[target.'cfg(target_os = "linux")'.dependencies]
webkit2gtk = "2.0"

# Auto-update support is desktop-only (excluded on mobile targets).
[target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies]
tauri-plugin-updater = "2.0"
tauri-plugin-process = "2.0"

[features]
# This feature is used for production builds or when `devPath` points to the filesystem
custom-protocol = ["tauri/custom-protocol"]

View File

@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<!-- Hardened-runtime entitlements for the voicebox macOS app bundle. -->
<plist version="1.0">
<dict>
	<!-- Allow JIT-compiled code pages. -->
	<key>com.apple.security.cs.allow-jit</key>
	<true/>
	<!-- Allow writable+executable memory without MAP_JIT.
	     NOTE(review): together with disable-library-validation below these
	     relax code-signing protections — confirm which embedded component
	     actually requires each exception. -->
	<key>com.apple.security.cs.allow-unsigned-executable-memory</key>
	<true/>
	<!-- Permit loading libraries not signed by the same team. -->
	<key>com.apple.security.cs.disable-library-validation</key>
	<true/>
	<!-- Microphone access (voice-sample recording). -->
	<key>com.apple.security.device.audio-input</key>
	<true/>
	<!-- Read/write access to files the user picks in open/save dialogs. -->
	<key>com.apple.security.files.user-selected.read-write</key>
	<true/>
</dict>
</plist>

View File

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<!-- Extra Info.plist entries merged into the bundle: icon identifiers and the
     privacy usage strings macOS shows in permission prompts. -->
<plist version="1.0">
<dict>
	<key>CFBundleIconFile</key>
	<string>voicebox</string>
	<key>CFBundleIconName</key>
	<string>voicebox</string>
	<!-- Shown when the app first requests microphone access.
	     NOTE(review): "voicebox" is lower-case here but "Voicebox" in the
	     screen-capture string below — consider unifying the product name. -->
	<key>NSMicrophoneUsageDescription</key>
	<string>voicebox needs microphone access to record voice samples for voice cloning.</string>
	<!-- Shown when requesting screen recording (used for system-audio capture). -->
	<key>NSScreenCaptureUsageDescription</key>
	<string>Voicebox needs screen capture access to record system audio for voice samples.</string>
</dict>
</plist>

170
tauri/src-tauri/build.rs Normal file
View File

@@ -0,0 +1,170 @@
#[cfg(target_os = "macos")]
use std::process::Command;
/// Build script for the voicebox Tauri app.
///
/// Responsibilities:
/// * macOS: weak-link ScreenCaptureKit and add Swift runtime RPATHs so the
///   binary can still launch on macOS versions lacking the framework.
/// * macOS: compile the `.icon` bundle with `actool` and generate a `.icns`
///   via `sips` + `iconutil`.
/// * All platforms: stub any missing generated resources so Tauri's bundler
///   never fails, then delegate to `tauri_build::build()`.
fn main() {
    // Link Swift runtime libraries for screencapturekit crate
    #[cfg(target_os = "macos")]
    {
        // ScreenCaptureKit does not exist on macOS 11, so weak-link it to
        // allow the app to launch and gate usage at runtime instead.
        println!("cargo:rustc-link-arg=-Wl,-weak_framework,ScreenCaptureKit");
        // Add Swift runtime library paths to RPATH
        println!("cargo:rustc-link-arg=-Wl,-rpath,/usr/lib/swift");
        println!("cargo:rustc-link-arg=-L/usr/lib/swift");
        // Also try Xcode's Swift libraries
        if let Ok(output) = Command::new("xcode-select").arg("-p").output() {
            if output.status.success() {
                let xcode_path = String::from_utf8_lossy(&output.stdout).trim().to_string();
                let swift_lib_path = format!(
                    "{}/Toolchains/XcodeDefault.xctoolchain/usr/lib/swift/macosx",
                    xcode_path
                );
                println!("cargo:rustc-link-arg=-Wl,-rpath,{}", swift_lib_path);
                println!("cargo:rustc-link-arg=-L{}", swift_lib_path);
            }
        }
    }
    // All generated artifacts (compiled icons, stub resources) live in ./gen.
    let project_root = env!("CARGO_MANIFEST_DIR");
    let gen_dir = format!("{}/gen", project_root);
    std::fs::create_dir_all(&gen_dir).expect("Failed to create gen directory");
    // Compile macOS Liquid Glass icon
    #[cfg(target_os = "macos")]
    {
        // voicebox.icon is in tauri/assets/voicebox.icon (one level up from src-tauri)
        let icon_source = format!("{}/../assets/voicebox.icon", project_root);
        if std::path::Path::new(&icon_source).exists() {
            // Re-run this script whenever the icon bundle changes.
            println!("cargo:rerun-if-changed={}", icon_source);
            println!("cargo:rerun-if-changed={}/icon.json", icon_source);
            println!("cargo:rerun-if-changed={}/Assets", icon_source);
            let partial_plist = format!("{}/partial.plist", gen_dir);
            // actool compiles the .icon bundle into Assets.car plus a partial
            // Info.plist fragment.
            let output = Command::new("xcrun")
                .args([
                    "actool",
                    "--compile",
                    &gen_dir,
                    "--output-format",
                    "human-readable-text",
                    "--output-partial-info-plist",
                    &partial_plist,
                    "--app-icon",
                    "voicebox",
                    "--include-all-app-icons",
                    "--target-device",
                    "mac",
                    "--minimum-deployment-target",
                    "11.0",
                    "--platform",
                    "macosx",
                    &icon_source,
                ])
                .output();
            match output {
                Ok(output) => {
                    if !output.status.success() {
                        eprintln!("actool stderr: {}", String::from_utf8_lossy(&output.stderr));
                        eprintln!("actool stdout: {}", String::from_utf8_lossy(&output.stdout));
                        panic!("actool failed to compile icon");
                    }
                    println!("Successfully compiled icon to {}", gen_dir);
                }
                Err(e) => {
                    eprintln!("Failed to execute xcrun actool: {}", e);
                    eprintln!("Make sure you have Xcode Command Line Tools installed");
                    panic!("Icon compilation failed");
                }
            }
            // Generate voicebox.icns from the source PNG via sips + iconutil
            let icns_path = format!("{}/voicebox.icns", gen_dir);
            if !std::path::Path::new(&icns_path).exists() {
                let source_png = format!("{}/Assets/Voicebox.png", icon_source);
                if std::path::Path::new(&source_png).exists() {
                    let iconset_dir = format!("{}/voicebox.iconset", gen_dir);
                    std::fs::create_dir_all(&iconset_dir).ok();
                    // Standard macOS iconset entries: (pixel size, file name).
                    let sizes: &[(u32, &str)] = &[
                        (16, "icon_16x16.png"),
                        (32, "icon_16x16@2x.png"),
                        (32, "icon_32x32.png"),
                        (64, "icon_32x32@2x.png"),
                        (128, "icon_128x128.png"),
                        (256, "icon_128x128@2x.png"),
                        (256, "icon_256x256.png"),
                        (512, "icon_256x256@2x.png"),
                        (512, "icon_512x512.png"),
                        (1024, "icon_512x512@2x.png"),
                    ];
                    for (size, name) in sizes {
                        let dest = format!("{}/{}", iconset_dir, name);
                        // sips resizes the source PNG to each iconset entry;
                        // failures are logged but non-fatal.
                        let status = Command::new("sips")
                            .args([
                                "-z",
                                &size.to_string(),
                                &size.to_string(),
                                &source_png,
                                "--out",
                                &dest,
                            ])
                            .output();
                        if let Ok(out) = status {
                            if !out.status.success() {
                                eprintln!(
                                    "sips failed for {}: {}",
                                    name,
                                    String::from_utf8_lossy(&out.stderr)
                                );
                            }
                        }
                    }
                    // Pack the iconset folder into a single .icns file.
                    let iconutil_output = Command::new("iconutil")
                        .args(["-c", "icns", "-o", &icns_path, &iconset_dir])
                        .output();
                    match iconutil_output {
                        Ok(out) if out.status.success() => {
                            println!("Generated voicebox.icns");
                        }
                        Ok(out) => {
                            eprintln!("iconutil failed: {}", String::from_utf8_lossy(&out.stderr));
                        }
                        Err(e) => {
                            eprintln!("Failed to run iconutil: {}", e);
                        }
                    }
                    // Clean up iconset directory
                    std::fs::remove_dir_all(&iconset_dir).ok();
                }
            }
        } else {
            println!(
                "cargo:warning=Icon source not found at {}, skipping icon compilation",
                icon_source
            );
        }
    }
    // Ensure all resource files exist so Tauri's bundler doesn't fail.
    // On non-macOS these are always stubs. On macOS, actool may not produce
    // Assets.car if the Xcode version doesn't support the .icon format.
    {
        let required = ["Assets.car", "voicebox.icns", "partial.plist"];
        for name in required {
            let path = format!("{}/{}", gen_dir, name);
            if !std::path::Path::new(&path).exists() {
                // An empty placeholder satisfies the bundler's existence check.
                std::fs::write(&path, b"").ok();
            }
        }
    }
    tauri_build::build()
}

View File

@@ -0,0 +1,28 @@
{
"$schema": "https://schema.tauri.app/config/2",
"identifier": "default",
"description": "Default permissions for voicebox",
"platforms": ["linux", "macOS", "windows"],
"windows": ["main"],
"remote": {
"urls": ["http://localhost:*"]
},
"permissions": [
"core:default",
"core:window:default",
"core:window:allow-start-dragging",
"core:webview:default",
"core:webview:allow-internal-toggle-devtools",
"shell:allow-open",
"shell:allow-execute",
"shell:allow-spawn",
"updater:default",
"process:default",
"dialog:default",
"dialog:allow-save",
"dialog:allow-open",
"fs:default",
"fs:read-all",
"fs:write-all"
]
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"default":{"identifier":"default","description":"Default permissions for voicebox","remote":{"urls":["http://localhost:*"]},"local":true,"windows":["main"],"permissions":["core:default","core:window:default","core:window:allow-start-dragging","core:webview:default","core:webview:allow-internal-toggle-devtools","shell:allow-open","shell:allow-execute","shell:allow-spawn","updater:default","process:default","dialog:default","dialog:allow-save","dialog:allow-open","fs:default","fs:read-all","fs:write-all"],"platforms":["linux","macOS","windows"]}}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 128 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.1 KiB

View File

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Android adaptive launcher icon: mipmap foreground layered over a solid
     color background resource. -->
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <foreground android:drawable="@mipmap/ic_launcher_foreground"/>
    <background android:drawable="@color/ic_launcher_background"/>
</adaptive-icon>

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Background color referenced by the adaptive launcher icon (white). -->
<resources>
    <color name="ic_launcher_background">#fff</color>
</resources>

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 311 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 296 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 514 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 514 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 831 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 388 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 793 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 793 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 514 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

View File

@@ -0,0 +1,406 @@
use crate::audio_capture::AudioCaptureState;
use base64::{engine::general_purpose, Engine as _};
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use cpal::{SampleFormat, StreamConfig};
use hound::{WavSpec, WavWriter};
use std::io::Cursor;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
/// Locate a PulseAudio/PipeWire "monitor" source by shelling out to `pactl`.
///
/// Prefers the monitor belonging to the default sink; otherwise returns the
/// first listed source whose name ends in ".monitor". Returns `None` when
/// `pactl` is unavailable, fails, or reports no usable source.
fn find_monitor_source_via_pactl() -> Option<String> {
    // `pactl list short sources` prints one tab-separated row per source.
    let listing = std::process::Command::new("pactl")
        .args(["list", "short", "sources"])
        .output()
        .ok()
        .filter(|o| o.status.success())?;
    let listing = String::from_utf8_lossy(&listing.stdout).into_owned();

    // Source names live in the second tab-separated column.
    let source_names: Vec<&str> = listing
        .lines()
        .filter_map(|line| line.split('\t').nth(1))
        .collect();

    // Ask for the default sink so we can prefer its dedicated monitor.
    let default_sink = std::process::Command::new("pactl")
        .args(["get-default-sink"])
        .output()
        .ok()
        .filter(|o| o.status.success())
        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string());

    if let Some(sink) = default_sink {
        let wanted = format!("{}.monitor", sink);
        if source_names.iter().any(|&n| n == wanted) {
            eprintln!(
                "Linux audio capture: Found default sink monitor via pactl: {}",
                wanted
            );
            return Some(wanted);
        }
    }

    // Fallback: accept any monitor source at all.
    let fallback = source_names.iter().find(|n| n.ends_with(".monitor"))?;
    let name = fallback.to_string();
    eprintln!(
        "Linux audio capture: Found monitor source via pactl: {}",
        name
    );
    Some(name)
}
/// Start capturing system audio on Linux using PulseAudio monitor sources.
///
/// On modern Linux with PulseAudio or PipeWire, we first try to detect the
/// monitor source via `pactl` and set the `PULSE_SOURCE` environment variable.
/// This tells PulseAudio's ALSA plugin to use the monitor as the default input
/// source for this process. If `pactl` is unavailable, we fall back to searching
/// cpal device names for "monitor".
///
/// Samples are appended to `state.samples` by the cpal callback until a stop
/// signal arrives (via `state.stop_tx`) or `max_duration_secs` elapses.
/// Errors raised on the capture thread are recorded in `state.error` rather
/// than returned here, because the thread outlives this call; `stop_capture`
/// surfaces them.
pub async fn start_capture(
    state: &AudioCaptureState,
    max_duration_secs: u32,
) -> Result<(), String> {
    // Reset previous samples
    state.reset();
    // Clone the shared handles the capture thread will own.
    let samples = state.samples.clone();
    let sample_rate_arc = state.sample_rate.clone();
    let channels_arc = state.channels.clone();
    let stop_tx = state.stop_tx.clone();
    let error_arc = state.error.clone();
    // Use AtomicBool for stop signal (works across threads)
    let stop_flag = Arc::new(AtomicBool::new(false));
    let stop_flag_clone = stop_flag.clone();
    // Create tokio channel and spawn a task to bridge it to the AtomicBool
    let (tx, mut rx) = tokio::sync::mpsc::channel::<()>(1);
    *stop_tx.lock().unwrap() = Some(tx);
    tokio::spawn(async move {
        // recv() returns on an explicit message OR when the sender is
        // dropped; either way the stop flag is raised.
        rx.recv().await;
        stop_flag_clone.store(true, Ordering::Relaxed);
    });
    // Spawn capture on a dedicated thread
    thread::spawn(move || {
        // Try to set PULSE_SOURCE to a monitor before initializing cpal.
        // This tells PulseAudio/PipeWire's ALSA plugin to use the monitor
        // as the default input source for this process.
        let monitor_source = find_monitor_source_via_pactl();
        if let Some(ref source_name) = monitor_source {
            eprintln!(
                "Linux audio capture: Setting PULSE_SOURCE={}",
                source_name
            );
            std::env::set_var("PULSE_SOURCE", source_name);
        }
        let host = cpal::default_host();
        // Select the capture device.
        // If PULSE_SOURCE was set, the default input device IS the monitor.
        // Otherwise, fall back to searching device names for "monitor".
        let device = if monitor_source.is_some() {
            // PULSE_SOURCE was set — default input IS the monitor now
            match host.default_input_device() {
                Some(d) => {
                    let name = d.name().unwrap_or_default();
                    eprintln!(
                        "Linux audio capture: Using PULSE_SOURCE monitor device: {}",
                        name
                    );
                    d
                }
                None => {
                    let error_msg = "No audio input device available".to_string();
                    eprintln!("{}", error_msg);
                    *error_arc.lock().unwrap() = Some(error_msg);
                    return;
                }
            }
        } else {
            // pactl not available — try to find monitor by name (original approach)
            let mut monitor_device = None;
            if let Ok(devices) = host.input_devices() {
                for d in devices {
                    if let Ok(name) = d.name() {
                        let name_lower = name.to_lowercase();
                        if name_lower.contains("monitor") {
                            eprintln!(
                                "Linux audio capture: Found monitor device by name: {}",
                                name
                            );
                            monitor_device = Some(d);
                            break;
                        }
                    }
                }
            }
            match monitor_device {
                Some(d) => d,
                None => {
                    eprintln!("Linux audio capture: No monitor device found, falling back to default input");
                    match host.default_input_device() {
                        Some(d) => d,
                        None => {
                            let error_msg = "No audio input device available".to_string();
                            eprintln!("{}", error_msg);
                            *error_arc.lock().unwrap() = Some(error_msg);
                            return;
                        }
                    }
                }
            }
        };
        let device_name = device.name().unwrap_or_else(|_| "unknown".to_string());
        eprintln!("Linux audio capture: Using device: {}", device_name);
        // Get supported config
        let config = match device.default_input_config() {
            Ok(c) => c,
            Err(e) => {
                let error_msg = format!("Failed to get default input config: {}", e);
                eprintln!("{}", error_msg);
                *error_arc.lock().unwrap() = Some(error_msg);
                return;
            }
        };
        let sample_rate = config.sample_rate().0;
        let channels = config.channels();
        let sample_format = config.sample_format();
        eprintln!(
            "Linux audio capture: Config - {}Hz, {} channels, format: {:?}",
            sample_rate, channels, sample_format
        );
        // Publish the negotiated format so stop_capture can build the WAV.
        *sample_rate_arc.lock().unwrap() = sample_rate;
        *channels_arc.lock().unwrap() = channels;
        let stream_config = StreamConfig {
            channels,
            sample_rate: cpal::SampleRate(sample_rate),
            buffer_size: cpal::BufferSize::Default,
        };
        let samples_clone = samples.clone();
        let error_arc_clone = error_arc.clone();
        let stop_flag_for_stream = stop_flag.clone();
        let err_fn = {
            let error_arc = error_arc.clone();
            move |err: cpal::StreamError| {
                let error_msg = format!("Stream error: {}", err);
                eprintln!("{}", error_msg);
                *error_arc.lock().unwrap() = Some(error_msg);
            }
        };
        // Build the input stream for whichever sample format the device uses,
        // normalizing everything to f32 samples.
        let stream = match sample_format {
            SampleFormat::F32 => {
                let samples = samples_clone.clone();
                let stop = stop_flag_for_stream.clone();
                device.build_input_stream(
                    &stream_config,
                    move |data: &[f32], _: &cpal::InputCallbackInfo| {
                        if stop.load(Ordering::Relaxed) {
                            return;
                        }
                        let mut guard = samples.lock().unwrap();
                        guard.extend_from_slice(data);
                    },
                    err_fn,
                    None,
                )
            }
            SampleFormat::I16 => {
                let samples = samples_clone.clone();
                let stop = stop_flag_for_stream.clone();
                device.build_input_stream(
                    &stream_config,
                    move |data: &[i16], _: &cpal::InputCallbackInfo| {
                        if stop.load(Ordering::Relaxed) {
                            return;
                        }
                        let mut guard = samples.lock().unwrap();
                        // i16 → f32 by scaling into [-1.0, 1.0)
                        for &s in data {
                            guard.push(s as f32 / 32768.0);
                        }
                    },
                    err_fn,
                    None,
                )
            }
            SampleFormat::U16 => {
                let samples = samples_clone.clone();
                let stop = stop_flag_for_stream.clone();
                device.build_input_stream(
                    &stream_config,
                    move |data: &[u16], _: &cpal::InputCallbackInfo| {
                        if stop.load(Ordering::Relaxed) {
                            return;
                        }
                        let mut guard = samples.lock().unwrap();
                        // u16 (0..=65535) → f32 by scaling then re-centering on 0
                        for &s in data {
                            guard.push((s as f32 / 32768.0) - 1.0);
                        }
                    },
                    err_fn,
                    None,
                )
            }
            _ => {
                let error_msg = format!("Unsupported sample format: {:?}", sample_format);
                eprintln!("{}", error_msg);
                *error_arc_clone.lock().unwrap() = Some(error_msg);
                return;
            }
        };
        let stream = match stream {
            Ok(s) => s,
            Err(e) => {
                let error_msg = format!("Failed to build input stream: {}", e);
                eprintln!("{}", error_msg);
                *error_arc_clone.lock().unwrap() = Some(error_msg);
                return;
            }
        };
        if let Err(e) = stream.play() {
            let error_msg = format!("Failed to start stream: {}", e);
            eprintln!("{}", error_msg);
            *error_arc_clone.lock().unwrap() = Some(error_msg);
            return;
        }
        eprintln!("Linux audio capture: Stream started successfully");
        // Keep thread alive until stop signal
        loop {
            if stop_flag.load(Ordering::Relaxed) {
                break;
            }
            std::thread::sleep(std::time::Duration::from_millis(100));
        }
        // Stream will be dropped here, stopping capture
        eprintln!("Linux audio capture: Stream stopped");
    });
    // Spawn timeout task
    // After max_duration_secs it takes the sender (if a manual stop hasn't
    // already) and signals it, which trips the AtomicBool via the bridge task.
    let stop_tx_clone = state.stop_tx.clone();
    tokio::spawn(async move {
        tokio::time::sleep(tokio::time::Duration::from_secs(max_duration_secs as u64)).await;
        let tx = stop_tx_clone.lock().unwrap().take();
        if let Some(tx) = tx {
            let _ = tx.send(()).await;
        }
    });
    Ok(())
}
/// Stop the Linux capture and return the recording as base64-encoded
/// 16-bit WAV data.
///
/// Returns an error if the capture thread recorded a failure or if no
/// samples were collected.
pub async fn stop_capture(state: &AudioCaptureState) -> Result<String, String> {
    // Signal the capture thread to stop.
    // FIX: `Sender::send` is async; the previous code dropped the un-awaited
    // future, so no message was ever sent — stopping only worked because
    // dropping the sender closes the channel. `try_send` delivers the message
    // synchronously, and dropping `tx` afterwards still closes the channel as
    // a fallback.
    if let Some(tx) = state.stop_tx.lock().unwrap().take() {
        let _ = tx.try_send(());
    }
    // Give the cpal stream a moment to wind down.
    tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
    // Surface any error the capture thread recorded.
    if let Some(error) = state.error.lock().unwrap().as_ref() {
        return Err(error.clone());
    }
    // Snapshot the captured audio and its negotiated format.
    let samples = state.samples.lock().unwrap().clone();
    let sample_rate = *state.sample_rate.lock().unwrap();
    let channels = *state.channels.lock().unwrap();
    if samples.is_empty() {
        return Err(
            "No audio samples captured. Make sure audio is playing on your system during recording."
                .to_string(),
        );
    }
    // Encode as 16-bit WAV, then base64 for transport to the frontend.
    let wav_data = samples_to_wav(&samples, sample_rate, channels)?;
    let base64_data = general_purpose::STANDARD.encode(&wav_data);
    Ok(base64_data)
}
/// Report whether system-audio loopback capture looks possible on this system.
pub fn is_supported() -> bool {
    // Best signal: pactl can name a monitor source.
    if find_monitor_source_via_pactl().is_some() {
        return true;
    }
    // Next best: cpal exposes a device with "monitor" in its name.
    let host = cpal::default_host();
    let has_monitor_by_name = host
        .input_devices()
        .map(|mut devices| {
            devices.any(|d| {
                d.name()
                    .map(|n| n.to_lowercase().contains("monitor"))
                    .unwrap_or(false)
            })
        })
        .unwrap_or(false);
    if has_monitor_by_name {
        return true;
    }
    // Last resort: any default input device at all.
    host.default_input_device().is_some()
}
/// Encode interleaved f32 PCM samples as an in-memory 16-bit integer WAV file.
///
/// Each sample is clamped to [-1.0, 1.0] and scaled into the i16 range.
fn samples_to_wav(samples: &[f32], sample_rate: u32, channels: u16) -> Result<Vec<u8>, String> {
    let spec = WavSpec {
        channels,
        sample_rate,
        bits_per_sample: 16,
        sample_format: hound::SampleFormat::Int,
    };
    let mut bytes = Vec::new();
    let mut writer = WavWriter::new(Cursor::new(&mut bytes), spec)
        .map_err(|e| format!("Failed to create WAV writer: {}", e))?;
    for &sample in samples {
        let quantized = (sample.clamp(-1.0, 1.0) * 32767.0) as i16;
        writer
            .write_sample(quantized)
            .map_err(|e| format!("Failed to write sample: {}", e))?;
    }
    writer
        .finalize()
        .map_err(|e| format!("Failed to finalize WAV: {}", e))?;
    Ok(bytes)
}

View File

@@ -0,0 +1,265 @@
use crate::audio_capture::AudioCaptureState;
use base64::{engine::general_purpose, Engine as _};
use hound::{WavSpec, WavWriter};
use screencapturekit::{
cm::CMSampleBuffer,
shareable_content::SCShareableContent,
stream::{
configuration::SCStreamConfiguration,
content_filter::SCContentFilter,
output_trait::SCStreamOutputTrait,
output_type::SCStreamOutputType,
sc_stream::SCStream,
},
};
use std::io::Cursor;
use std::process::Command;
use std::sync::{Arc, Mutex};
use tokio::sync::mpsc;
/// Start capturing system audio on macOS via ScreenCaptureKit.
///
/// Builds an audio-only `SCStream` filtered on the first display, registers a
/// handler that appends extracted f32 samples to `state.samples`, and spawns
/// a watchdog task that stops the stream after `max_duration_secs` or when a
/// stop signal arrives on `state.stop_tx`.
///
/// Errors if ScreenCaptureKit is unavailable (macOS < 12.3), no display is
/// present, shareable content can't be read, or the stream fails to start.
pub async fn start_capture(
    state: &AudioCaptureState,
    max_duration_secs: u32,
) -> Result<(), String> {
    if !is_supported() {
        return Err("System audio capture requires macOS 12.3 or newer.".to_string());
    }
    // Reset previous samples
    state.reset();
    // Get shareable content
    let content = SCShareableContent::get()
        .map_err(|e| format!("Failed to get shareable content: {}", e))?;
    // Get first display
    let displays = content.displays();
    if displays.is_empty() {
        return Err("No displays available".to_string());
    }
    let display = &displays[0];
    // Create content filter for desktop audio
    let filter = SCContentFilter::create()
        .with_display(display)
        .with_excluding_windows(&[])
        .build();
    // Create stream configuration - audio only
    let mut config = SCStreamConfiguration::default();
    config.set_captures_audio(true);
    config.set_excludes_current_process_audio(false);
    config.set_sample_rate(48000); // Use i32 directly
    config.set_channel_count(2); // Use i32 directly
    // Create stream using builder
    let (tx, mut rx) = mpsc::channel::<()>(1);
    *state.stop_tx.lock().unwrap() = Some(tx);
    let samples = state.samples.clone();
    let sample_rate = state.sample_rate.clone();
    let channels = state.channels.clone();
    // Set sample rate and channels
    // (fixed by the stream configuration above, so they can be recorded now)
    *sample_rate.lock().unwrap() = 48000;
    *channels.lock().unwrap() = 2;
    // Create output handler struct
    // Receives CMSampleBuffers from ScreenCaptureKit and appends the
    // extracted PCM data to the shared sample buffer.
    struct AudioHandler {
        samples: Arc<Mutex<Vec<f32>>>,
    }
    impl SCStreamOutputTrait for AudioHandler {
        fn did_output_sample_buffer(
            &self,
            sample: CMSampleBuffer,
            _type: SCStreamOutputType,
        ) {
            // Only audio buffers matter; extraction failures are dropped
            // silently so one bad buffer doesn't kill the capture.
            if _type == SCStreamOutputType::Audio {
                if let Ok(audio_samples) = extract_audio_samples(sample) {
                    let mut samples_guard = self.samples.lock().unwrap();
                    samples_guard.extend_from_slice(&audio_samples);
                }
            }
        }
    }
    let handler = AudioHandler {
        samples: samples.clone(),
    };
    // Create stream
    let mut stream = SCStream::new(&filter, &config);
    // Add output handler for audio (order: handler, then output_type)
    stream.add_output_handler(handler, SCStreamOutputType::Audio);
    // Store stream reference
    // (stop_capture uses it to stop the stream directly)
    *state.stream.lock().unwrap() = Some(stream.clone());
    stream.start_capture().map_err(|e| format!("Failed to start capture: {}", e))?;
    // Spawn task to stop after max duration
    let stream_clone = stream.clone();
    tokio::spawn(async move {
        tokio::select! {
            _ = tokio::time::sleep(tokio::time::Duration::from_secs(max_duration_secs as u64)) => {
                // Timeout reached
            }
            _ = rx.recv() => {
                // Manual stop (also fires when the sender is dropped)
            }
        }
        let _ = stream_clone.stop_capture();
    });
    Ok(())
}
/// Stop the macOS capture and return the recording as base64-encoded
/// 16-bit WAV data.
///
/// Returns an error when no samples were collected.
pub async fn stop_capture(state: &AudioCaptureState) -> Result<String, String> {
    // Signal the watchdog task to stop.
    // FIX: `Sender::send` is async; the previous code dropped the un-awaited
    // future, so no message was ever sent — stopping only worked because
    // dropping the sender closes the channel. `try_send` delivers the message
    // synchronously, and dropping `tx` afterwards still closes the channel as
    // a fallback.
    if let Some(tx) = state.stop_tx.lock().unwrap().take() {
        let _ = tx.try_send(());
    }
    // Stop the ScreenCaptureKit stream directly if it is still alive.
    if let Some(stream) = state.stream.lock().unwrap().take() {
        let _ = stream.stop_capture();
    }
    // Give in-flight sample-buffer callbacks a moment to drain.
    tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
    // Snapshot the captured audio and its format.
    let samples = state.samples.lock().unwrap().clone();
    let sample_rate = *state.sample_rate.lock().unwrap();
    let channels = *state.channels.lock().unwrap();
    if samples.is_empty() {
        return Err("No audio samples captured".to_string());
    }
    // Encode as 16-bit WAV, then base64 for transport to the frontend.
    let wav_data = samples_to_wav(&samples, sample_rate, channels)?;
    let base64_data = general_purpose::STANDARD.encode(&wav_data);
    Ok(base64_data)
}
/// ScreenCaptureKit-based system audio capture needs macOS 12.3 or newer.
pub fn is_supported() -> bool {
    macos_version_at_least(12, 3)
}

/// Compare the running macOS version (read from `sw_vers -productVersion`)
/// against a required major.minor pair.
///
/// Returns `false` whenever the version cannot be determined (e.g. `sw_vers`
/// is missing or exits unsuccessfully); unparsable components count as 0.
fn macos_version_at_least(required_major: u64, required_minor: u64) -> bool {
    let probe = Command::new("sw_vers").arg("-productVersion").output();
    let output = match probe {
        Ok(out) if out.status.success() => out,
        _ => return false,
    };
    let text = String::from_utf8_lossy(&output.stdout);
    let mut fields = text.trim().split('.');
    let mut next_component = || fields.next().and_then(|p| p.parse::<u64>().ok()).unwrap_or(0);
    let major = next_component();
    let minor = next_component();
    // Lexicographic tuple comparison encodes "newer major, or same major and
    // at-least-as-new minor" — identical to the explicit boolean form.
    (major, minor) >= (required_major, required_minor)
}
/// Pull interleaved f32 PCM samples out of a ScreenCaptureKit sample buffer.
///
/// Handles both layouts the buffer list may carry:
/// * a single buffer of already-interleaved samples, returned as-is;
/// * one buffer per channel (planar), interleaved here as L0, R0, L1, R1, …
///   with zero-padding for any channel shorter than the longest.
///
/// Returns an empty Vec when the buffer carries no audio data, and an error
/// only when the audio buffer list itself cannot be obtained.
fn extract_audio_samples(sample_buffer: CMSampleBuffer) -> Result<Vec<f32>, String> {
    // Use the crate's built-in method to get audio buffer list
    let audio_buffer_list = sample_buffer
        .audio_buffer_list()
        .ok_or_else(|| "Failed to get audio buffer list".to_string())?;
    let buffers: Vec<_> = audio_buffer_list.iter().collect();
    let num_buffers = buffers.len();
    if num_buffers == 0 {
        return Ok(Vec::new());
    }
    // ScreenCaptureKit on macOS provides audio in Float32 format
    // The audio can be either:
    // - Interleaved (1 buffer with L,R,L,R,... samples)
    // - Planar (2 buffers, one for L channel, one for R channel)
    if num_buffers == 1 {
        // Interleaved stereo or mono in a single buffer
        let buffer = &buffers[0];
        let data_bytes = buffer.data();
        let num_samples = data_bytes.len() / std::mem::size_of::<f32>();
        if num_samples > 0 {
            // SAFETY: reinterprets the raw byte buffer as f32s; num_samples is
            // derived from the byte length, so the slice stays in bounds.
            // Assumes the payload really is Float32 and suitably aligned, per
            // the comment above — TODO confirm against the SCK audio format.
            unsafe {
                let data_ptr = data_bytes.as_ptr() as *const f32;
                let data = std::slice::from_raw_parts(data_ptr, num_samples);
                return Ok(data.to_vec());
            }
        }
    } else {
        // Planar format - separate buffer for each channel
        // We need to interleave them: L0, R0, L1, R1, ...
        let mut channel_data: Vec<Vec<f32>> = Vec::new();
        let mut max_samples = 0;
        for buffer in &buffers {
            let data_bytes = buffer.data();
            let num_samples = data_bytes.len() / std::mem::size_of::<f32>();
            if num_samples > 0 {
                // SAFETY: same reinterpretation as the interleaved case; the
                // slice length is bounded by the buffer's byte length.
                unsafe {
                    let data_ptr = data_bytes.as_ptr() as *const f32;
                    let data = std::slice::from_raw_parts(data_ptr, num_samples);
                    channel_data.push(data.to_vec());
                    max_samples = max_samples.max(num_samples);
                }
            }
        }
        // Interleave the channels
        let mut interleaved = Vec::with_capacity(max_samples * num_buffers);
        for i in 0..max_samples {
            for channel in &channel_data {
                if i < channel.len() {
                    interleaved.push(channel[i]);
                } else {
                    interleaved.push(0.0); // Pad with silence if needed
                }
            }
        }
        return Ok(interleaved);
    }
    Ok(Vec::new())
}
/// Serialize interleaved f32 PCM into a 16-bit integer WAV held in memory.
///
/// Samples are clamped to [-1.0, 1.0] before scaling to i16.
fn samples_to_wav(samples: &[f32], sample_rate: u32, channels: u16) -> Result<Vec<u8>, String> {
    let mut out = Vec::new();
    let mut writer = WavWriter::new(
        Cursor::new(&mut out),
        WavSpec {
            channels,
            sample_rate,
            bits_per_sample: 16,
            sample_format: hound::SampleFormat::Int,
        },
    )
    .map_err(|e| format!("Failed to create WAV writer: {}", e))?;
    samples.iter().try_for_each(|&s| {
        let as_i16 = (s.clamp(-1.0, 1.0) * 32767.0) as i16;
        writer
            .write_sample(as_i16)
            .map_err(|e| format!("Failed to write sample: {}", e))
    })?;
    writer
        .finalize()
        .map_err(|e| format!("Failed to finalize WAV: {}", e))?;
    Ok(out)
}

View File

@@ -0,0 +1,47 @@
#[cfg(target_os = "macos")]
mod macos;
#[cfg(target_os = "windows")]
mod windows;
#[cfg(target_os = "linux")]
mod linux;
#[cfg(target_os = "macos")]
pub use macos::*;
#[cfg(target_os = "windows")]
pub use windows::*;
#[cfg(target_os = "linux")]
pub use linux::*;
use std::sync::{Arc, Mutex};
#[cfg(target_os = "macos")]
use screencapturekit::stream::sc_stream::SCStream;
/// Shared state for an in-progress system-audio capture session.
///
/// Every field is wrapped in `Arc<Mutex<…>>` so the platform capture
/// thread/stream callbacks and the async commands can access it concurrently.
pub struct AudioCaptureState {
    // Captured f32 PCM samples, appended by the platform capture backend.
    pub samples: Arc<Mutex<Vec<f32>>>,
    // Sample rate (Hz) of the active capture; defaults to 44100.
    pub sample_rate: Arc<Mutex<u32>>,
    // Channel count of the active capture; defaults to 2.
    pub channels: Arc<Mutex<u16>>,
    // Capacity-1 channel sender used to signal the capture task to stop.
    pub stop_tx: Arc<Mutex<Option<tokio::sync::mpsc::Sender<()>>>>,
    // First error reported by the capture backend, if any.
    pub error: Arc<Mutex<Option<String>>>,
    // Live ScreenCaptureKit stream handle (macOS only).
    #[cfg(target_os = "macos")]
    pub stream: Arc<Mutex<Option<SCStream>>>,
}
impl AudioCaptureState {
pub fn new() -> Self {
Self {
samples: Arc::new(Mutex::new(Vec::new())),
sample_rate: Arc::new(Mutex::new(44100)),
channels: Arc::new(Mutex::new(2)),
stop_tx: Arc::new(Mutex::new(None)),
error: Arc::new(Mutex::new(None)),
#[cfg(target_os = "macos")]
stream: Arc::new(Mutex::new(None)),
}
}
pub fn reset(&self) {
*self.samples.lock().unwrap() = Vec::new();
*self.error.lock().unwrap() = None;
}
}

View File

@@ -0,0 +1,288 @@
use crate::audio_capture::AudioCaptureState;
use base64::{engine::general_purpose, Engine as _};
use hound::{WavSpec, WavWriter};
use std::io::Cursor;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::thread;
use wasapi::*;
use windows::Win32::System::Com::{CoInitializeEx, CoUninitialize, COINIT_MULTITHREADED};
/// Start capturing system (loopback) audio on Windows via WASAPI,
/// accumulating interleaved f32 samples into `state.samples` until either
/// `stop_capture` is invoked or `max_duration_secs` elapses.
///
/// All WASAPI/COM objects are created and used on one dedicated OS thread
/// because they are not `Send`. A tokio task bridges the async stop channel
/// to an `AtomicBool` the capture thread polls; a second task enforces the
/// maximum duration. Errors inside the capture thread are published through
/// `state.error` so `stop_capture` can report them to the caller.
pub async fn start_capture(
    state: &AudioCaptureState,
    max_duration_secs: u32,
) -> Result<(), String> {
    // Reset previous samples
    state.reset();
    let samples = state.samples.clone();
    let sample_rate_arc = state.sample_rate.clone();
    let channels_arc = state.channels.clone();
    let stop_tx = state.stop_tx.clone();
    let error_arc = state.error.clone();
    // Use AtomicBool for stop signal (works with non-Send types)
    let stop_flag = Arc::new(AtomicBool::new(false));
    let stop_flag_clone = stop_flag.clone();
    // Create tokio channel and spawn a task to bridge it to the AtomicBool.
    // Note: `recv()` returns both on a delivered message and when all
    // senders are dropped, so the flag gets set either way.
    let (tx, mut rx) = tokio::sync::mpsc::channel::<()>(1);
    *stop_tx.lock().unwrap() = Some(tx);
    tokio::spawn(async move {
        rx.recv().await;
        stop_flag_clone.store(true, Ordering::Relaxed);
    });
    // Spawn capture task on a dedicated thread (WASAPI COM objects are not Send)
    // All WASAPI objects must be created and used on the same thread
    thread::spawn(move || {
        // Initialize COM for this thread
        unsafe {
            let hr = CoInitializeEx(None, COINIT_MULTITHREADED);
            if hr.is_err() {
                // Fix: record the error so the frontend can see it
                // (previously this path only logged to stderr).
                let error_msg = format!("Failed to initialize COM: {:?}", hr);
                eprintln!("{}", error_msg);
                *error_arc.lock().unwrap() = Some(error_msg);
                return;
            }
        }
        // Ensure COM is uninitialized when thread exits
        let _com_guard = scopeguard::guard((), |_| unsafe {
            CoUninitialize();
        });
        // Initialize WASAPI on this thread.
        // Loopback capture uses the default *render* device.
        let device = match DeviceEnumerator::new()
            .and_then(|enumerator| enumerator.get_default_device(&Direction::Render))
        {
            Ok(d) => d,
            Err(e) => {
                let error_msg = format!("Failed to get audio device: {}", e);
                eprintln!("{}", error_msg);
                *error_arc.lock().unwrap() = Some(error_msg);
                return;
            }
        };
        let mut audio_client = match device.get_iaudioclient() {
            Ok(client) => client,
            Err(e) => {
                let error_msg = format!("Failed to get audio client: {}", e);
                eprintln!("{}", error_msg);
                *error_arc.lock().unwrap() = Some(error_msg);
                return;
            }
        };
        let mix_format = match audio_client.get_mixformat() {
            Ok(format) => format,
            Err(e) => {
                let error_msg = format!("Failed to get mix format: {}", e);
                eprintln!("{}", error_msg);
                *error_arc.lock().unwrap() = Some(error_msg);
                return;
            }
        };
        // Publish the mix format's sample rate and channel count so
        // `stop_capture` can build a correct WAV header.
        let channels = mix_format.get_nchannels() as usize;
        let bytes_per_sample = (mix_format.get_bitspersample() / 8) as usize;
        *sample_rate_arc.lock().unwrap() = mix_format.get_samplespersec();
        *channels_arc.lock().unwrap() = mix_format.get_nchannels();
        // Get device period
        let (_def_period, min_period) = match audio_client.get_device_period() {
            Ok(periods) => periods,
            Err(e) => {
                // Fix: record the error like the other failure paths
                // (previously only logged to stderr, leaving the caller blind).
                let error_msg = format!("Failed to get device period: {}", e);
                eprintln!("{}", error_msg);
                *error_arc.lock().unwrap() = Some(error_msg);
                return;
            }
        };
        // Initialize audio client for loopback with StreamMode
        // For loopback mode: get Render device, initialize with Capture direction
        // This triggers AUDCLNT_STREAMFLAGS_LOOPBACK in the wasapi crate
        let stream_mode = StreamMode::EventsShared {
            autoconvert: true, // Enable automatic format conversion
            buffer_duration_hns: min_period, // Use minimum period
        };
        if let Err(e) = audio_client.initialize_client(&mix_format, &Direction::Capture, &stream_mode) {
            let error_msg = format!("Failed to initialize audio client: {}", e);
            eprintln!("{}", error_msg);
            *error_arc.lock().unwrap() = Some(error_msg);
            return;
        }
        // Set up event handle for EventsShared mode
        let h_event = match audio_client.set_get_eventhandle() {
            Ok(event) => event,
            Err(e) => {
                // Fix: record the error like the other failure paths.
                let error_msg = format!("Failed to set event handle: {}", e);
                eprintln!("{}", error_msg);
                *error_arc.lock().unwrap() = Some(error_msg);
                return;
            }
        };
        let capture_client = match audio_client.get_audiocaptureclient() {
            Ok(client) => client,
            Err(e) => {
                let error_msg = format!("Failed to get capture client: {}", e);
                eprintln!("{}", error_msg);
                *error_arc.lock().unwrap() = Some(error_msg);
                return;
            }
        };
        if let Err(e) = audio_client.start_stream() {
            let error_msg = format!("Failed to start stream: {}", e);
            eprintln!("{}", error_msg);
            *error_arc.lock().unwrap() = Some(error_msg);
            return;
        }
        loop {
            // Check if stop signal was received
            if stop_flag.load(Ordering::Relaxed) {
                break;
            }
            // Try to get available data
            match capture_client.get_next_packet_size() {
                Ok(Some(frames_available)) => {
                    if frames_available > 0 {
                        // Calculate buffer size needed (frames * channels * bytes_per_sample)
                        let buffer_size = frames_available as usize * channels * bytes_per_sample;
                        let mut buffer = vec![0u8; buffer_size];
                        match capture_client.read_from_device(&mut buffer) {
                            Ok((frames_read, _buffer_info)) => {
                                if frames_read > 0 {
                                    // Convert bytes to f32 samples
                                    let samples_read = frames_read as usize * channels;
                                    let mut samples_guard = samples.lock().unwrap();
                                    // Assuming 32-bit float format (the shared-mode
                                    // mix format on Windows is normally f32).
                                    if bytes_per_sample == 4 {
                                        for i in 0..samples_read {
                                            let byte_offset = i * 4;
                                            if byte_offset + 4 <= buffer.len() {
                                                let sample = f32::from_le_bytes([
                                                    buffer[byte_offset],
                                                    buffer[byte_offset + 1],
                                                    buffer[byte_offset + 2],
                                                    buffer[byte_offset + 3],
                                                ]);
                                                samples_guard.push(sample);
                                            }
                                        }
                                    }
                                }
                            }
                            Err(e) => {
                                eprintln!("Error reading from device: {}", e);
                            }
                        }
                    }
                }
                Ok(None) => {
                    // Exclusive mode - handle differently if needed
                }
                Err(e) => {
                    eprintln!("Error getting next packet size: {}", e);
                }
            }
            // Wait for event signal (with timeout to allow checking stop flag)
            if h_event.wait_for_event(100).is_err() {
                // Timeout is expected - just continue to check stop flag
            }
        }
        // Stop the stream when done
        audio_client.stop_stream().ok();
    });
    // Spawn timeout task that auto-stops capture after the max duration.
    let stop_tx_clone = state.stop_tx.clone();
    tokio::spawn(async move {
        tokio::time::sleep(tokio::time::Duration::from_secs(max_duration_secs as u64)).await;
        // Take the sender out of the mutex before awaiting
        let tx = stop_tx_clone.lock().unwrap().take();
        if let Some(tx) = tx {
            let _ = tx.send(()).await;
        }
    });
    Ok(())
}
/// Stop an in-progress loopback capture and return the captured audio as a
/// base64-encoded 16-bit PCM WAV file.
///
/// Returns `Err` if the capture thread recorded an error, or if no samples
/// were captured (e.g. nothing was playing during the recording).
pub async fn stop_capture(state: &AudioCaptureState) -> Result<String, String> {
    // Signal the capture thread to stop. Fix: the original wrote
    // `let _ = tx.send(());`, which builds the async send future without
    // polling it — the message was never delivered, and stopping only
    // worked by accident because dropping the sender closes the channel.
    // `try_send` delivers synchronously (capacity 1, channel is empty).
    if let Some(tx) = state.stop_tx.lock().unwrap().take() {
        let _ = tx.try_send(());
    }
    // Give the capture thread a moment to observe the stop flag and finish
    // its final packet before we snapshot the buffers.
    tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
    // Surface any error the capture thread recorded.
    if let Some(error) = state.error.lock().unwrap().as_ref() {
        return Err(error.clone());
    }
    // Snapshot the captured samples and format parameters.
    let samples = state.samples.lock().unwrap().clone();
    let sample_rate = *state.sample_rate.lock().unwrap();
    let channels = *state.channels.lock().unwrap();
    if samples.is_empty() {
        return Err("No audio samples captured. Make sure audio is playing on your system during recording.".to_string());
    }
    // Encode as WAV, then base64 for transport to the frontend.
    let wav_data = samples_to_wav(&samples, sample_rate, channels)?;
    let base64_data = general_purpose::STANDARD.encode(&wav_data);
    Ok(base64_data)
}
/// Reports whether system-audio loopback capture is available in this
/// build — the WASAPI backend only exists on Windows.
pub fn is_supported() -> bool {
    cfg!(target_os = "windows")
}
/// Encode interleaved f32 samples as an in-memory 16-bit PCM WAV file.
///
/// Each sample is clamped to [-1.0, 1.0] and scaled to i16 before writing.
fn samples_to_wav(samples: &[f32], sample_rate: u32, channels: u16) -> Result<Vec<u8>, String> {
    let spec = WavSpec {
        channels,
        sample_rate,
        bits_per_sample: 16,
        sample_format: hound::SampleFormat::Int,
    };
    let mut buffer = Vec::new();
    let mut writer = WavWriter::new(Cursor::new(&mut buffer), spec)
        .map_err(|e| format!("Failed to create WAV writer: {}", e))?;
    for &sample in samples {
        // Quantize to signed 16-bit range.
        let quantized = (sample.clamp(-1.0, 1.0) * 32767.0) as i16;
        writer
            .write_sample(quantized)
            .map_err(|e| format!("Failed to write sample: {}", e))?;
    }
    writer
        .finalize()
        .map_err(|e| format!("Failed to finalize WAV: {}", e))?;
    Ok(buffer)
}

View File

@@ -0,0 +1,481 @@
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use cpal::{Device, Host, SampleFormat, StreamConfig};
use std::sync::{Arc, Mutex};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
/// A selectable audio output device, serialized to the frontend.
#[derive(Debug, Clone, serde::Serialize)]
pub struct AudioOutputDevice {
    /// Identifier derived from the device name (cpal exposes no stable IDs).
    pub id: String,
    /// Human-readable device name as reported by cpal.
    pub name: String,
    /// Whether this is the host's current default output device.
    pub is_default: bool,
}
/// Playback state: the cpal host plus a flag used to silence and terminate
/// all currently playing output streams.
pub struct AudioOutputState {
    // cpal host used for device enumeration and stream creation.
    host: Host,
    // When set, active output callbacks emit silence and playback loops exit.
    stop_flag: Arc<AtomicBool>,
}
impl AudioOutputState {
/// Create a playback state bound to the platform's default cpal host,
/// with the stop flag initially cleared.
pub fn new() -> Self {
    let host = cpal::default_host();
    let stop_flag = Arc::new(AtomicBool::new(false));
    Self { host, stop_flag }
}
/// Signal every active output stream to go silent.
///
/// Output callbacks poll `stop_flag`; once set they write silence and the
/// per-device playback loop in `play_to_device` exits, dropping the stream.
pub fn stop_all_playback(&self) -> Result<(), String> {
    eprintln!("stop_all_playback: Setting stop flag");
    self.stop_flag.store(true, Ordering::Relaxed);
    eprintln!("stop_all_playback: Stop flag set - active streams will output silence");
    // Always succeeds; Result kept for a uniform command-style interface.
    Ok(())
}
/// Enumerate all output devices known to the cpal host.
///
/// IDs are derived deterministically from device names (cpal provides no
/// stable identifiers); `is_default` marks the host's current default
/// output device by name comparison.
pub fn list_output_devices(&self) -> Result<Vec<AudioOutputDevice>, String> {
    // Resolve the default device's name once, up front.
    let default_name = self
        .host
        .default_output_device()
        .map(|d| d.name().unwrap_or_default());
    self.host
        .output_devices()
        .map_err(|e| format!("Failed to enumerate output devices: {}", e))?
        .map(|device| {
            let name = device
                .name()
                .map_err(|e| format!("Failed to get device name: {}", e))?;
            // Generate a stable ID from the device name (cpal doesn't provide stable IDs)
            let id = format!("device_{}", name.replace(' ', "_").to_lowercase());
            Ok(AudioOutputDevice {
                id,
                is_default: default_name.as_deref() == Some(name.as_str()),
                name,
            })
        })
        .collect()
}
/// Decode `audio_data` and play it on every output device whose generated
/// ID appears in `device_ids`.
///
/// Device IDs are re-derived from device names here and must stay in sync
/// with the scheme used by `list_output_devices`.
///
/// NOTE(review): `play_to_device` blocks until playback on that device
/// finishes, so multiple devices are played sequentially rather than
/// simultaneously — and the executor thread is blocked despite this being
/// `async`. Confirm whether concurrent playback was intended.
pub async fn play_audio_to_devices(
    &self,
    audio_data: Vec<u8>,
    device_ids: Vec<String>,
) -> Result<(), String> {
    eprintln!("play_audio_to_devices called with {} bytes, {} device IDs", audio_data.len(), device_ids.len());
    eprintln!("Requested device IDs: {:?}", device_ids);
    // Decode audio file (assuming WAV format)
    eprintln!("Decoding audio data...");
    let (samples, sample_rate, channels) = self.decode_wav(&audio_data)?;
    eprintln!("Audio decoded: {} samples, {}Hz, {} channels", samples.len(), sample_rate, channels);
    // Find devices by ID
    eprintln!("Enumerating output devices...");
    let devices: Vec<Device> = self
        .host
        .output_devices()
        .map_err(|e| format!("Failed to enumerate devices: {}", e))?
        .filter_map(|device| {
            // Devices whose name can't be read are silently skipped.
            let name = device.name().ok()?;
            let id = format!("device_{}", name.replace(' ', "_").to_lowercase());
            eprintln!("Found device: {} (id: {})", name, id);
            if device_ids.contains(&id) {
                eprintln!("  -> Matched! Will play to this device");
                Some(device)
            } else {
                None
            }
        })
        .collect();
    if devices.is_empty() {
        eprintln!("ERROR: No matching devices found");
        return Err("No matching devices found".to_string());
    }
    eprintln!("Playing to {} device(s)", devices.len());
    // Stop any existing playback first
    self.stop_all_playback().ok();
    // Reset stop flag for new playback
    self.stop_flag.store(false, Ordering::Relaxed);
    // Play to each device
    for (i, device) in devices.iter().enumerate() {
        let device_name = device.name().unwrap_or_else(|_| "unknown".to_string());
        eprintln!("Playing to device {}/{}: {}", i + 1, devices.len(), device_name);
        self.play_to_device(device, samples.clone(), sample_rate, channels, self.stop_flag.clone())
            .map_err(|e| format!("Failed to play to device {}: {}", device_name, e))?;
        eprintln!("Successfully started playback on device: {}", device_name);
    }
    eprintln!("play_audio_to_devices completed successfully");
    Ok(())
}
/// Decode an in-memory audio file into interleaved f32 samples.
///
/// Despite the name, symphonia's probe is format-agnostic, so any
/// container symphonia supports will decode; WAV is simply the expected
/// common case. Returns `(samples, sample_rate, channels)`, where
/// `samples` is frame-major interleaved (frame 0 ch 0, frame 0 ch 1, ...).
fn decode_wav(&self, data: &[u8]) -> Result<(Vec<f32>, u32, u16), String> {
    use symphonia::core::formats::FormatOptions;
    use symphonia::core::io::MediaSourceStream;
    use symphonia::core::meta::MetadataOptions;
    eprintln!("decode_wav: Creating MediaSourceStream from {} bytes", data.len());
    // The stream takes ownership of its source, so the input is copied here.
    let mss = MediaSourceStream::new(
        Box::new(std::io::Cursor::new(data.to_vec())),
        Default::default(),
    );
    eprintln!("decode_wav: Probing audio format...");
    let mut format = symphonia::default::get_probe()
        .format(
            &Default::default(),
            mss,
            &FormatOptions::default(),
            &MetadataOptions::default(),
        )
        .map_err(|e| {
            eprintln!("decode_wav: Failed to probe audio: {}", e);
            format!("Failed to probe audio: {}", e)
        })?
        .format;
    eprintln!("decode_wav: Audio format probed successfully");
    eprintln!("decode_wav: Finding audio track...");
    // Pick the first track that has a real codec attached.
    let track = format
        .tracks()
        .iter()
        .find(|t| t.codec_params.codec != symphonia::core::codecs::CODEC_TYPE_NULL)
        .ok_or_else(|| {
            eprintln!("decode_wav: No audio track found");
            "No audio track found".to_string()
        })?;
    let sample_rate = track
        .codec_params
        .sample_rate
        .ok_or_else(|| {
            eprintln!("decode_wav: No sample rate found in track");
            "No sample rate found".to_string()
        })?;
    let channels = track
        .codec_params
        .channels
        .ok_or_else(|| {
            eprintln!("decode_wav: No channels found in track");
            "No channels found".to_string()
        })?
        .count() as u16;
    eprintln!("decode_wav: Track info - sample_rate: {}, channels: {}", sample_rate, channels);
    eprintln!("decode_wav: Creating decoder...");
    let mut decoder = symphonia::default::get_codecs()
        .make(&track.codec_params, &Default::default())
        .map_err(|e| {
            eprintln!("decode_wav: Failed to create decoder: {}", e);
            format!("Failed to create decoder: {}", e)
        })?;
    eprintln!("decode_wav: Decoder created successfully");
    let mut samples = Vec::new();
    let mut packet_count = 0;
    eprintln!("decode_wav: Starting packet decoding loop...");
    loop {
        // Any `next_packet` error — including the normal end-of-stream I/O
        // error — terminates the loop rather than failing the decode.
        let packet = match format.next_packet() {
            Ok(packet) => packet,
            Err(e) => {
                eprintln!("decode_wav: End of stream or error: {:?}", e);
                break;
            }
        };
        packet_count += 1;
        let decoded = decoder
            .decode(&packet)
            .map_err(|e| {
                eprintln!("decode_wav: Decode error on packet {}: {}", packet_count, e);
                format!("Decode error: {}", e)
            })?;
        // Convert to f32 samples by matching on the buffer type
        use symphonia::core::audio::{AudioBufferRef, Signal};
        use symphonia::core::conv::FromSample;
        let spec = *decoded.spec();
        let num_channels = spec.channels.count();
        let num_frames = decoded.frames();
        eprintln!("decode_wav: Packet {} - {} frames, {} channels", packet_count, num_frames, num_channels);
        // Interleave samples from all channels
        for frame_idx in 0..num_frames {
            for ch in 0..num_channels {
                let sample_f32 = match &decoded {
                    AudioBufferRef::U8(buf) => f32::from_sample(buf.chan(ch)[frame_idx]),
                    AudioBufferRef::U16(buf) => f32::from_sample(buf.chan(ch)[frame_idx]),
                    AudioBufferRef::U24(buf) => f32::from_sample(buf.chan(ch)[frame_idx]),
                    AudioBufferRef::U32(buf) => f32::from_sample(buf.chan(ch)[frame_idx]),
                    AudioBufferRef::S8(buf) => f32::from_sample(buf.chan(ch)[frame_idx]),
                    AudioBufferRef::S16(buf) => f32::from_sample(buf.chan(ch)[frame_idx]),
                    AudioBufferRef::S24(buf) => f32::from_sample(buf.chan(ch)[frame_idx]),
                    AudioBufferRef::S32(buf) => f32::from_sample(buf.chan(ch)[frame_idx]),
                    AudioBufferRef::F32(buf) => buf.chan(ch)[frame_idx],
                    AudioBufferRef::F64(buf) => buf.chan(ch)[frame_idx] as f32,
                };
                samples.push(sample_f32);
            }
        }
    }
    eprintln!("decode_wav: Decoded {} packets, total {} samples", packet_count, samples.len());
    eprintln!("decode_wav: Returning sample_rate={}, channels={}", sample_rate, channels);
    Ok((samples, sample_rate, channels))
}
/// Build an output stream on `device` and play `samples` (interleaved f32
/// at `sample_rate`/`channels`) to completion.
///
/// The samples are resampled and channel-remapped to the device's default
/// output config, then streamed via a cpal callback. This call BLOCKS the
/// current thread until the buffer is exhausted or `stop_flag` is set,
/// because dropping the cpal `Stream` stops output immediately.
fn play_to_device(
    &self,
    device: &Device,
    samples: Vec<f32>,
    sample_rate: u32,
    channels: u16,
    stop_flag: Arc<AtomicBool>,
) -> Result<(), String> {
    let device_name = device.name().unwrap_or_else(|_| "unknown".to_string());
    eprintln!("play_to_device: Starting playback to device: {}", device_name);
    eprintln!("play_to_device: Input - {} samples, {}Hz, {} channels", samples.len(), sample_rate, channels);
    let config = device
        .default_output_config()
        .map_err(|e| format!("Failed to get default config: {}", e))?;
    // Prepare samples for the device's format
    let device_sample_rate = config.sample_rate().0;
    let device_channels = config.channels();
    let device_sample_format = config.sample_format();
    eprintln!("play_to_device: Device config - {}Hz, {} channels, format: {:?}",
        device_sample_rate, device_channels, device_sample_format);
    // Resample if needed (nearest-neighbor; see `resample` — no interpolation)
    let resampled = if device_sample_rate != sample_rate {
        eprintln!("play_to_device: Resampling from {}Hz to {}Hz", sample_rate, device_sample_rate);
        let result = self.resample(&samples, sample_rate, device_sample_rate);
        eprintln!("play_to_device: Resampled {} samples to {} samples", samples.len(), result.len());
        result
    } else {
        eprintln!("play_to_device: No resampling needed");
        samples
    };
    // Interleave/convert channels if needed
    eprintln!("play_to_device: Interleaving channels from {} to {} channels", channels, device_channels);
    let interleaved = self.interleave_channels(&resampled, channels, device_channels);
    eprintln!("play_to_device: Interleaved to {} samples", interleaved.len());
    // Create shared buffer for playback; `position` is the read cursor
    // advanced by the audio callback.
    let buffer: Arc<Mutex<Vec<f32>>> = Arc::new(Mutex::new(interleaved));
    let position = Arc::new(AtomicUsize::new(0));
    let buffer_clone = buffer.clone();
    let position_clone = position.clone();
    let err_fn = |err| eprintln!("Playback error: {}", err);
    let stream_config = StreamConfig {
        channels: device_channels,
        sample_rate: cpal::SampleRate(device_sample_rate),
        buffer_size: cpal::BufferSize::Default,
    };
    let stop_flag_clone = stop_flag.clone();
    // One callback per sample format; each copies from the shared buffer,
    // advances the shared read position, and pads with silence once the
    // buffer is exhausted.
    let stream = match config.sample_format() {
        SampleFormat::F32 => {
            let buffer = buffer_clone.clone();
            let pos = position_clone.clone();
            device
                .build_output_stream(
                    &stream_config,
                    move |data: &mut [f32], _: &cpal::OutputCallbackInfo| {
                        // Check stop flag - if set, output silence
                        if stop_flag_clone.load(Ordering::Relaxed) {
                            for sample in data.iter_mut() {
                                *sample = 0.0;
                            }
                            return;
                        }
                        let mut idx = pos.load(Ordering::Relaxed);
                        let buf = buffer.lock().unwrap();
                        for sample in data.iter_mut() {
                            if idx < buf.len() {
                                *sample = buf[idx];
                                idx += 1;
                            } else {
                                *sample = 0.0;
                            }
                        }
                        pos.store(idx, Ordering::Relaxed);
                    },
                    err_fn,
                    None,
                )
                .map_err(|e| format!("Failed to build stream: {}", e))?
        }
        SampleFormat::I16 => {
            let buffer = buffer_clone.clone();
            let pos = position_clone.clone();
            device
                .build_output_stream(
                    &stream_config,
                    move |data: &mut [i16], _: &cpal::OutputCallbackInfo| {
                        // Check stop flag - if set, output silence
                        if stop_flag_clone.load(Ordering::Relaxed) {
                            for sample in data.iter_mut() {
                                *sample = 0;
                            }
                            return;
                        }
                        let mut idx = pos.load(Ordering::Relaxed);
                        let buf = buffer.lock().unwrap();
                        for sample in data.iter_mut() {
                            if idx < buf.len() {
                                // Scale f32 [-1, 1] to signed 16-bit range.
                                *sample = (buf[idx] * 32767.0) as i16;
                                idx += 1;
                            } else {
                                *sample = 0;
                            }
                        }
                        pos.store(idx, Ordering::Relaxed);
                    },
                    err_fn,
                    None,
                )
                .map_err(|e| format!("Failed to build stream: {}", e))?
        }
        SampleFormat::U16 => {
            let buffer = buffer_clone.clone();
            let pos = position_clone.clone();
            device
                .build_output_stream(
                    &stream_config,
                    move |data: &mut [u16], _: &cpal::OutputCallbackInfo| {
                        // Check stop flag - if set, output silence
                        // (for unsigned 16-bit, 32768 is the zero-amplitude midpoint)
                        if stop_flag_clone.load(Ordering::Relaxed) {
                            for sample in data.iter_mut() {
                                *sample = 32768;
                            }
                            return;
                        }
                        let mut idx = pos.load(Ordering::Relaxed);
                        let buf = buffer.lock().unwrap();
                        for sample in data.iter_mut() {
                            if idx < buf.len() {
                                // Offset f32 [-1, 1] into the unsigned range.
                                *sample = ((buf[idx] + 1.0) * 32767.5) as u16;
                                idx += 1;
                            } else {
                                *sample = 32768;
                            }
                        }
                        pos.store(idx, Ordering::Relaxed);
                    },
                    err_fn,
                    None,
                )
                .map_err(|e| format!("Failed to build stream: {}", e))?
        }
        _ => return Err("Unsupported sample format".to_string()),
    };
    eprintln!("play_to_device: Starting stream playback...");
    stream.play().map_err(|e| {
        eprintln!("play_to_device: Failed to play stream: {}", e);
        format!("Failed to play stream: {}", e)
    })?;
    eprintln!("play_to_device: Stream started successfully");
    // Keep the stream alive until playback finishes.
    // Previously the stream was dropped immediately on function return,
    // causing silent playback (cpal stops output when its Stream is dropped).
    let total_samples = {
        buffer.lock().unwrap().len()
    };
    loop {
        let pos = position.load(std::sync::atomic::Ordering::Relaxed);
        if pos >= total_samples || stop_flag.load(std::sync::atomic::Ordering::Relaxed) {
            break;
        }
        std::thread::sleep(std::time::Duration::from_millis(10));
    }
    // stream is dropped here, after audio has finished playing
    drop(stream);
    eprintln!("play_to_device: Function completed successfully");
    Ok(())
}
/// Nearest-neighbor resampling from `from_rate` to `to_rate`.
///
/// Each output sample copies the input sample at the proportional source
/// position; no interpolation or filtering is applied. Operates on the raw
/// (possibly interleaved) sample vector.
fn resample(&self, samples: &[f32], from_rate: u32, to_rate: u32) -> Vec<f32> {
    if from_rate == to_rate {
        return samples.to_vec();
    }
    let ratio = to_rate as f64 / from_rate as f64;
    let target_len = (samples.len() as f64 * ratio) as usize;
    (0..target_len)
        .map(|i| {
            // Map the output index back onto the source timeline.
            let src_idx = (i as f64 / ratio) as usize;
            samples.get(src_idx).copied().unwrap_or(0.0)
        })
        .collect()
}
/// Remap interleaved samples from `src_channels` to `dst_channels`.
///
/// Extra destination channels repeat the last source channel; source
/// channels beyond `dst_channels` are dropped. Any trailing partial frame
/// in the input is ignored.
fn interleave_channels(
    &self,
    samples: &[f32],
    src_channels: u16,
    dst_channels: u16,
) -> Vec<f32> {
    if src_channels == dst_channels {
        return samples.to_vec();
    }
    let src = src_channels as usize;
    let frames = samples.len() / src;
    let mut remapped = Vec::with_capacity(frames * dst_channels as usize);
    for frame in 0..frames {
        for ch in 0..dst_channels {
            // Clamp the destination channel onto a valid source channel.
            let src_ch = ch.min(src_channels - 1) as usize;
            let flat = frame * src + src_ch;
            remapped.push(samples.get(flat).copied().unwrap_or(0.0));
        }
    }
    remapped
}
}
impl Default for AudioOutputState {
fn default() -> Self {
Self::new()
}
}

View File

@@ -0,0 +1 @@
pub mod audio_capture;

926
tauri/src-tauri/src/main.rs Normal file
View File

@@ -0,0 +1,926 @@
// Prevents additional console window on Windows in release, DO NOT REMOVE!!
#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]
mod audio_capture;
mod audio_output;
use std::sync::Mutex;
use tauri::{command, State, Manager, WindowEvent, Emitter, Listener, RunEvent};
use tauri_plugin_shell::ShellExt;
use tokio::sync::mpsc;
// Port used by older app versions; orphaned servers found here are killed at startup.
const LEGACY_PORT: u16 = 8000;
// Port the managed voicebox-server sidecar listens on.
const SERVER_PORT: u16 = 17493;
/// Locate a voicebox-server process listening on `port` (Windows only).
///
/// Asks PowerShell's `Get-NetTCPConnection` for the PID owning the port,
/// then confirms via `tasklist` that the process image is a voicebox
/// binary. The caller is responsible for checking port occupancy first
/// (e.g. `TcpStream::connect_timeout`). PowerShell replaces the previous
/// `netstat -ano` approach, which failed on systems with corrupted system
/// DLLs (see #277).
#[cfg(windows)]
fn find_voicebox_pid_on_port(port: u16) -> Option<u32> {
    use std::process::Command;
    // Built-in cmdlet — no dependency on netstat.exe.
    let ps_script = format!(
        "Get-NetTCPConnection -LocalPort {} -State Listen -ErrorAction SilentlyContinue | Select-Object -ExpandProperty OwningProcess",
        port
    );
    let output = Command::new("powershell")
        .args(["-NoProfile", "-Command", &ps_script])
        .output()
        .ok()?;
    let stdout = String::from_utf8_lossy(&output.stdout);
    stdout
        .lines()
        .filter_map(|line| line.trim().parse::<u32>().ok())
        .find(|&pid| {
            // Accept the PID only if tasklist reports a voicebox image name.
            Command::new("tasklist")
                .args(["/FI", &format!("PID eq {}", pid), "/FO", "CSV", "/NH"])
                .output()
                .map(|t| String::from_utf8_lossy(&t.stdout).to_lowercase().contains("voicebox"))
                .unwrap_or(false)
        })
}
/// Check whether a Voicebox server is responding on the given port.
///
/// Issues an HTTP GET to `/health` and returns `true` only when the
/// response is valid JSON matching the Voicebox `HealthResponse` schema:
/// `status` must equal `"healthy"`, and both `model_loaded` and
/// `gpu_available` must be present as booleans. This guards against
/// mistaking an unrelated service with its own `/health` endpoint for a
/// Voicebox server.
#[allow(dead_code)] // Used in platform-specific cfg blocks
fn check_health(port: u16) -> bool {
    let url = format!("http://127.0.0.1:{}/health", port);
    // Each fallible step short-circuits to `false` via an early return.
    let client = match reqwest::blocking::Client::builder()
        .timeout(std::time::Duration::from_secs(3))
        .build()
    {
        Ok(c) => c,
        Err(_) => return false,
    };
    let resp = match client.get(&url).send() {
        Ok(r) => r,
        Err(_) => return false,
    };
    if !resp.status().is_success() {
        return false;
    }
    let body: serde_json::Value = match resp.json() {
        Ok(b) => b,
        Err(_) => return false,
    };
    // Validate the Voicebox-specific schema fields.
    body.get("status").and_then(|v| v.as_str()) == Some("healthy")
        && body.get("model_loaded").map(|v| v.is_boolean()).unwrap_or(false)
        && body.get("gpu_available").map(|v| v.is_boolean()).unwrap_or(false)
}
/// State for the managed voicebox-server sidecar process.
struct ServerState {
    // Child handle for a sidecar spawned by this app instance, if any.
    child: Mutex<Option<tauri_plugin_shell::process::CommandChild>>,
    // PID of an externally discovered/reused voicebox-server, if any.
    server_pid: Mutex<Option<u32>>,
    // NOTE(review): presumably keeps the server alive when the window
    // closes — set and read outside this chunk; confirm against the
    // window-close handler.
    keep_running_on_close: Mutex<bool>,
    // Custom models directory override; `None` means the default location.
    models_dir: Mutex<Option<String>>,
}
#[command]
async fn start_server(
app: tauri::AppHandle,
state: State<'_, ServerState>,
remote: Option<bool>,
models_dir: Option<String>,
) -> Result<String, String> {
// Store models_dir for use on restart (empty string means reset to default)
if let Some(ref dir) = models_dir {
if dir.is_empty() {
*state.models_dir.lock().unwrap() = None;
} else {
*state.models_dir.lock().unwrap() = Some(dir.clone());
}
}
// Check if server is already running (managed by this app instance)
if state.child.lock().unwrap().is_some() {
return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
}
// Check if a voicebox server is already running on our port (from previous session with keep_running=true,
// or an externally started server e.g. via `python`, `uvicorn`, Docker, etc.)
#[cfg(unix)]
{
use std::process::Command;
if let Ok(output) = Command::new("lsof")
.args(["-i", &format!(":{}", SERVER_PORT), "-sTCP:LISTEN"])
.output()
{
let output_str = String::from_utf8_lossy(&output.stdout);
for line in output_str.lines().skip(1) {
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() >= 2 {
let command = parts[0];
let pid_str = parts[1];
if command.contains("voicebox") {
if let Ok(pid) = pid_str.parse::<u32>() {
println!("Found existing voicebox-server on port {} (PID: {}), reusing it", SERVER_PORT, pid);
// Store the PID so we can kill it on exit if needed
*state.server_pid.lock().unwrap() = Some(pid);
return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
}
} else {
// Process name doesn't contain "voicebox" — could be an external
// Python/uvicorn/Docker server. Verify via HTTP health check.
println!("Port {} in use by '{}' (PID: {}), checking if it's a Voicebox server...", SERVER_PORT, command, pid_str);
if check_health(SERVER_PORT) {
println!("Health check passed — reusing external server on port {}", SERVER_PORT);
return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
}
println!("Health check failed — port is occupied by a non-Voicebox process");
return Err(format!(
"Port {} is already in use by another application ({}). \
Close it or change the Voicebox server port.",
SERVER_PORT, command
));
}
}
}
}
}
#[cfg(windows)]
{
use std::net::TcpStream;
if TcpStream::connect_timeout(
&format!("127.0.0.1:{}", SERVER_PORT).parse().unwrap(),
std::time::Duration::from_secs(1),
).is_ok() {
// Port is in use — check if it's a voicebox process by name first
if let Some(pid) = find_voicebox_pid_on_port(SERVER_PORT) {
println!("Found existing voicebox-server on port {} (PID: {}), reusing it", SERVER_PORT, pid);
*state.server_pid.lock().unwrap() = Some(pid);
return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
}
// Process name doesn't match — could be an external Python/Docker server.
// Verify via HTTP health check before giving up.
println!("Port {} in use by unknown process, checking if it's a Voicebox server...", SERVER_PORT);
if check_health(SERVER_PORT) {
println!("Health check passed — reusing external server on port {}", SERVER_PORT);
return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
}
return Err(format!(
"Port {} is already in use by another application. \
Close the other application or change the Voicebox port.",
SERVER_PORT
));
}
}
// Kill any orphaned voicebox-server from previous session on legacy port 8000
// This handles upgrades from older versions that used a fixed port
#[cfg(unix)]
{
use std::process::Command;
if let Ok(output) = Command::new("lsof")
.args(["-i", &format!(":{}", LEGACY_PORT), "-sTCP:LISTEN"])
.output()
{
let output_str = String::from_utf8_lossy(&output.stdout);
for line in output_str.lines().skip(1) {
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() >= 2 {
let command = parts[0];
let pid_str = parts[1];
if command.contains("voicebox") {
if let Ok(pid) = pid_str.parse::<i32>() {
println!("Found orphaned voicebox-server on legacy port {} (PID: {}, CMD: {}), killing it...", LEGACY_PORT, pid, command);
let _ = Command::new("kill")
.args(["-9", "--", &format!("-{}", pid)])
.output();
let _ = Command::new("kill")
.args(["-9", &pid.to_string()])
.output();
}
} else {
println!("Legacy port {} is in use by non-voicebox process: {} (PID: {}), not killing", LEGACY_PORT, command, pid_str);
}
}
}
}
}
#[cfg(windows)]
{
use std::net::TcpStream;
if TcpStream::connect_timeout(
&format!("127.0.0.1:{}", LEGACY_PORT).parse().unwrap(),
std::time::Duration::from_secs(1),
).is_ok() {
if let Some(pid) = find_voicebox_pid_on_port(LEGACY_PORT) {
println!("Found orphaned voicebox-server on legacy port {} (PID: {}), killing it...", LEGACY_PORT, pid);
let _ = std::process::Command::new("taskkill")
.args(["/PID", &pid.to_string(), "/T", "/F"])
.output();
}
}
}
// Brief wait for port to be released
std::thread::sleep(std::time::Duration::from_millis(200));
// Get app data directory
let data_dir = app
.path()
.app_data_dir()
.map_err(|e| format!("Failed to get app data dir: {}", e))?;
// Ensure data directory exists
std::fs::create_dir_all(&data_dir)
.map_err(|e| format!("Failed to create data dir: {}", e))?;
println!("=================================================================");
println!("Starting voicebox-server sidecar");
println!("Data directory: {:?}", data_dir);
println!("Remote mode: {}", remote.unwrap_or(false));
// Check for CUDA backend in data directory (onedir layout: backends/cuda/)
let cuda_binary = {
let cuda_dir = data_dir.join("backends").join("cuda");
let cuda_name = if cfg!(windows) {
"voicebox-server-cuda.exe"
} else {
"voicebox-server-cuda"
};
let exe_path = cuda_dir.join(cuda_name);
if exe_path.exists() {
println!("Found CUDA backend at {:?}", cuda_dir);
// Version check: run --version from the onedir directory so
// PyInstaller can find its support files for the fast --version path
let app_version = app.config().version.clone().unwrap_or_default();
let version_ok = match std::process::Command::new(&exe_path)
.arg("--version")
.current_dir(&cuda_dir)
.output()
{
Ok(output) => {
// Output format: "voicebox-server X.Y.Z\n"
let version_str = String::from_utf8_lossy(&output.stdout);
let binary_version = version_str.trim().split_whitespace().last().unwrap_or("");
if binary_version == app_version {
println!("CUDA binary version {} matches app version", binary_version);
true
} else {
println!(
"CUDA binary version mismatch: binary={}, app={}. Falling back to CPU.",
binary_version, app_version
);
false
}
}
Err(e) => {
println!("Failed to check CUDA binary version: {}. Falling back to CPU.", e);
false
}
};
if version_ok {
Some(exe_path)
} else {
None
}
} else {
println!("No CUDA backend found, using bundled CPU binary");
None
}
};
let sidecar_result = app.shell().sidecar("voicebox-server");
let mut sidecar = match sidecar_result {
Ok(s) => s,
Err(e) => {
eprintln!("Failed to get sidecar: {}", e);
// In dev mode, check if the server is already running (started manually)
#[cfg(debug_assertions)]
{
eprintln!("Dev mode: Checking if server is already running on port {}...", SERVER_PORT);
// Try to connect to the server port
use std::net::TcpStream;
if TcpStream::connect_timeout(
&format!("127.0.0.1:{}", SERVER_PORT).parse().unwrap(),
std::time::Duration::from_secs(1),
).is_ok() {
println!("Found server already running on port {}", SERVER_PORT);
return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
}
eprintln!("");
eprintln!("=================================================================");
eprintln!("DEV MODE: No server found on port {}", SERVER_PORT);
eprintln!("");
eprintln!("Start the Python server in a separate terminal:");
eprintln!(" bun run dev:server");
eprintln!("=================================================================");
eprintln!("");
}
return Err(format!("Failed to start server. In dev mode, run 'bun run dev:server' in a separate terminal."));
}
};
println!("Sidecar command created successfully");
// Build common args
let data_dir_str = data_dir
.to_str()
.ok_or_else(|| "Invalid data dir path".to_string())?
.to_string();
let port_str = SERVER_PORT.to_string();
let parent_pid_str = std::process::id().to_string();
let is_remote = remote.unwrap_or(false);
// Resolve the custom models directory from the parameter or stored state
let effective_models_dir = models_dir.or_else(|| state.models_dir.lock().unwrap().clone());
if let Some(ref dir) = effective_models_dir {
println!("Custom models directory: {}", dir);
}
// If CUDA binary exists, launch it from the onedir directory.
// .current_dir() is critical: PyInstaller onedir expects all DLLs and
// support files (nvidia/, _internal/, etc.) relative to the exe.
let spawn_result = if let Some(ref cuda_path) = cuda_binary {
let cuda_dir = cuda_path.parent().unwrap();
println!("Launching CUDA backend: {:?} (cwd: {:?})", cuda_path, cuda_dir);
let mut cmd = app.shell().command(cuda_path.to_str().unwrap());
cmd = cmd.current_dir(cuda_dir);
cmd = cmd.args(["--data-dir", &data_dir_str, "--port", &port_str, "--parent-pid", &parent_pid_str]);
if is_remote {
cmd = cmd.args(["--host", "0.0.0.0"]);
}
if let Some(ref dir) = effective_models_dir {
cmd = cmd.env("VOICEBOX_MODELS_DIR", dir);
}
cmd.spawn()
} else {
// Use the bundled CPU sidecar
sidecar = sidecar.args(["--data-dir", &data_dir_str, "--port", &port_str, "--parent-pid", &parent_pid_str]);
if is_remote {
sidecar = sidecar.args(["--host", "0.0.0.0"]);
}
if let Some(ref dir) = effective_models_dir {
sidecar = sidecar.env("VOICEBOX_MODELS_DIR", dir);
}
println!("Spawning server process...");
sidecar.spawn()
};
let (mut rx, child) = match spawn_result {
Ok(result) => result,
Err(e) => {
eprintln!("Failed to spawn server process: {}", e);
// In dev mode, check if a manually-started server is available
#[cfg(debug_assertions)]
{
use std::net::TcpStream;
if TcpStream::connect_timeout(
&format!("127.0.0.1:{}", SERVER_PORT).parse().unwrap(),
std::time::Duration::from_secs(1),
).is_ok() {
println!("Found manually-started server on port {}", SERVER_PORT);
return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
}
eprintln!("");
eprintln!("=================================================================");
eprintln!("DEV MODE: Server binary failed to start");
eprintln!("");
eprintln!("Start the Python server in a separate terminal:");
eprintln!(" bun run dev:server");
eprintln!("=================================================================");
eprintln!("");
return Err("Dev mode: Start server manually with 'bun run dev:server'".to_string());
}
#[cfg(not(debug_assertions))]
{
eprintln!("This could be due to:");
eprintln!(" - Missing or corrupted binary");
eprintln!(" - Missing execute permissions");
eprintln!(" - Code signing issues on macOS");
eprintln!(" - Missing dependencies");
return Err(format!("Failed to spawn: {}", e));
}
}
};
println!("Server process spawned, waiting for ready signal...");
println!("=================================================================");
// Store child process and PID
let process_pid = child.pid();
*state.server_pid.lock().unwrap() = Some(process_pid);
*state.child.lock().unwrap() = Some(child);
// Wait for server to be ready by listening for startup log
// PyInstaller bundles can be slow on first import, especially torch/transformers
let timeout = tokio::time::Duration::from_secs(120);
let start_time = tokio::time::Instant::now();
let mut error_output = Vec::new();
loop {
if start_time.elapsed() > timeout {
eprintln!("Server startup timeout after 120 seconds");
if !error_output.is_empty() {
eprintln!("Collected error output:");
for line in &error_output {
eprintln!(" {}", line);
}
}
// In dev mode, check if a manual server came up during the wait
#[cfg(debug_assertions)]
{
use std::net::TcpStream;
if TcpStream::connect_timeout(
&format!("127.0.0.1:{}", SERVER_PORT).parse().unwrap(),
std::time::Duration::from_secs(1),
).is_ok() {
// Kill the placeholder process
let _ = state.child.lock().unwrap().take();
println!("Found manually-started server on port {}", SERVER_PORT);
return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
}
}
return Err("Server startup timeout - check Console.app for detailed logs".to_string());
}
match tokio::time::timeout(tokio::time::Duration::from_millis(100), rx.recv()).await {
Ok(Some(event)) => {
match event {
tauri_plugin_shell::process::CommandEvent::Stdout(line) => {
let line_str = String::from_utf8_lossy(&line);
println!("Server output: {}", line_str);
let _ = app.emit("server-log", serde_json::json!({
"stream": "stdout",
"line": line_str.trim_end(),
}));
if line_str.contains("Uvicorn running") || line_str.contains("Application startup complete") {
println!("Server is ready!");
break;
}
}
tauri_plugin_shell::process::CommandEvent::Stderr(line) => {
let line_str = String::from_utf8_lossy(&line).to_string();
eprintln!("Server: {}", line_str);
let _ = app.emit("server-log", serde_json::json!({
"stream": "stderr",
"line": line_str.trim_end(),
}));
// Collect error lines for debugging
if line_str.contains("ERROR") || line_str.contains("Error") || line_str.contains("Failed") {
error_output.push(line_str.clone());
}
// Uvicorn logs to stderr, so check there too
if line_str.contains("Uvicorn running") || line_str.contains("Application startup complete") {
println!("Server is ready!");
break;
}
}
_ => {}
}
}
Ok(None) => {
// In dev mode, this is expected when using the placeholder binary
#[cfg(debug_assertions)]
{
use std::net::TcpStream;
eprintln!("Server process ended (dev mode placeholder detected)");
// Check if a manually-started server is available
if TcpStream::connect_timeout(
&format!("127.0.0.1:{}", SERVER_PORT).parse().unwrap(),
std::time::Duration::from_secs(1),
).is_ok() {
// Clean up state
let _ = state.child.lock().unwrap().take();
let _ = state.server_pid.lock().unwrap().take();
println!("Found manually-started server on port {}", SERVER_PORT);
return Ok(format!("http://127.0.0.1:{}", SERVER_PORT));
}
eprintln!("");
eprintln!("=================================================================");
eprintln!("DEV MODE: No bundled server binary available");
eprintln!("");
eprintln!("Start the Python server in a separate terminal:");
eprintln!(" bun run dev:server");
eprintln!("=================================================================");
eprintln!("");
return Err("Dev mode: Start server manually with 'bun run dev:server'".to_string());
}
#[cfg(not(debug_assertions))]
{
eprintln!("Server process ended unexpectedly during startup!");
eprintln!("The server binary may have crashed or exited with an error.");
eprintln!("Check Console.app logs for more details (search for 'voicebox')");
return Err("Server process ended unexpectedly".to_string());
}
}
Err(_) => {
// Timeout on this recv, continue loop
continue;
}
}
}
// Spawn task to continue reading output and emit to frontend
let app_handle = app.clone();
tokio::spawn(async move {
while let Some(event) = rx.recv().await {
match event {
tauri_plugin_shell::process::CommandEvent::Stdout(line) => {
let line_str = String::from_utf8_lossy(&line);
println!("Server: {}", line_str);
let _ = app_handle.emit("server-log", serde_json::json!({
"stream": "stdout",
"line": line_str.trim_end(),
}));
}
tauri_plugin_shell::process::CommandEvent::Stderr(line) => {
let line_str = String::from_utf8_lossy(&line);
eprintln!("Server error: {}", line_str);
let _ = app_handle.emit("server-log", serde_json::json!({
"stream": "stderr",
"line": line_str.trim_end(),
}));
}
_ => {}
}
}
});
Ok(format!("http://127.0.0.1:{}", SERVER_PORT))
}
#[command]
async fn stop_server(state: State<'_, ServerState>) -> Result<(), String> {
let pid = state.server_pid.lock().unwrap().take();
let _child = state.child.lock().unwrap().take();
if let Some(pid) = pid {
println!("stop_server: Stopping server with PID: {}", pid);
#[cfg(unix)]
{
use std::process::Command;
// Kill process group with SIGTERM first
let _ = Command::new("kill")
.args(["-TERM", "--", &format!("-{}", pid)])
.output();
// Brief wait then force kill
std::thread::sleep(std::time::Duration::from_millis(100));
let _ = Command::new("kill")
.args(["-9", "--", &format!("-{}", pid)])
.output();
let _ = Command::new("kill")
.args(["-9", &pid.to_string()])
.output();
println!("stop_server: Process group kill completed");
}
#[cfg(windows)]
{
// Send graceful shutdown via HTTP — the server's parent-pid watchdog
// will also handle cleanup if this app process exits.
println!("Sending graceful shutdown via HTTP...");
let client = reqwest::blocking::Client::builder()
.timeout(std::time::Duration::from_secs(2))
.build()
.unwrap();
let _ = client
.post(&format!("http://127.0.0.1:{}/shutdown", SERVER_PORT))
.send();
println!("Shutdown request sent (server watchdog will handle cleanup)");
}
}
Ok(())
}
/// Tauri command: restart the sidecar server, optionally changing the
/// custom models directory first.
///
/// `models_dir` semantics: `None` leaves the stored setting untouched, an
/// empty string resets it to the default, and any other value overrides it.
/// The new setting is picked up by `start_server` via `ServerState`.
#[command]
async fn restart_server(
    app: tauri::AppHandle,
    state: State<'_, ServerState>,
    models_dir: Option<String>,
) -> Result<String, String> {
    println!("restart_server: stopping current server...");
    // Record the requested models directory before the restart cycle.
    if let Some(dir) = models_dir {
        let new_value = if dir.is_empty() { None } else { Some(dir) };
        *state.models_dir.lock().unwrap() = new_value;
    }
    // Tear down the running server, then give the OS a moment to free the port.
    stop_server(state.clone()).await?;
    println!("restart_server: waiting for port release...");
    tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await;
    // Relaunch; CUDA detection and the stored models_dir are applied there.
    println!("restart_server: starting server...");
    start_server(app, state, None, None).await
}
/// Tauri command: record whether the sidecar server should keep running
/// after the app window closes. Consulted during the exit flow.
#[command]
fn set_keep_server_running(state: State<'_, ServerState>, keep_running: bool) {
    println!("set_keep_server_running called with: {}", keep_running);
    let mut flag = state.keep_running_on_close.lock().unwrap();
    *flag = keep_running;
}
/// Tauri command: begin a system audio capture session, bounded by
/// `max_duration_secs` seconds. Delegates to the `audio_capture` module.
#[command]
async fn start_system_audio_capture(
    state: State<'_, audio_capture::AudioCaptureState>,
    max_duration_secs: u32,
) -> Result<(), String> {
    let capture_state = state.inner();
    audio_capture::start_capture(capture_state, max_duration_secs).await
}
/// Tauri command: end the current system audio capture session and return
/// the captured audio (string payload produced by `audio_capture::stop_capture`).
#[command]
async fn stop_system_audio_capture(
    state: State<'_, audio_capture::AudioCaptureState>,
) -> Result<String, String> {
    let capture_state = state.inner();
    audio_capture::stop_capture(capture_state).await
}
/// Tauri command: report whether system audio capture is supported on this
/// platform/build, as determined by the `audio_capture` module.
#[command]
fn is_system_audio_supported() -> bool {
    audio_capture::is_supported()
}
/// Tauri command: enumerate the audio output devices currently available
/// for playback, or an error string on failure.
#[command]
fn list_audio_output_devices(
    state: State<'_, audio_output::AudioOutputState>,
) -> Result<Vec<audio_output::AudioOutputDevice>, String> {
    state.list_output_devices()
}
/// Tauri command: play the given audio bytes on the selected output devices.
///
/// `audio_data` is the encoded audio payload; `device_ids` are device
/// identifiers (presumably the ids reported by `list_audio_output_devices`
/// — confirm against the `audio_output` module).
#[command]
async fn play_audio_to_devices(
    state: State<'_, audio_output::AudioOutputState>,
    audio_data: Vec<u8>,
    device_ids: Vec<String>,
) -> Result<(), String> {
    state.play_audio_to_devices(audio_data, device_ids).await
}
/// Tauri command: stop all in-progress audio playback started via
/// `play_audio_to_devices`.
#[command]
fn stop_audio_playback(
    state: State<'_, audio_output::AudioOutputState>,
) -> Result<(), String> {
    state.stop_all_playback()
}
/// Shared application entry point for desktop and mobile builds.
///
/// Registers plugins, shared state (sidecar-server bookkeeping plus audio
/// capture/playback state), platform-specific setup, all Tauri command
/// handlers, and the window/app lifecycle hooks that coordinate shutdown
/// of the sidecar server.
#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
    tauri::Builder::default()
        .plugin(tauri_plugin_dialog::init())
        .plugin(tauri_plugin_fs::init())
        .plugin(tauri_plugin_shell::init())
        // Sidecar-server bookkeeping shared by the server commands above.
        .manage(ServerState {
            child: Mutex::new(None),
            server_pid: Mutex::new(None),
            keep_running_on_close: Mutex::new(false),
            models_dir: Mutex::new(None),
        })
        .manage(audio_capture::AudioCaptureState::new())
        .manage(audio_output::AudioOutputState::new())
        .setup(|app| {
            // Updater and process-restart plugins only exist on desktop targets.
            #[cfg(desktop)]
            {
                app.handle().plugin(tauri_plugin_updater::Builder::new().build())?;
                app.handle().plugin(tauri_plugin_process::init())?;
            }
            // Hide title bar icon on Windows
            #[cfg(windows)]
            {
                use windows::Win32::Foundation::HWND;
                use windows::Win32::UI::WindowsAndMessaging::{SetClassLongPtrW, GCLP_HICON, GCLP_HICONSM};
                if let Some((_, window)) = app.webview_windows().iter().next() {
                    if let Ok(hwnd) = window.hwnd() {
                        let hwnd = HWND(hwnd.0);
                        // SAFETY: hwnd was just obtained from a live Tauri
                        // window, so it is a valid window handle for the
                        // duration of these calls.
                        unsafe {
                            // Set both small and regular icons to NULL to hide the title bar icon
                            SetClassLongPtrW(hwnd, GCLP_HICON, 0);
                            SetClassLongPtrW(hwnd, GCLP_HICONSM, 0);
                        }
                    }
                }
            }
            // Enable microphone access on Linux (WebKitGTK denies getUserMedia by default)
            #[cfg(target_os = "linux")]
            {
                use tauri::Manager;
                if let Some(window) = app.get_webview_window("main") {
                    let _ = window.with_webview(|webview| {
                        use webkit2gtk::{WebViewExt, SettingsExt, PermissionRequestExt};
                        use webkit2gtk::glib::ObjectExt;
                        let wk_webview = webview.inner();
                        // Enable media stream support in WebKitGTK settings
                        if let Some(settings) = WebViewExt::settings(&wk_webview) {
                            settings.set_enable_media_stream(true);
                        }
                        // Auto-grant UserMediaPermissionRequest (microphone access)
                        // Only for trusted local origins (Tauri dev server or custom protocol)
                        wk_webview.connect_permission_request(move |webview, request: &webkit2gtk::PermissionRequest| {
                            if request.is::<webkit2gtk::UserMediaPermissionRequest>() {
                                let uri = WebViewExt::uri(webview).unwrap_or_default();
                                let is_trusted = uri.starts_with("tauri://")
                                    || uri.starts_with("https://tauri.localhost")
                                    || uri.starts_with("http://localhost")
                                    || uri.starts_with("http://127.0.0.1");
                                if is_trusted {
                                    request.allow();
                                    return true;
                                }
                                request.deny();
                                return true;
                            }
                            // Not a media permission request: use default handling.
                            false
                        });
                    });
                }
            }
            Ok(())
        })
        .invoke_handler(tauri::generate_handler![
            start_server,
            stop_server,
            restart_server,
            set_keep_server_running,
            start_system_audio_capture,
            stop_system_audio_capture,
            is_system_audio_supported,
            list_audio_output_devices,
            play_audio_to_devices,
            stop_audio_playback
        ])
        .on_window_event({
            // Guards against re-entering the close handshake while a previous
            // close request is still being negotiated with the frontend.
            let closing = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
            move |window, event| {
                if let WindowEvent::CloseRequested { api, .. } = event {
                    // If we're already in the close flow, let it proceed
                    if closing.load(std::sync::atomic::Ordering::SeqCst) {
                        return;
                    }
                    closing.store(true, std::sync::atomic::Ordering::SeqCst);
                    // Prevent automatic close so frontend can clean up
                    api.prevent_close();
                    // Emit event to frontend to check setting and stop server if needed
                    let app_handle = window.app_handle();
                    if let Err(e) = app_handle.emit("window-close-requested", ()) {
                        eprintln!("Failed to emit window-close-requested event: {}", e);
                        window.close().ok();
                        return;
                    }
                    // Set up listener for frontend response
                    let window_for_close = window.clone();
                    let closing_for_timeout = closing.clone();
                    let (tx, mut rx) = mpsc::unbounded_channel::<()>();
                    let listener_id = window.listen("window-close-allowed", move |_| {
                        let _ = tx.send(());
                    });
                    // Close once the frontend signals it is done, or after a
                    // 5 s timeout so a stuck frontend can't wedge the window open.
                    tauri::async_runtime::spawn(async move {
                        tokio::select! {
                            _ = rx.recv() => {
                                window_for_close.close().ok();
                            }
                            _ = tokio::time::sleep(tokio::time::Duration::from_secs(5)) => {
                                eprintln!("Window close timeout, closing anyway");
                                window_for_close.close().ok();
                            }
                        }
                        window_for_close.unlisten(listener_id);
                        closing_for_timeout.store(false, std::sync::atomic::Ordering::SeqCst);
                    });
                }
            }})
        .build(tauri::generate_context!())
        .expect("error while building tauri application")
        .run(|app, event| {
            let _ = &app; // used on unix
            match &event {
                RunEvent::Exit => {
                    let state = app.state::<ServerState>();
                    let keep_running = *state.keep_running_on_close.lock().unwrap();
                    let has_pid = state.server_pid.lock().unwrap().is_some();
                    println!("RunEvent::Exit — keep_running={}, has_pid={}", keep_running, has_pid);
                    if keep_running {
                        // Tell the server to disable its watchdog so it survives
                        // after this process exits.
                        println!("Keep server running: disabling watchdog...");
                        // Write a sentinel file as a reliable fallback. On Windows
                        // the HTTP request below can race with process exit, leaving
                        // the watchdog unaware it should stay alive. The sentinel
                        // file is checked during the watchdog grace period.
                        let data_dir = app
                            .path()
                            .app_data_dir()
                            .unwrap_or_default();
                        let sentinel = data_dir.join(".keep-running");
                        if let Err(e) = std::fs::write(&sentinel, b"1") {
                            eprintln!("Failed to write keep-running sentinel: {}", e);
                        } else {
                            println!("Wrote keep-running sentinel to {:?}", sentinel);
                        }
                        // Blocking client is acceptable here: this runs on the
                        // main run-loop callback during process exit, not on an
                        // async worker.
                        let client = reqwest::blocking::Client::builder()
                            .timeout(std::time::Duration::from_secs(2))
                            .build()
                            .unwrap();
                        match client
                            .post(&format!("http://127.0.0.1:{}/watchdog/disable", SERVER_PORT))
                            .send()
                        {
                            Ok(resp) => println!("Watchdog disable response: {}", resp.status()),
                            Err(e) => eprintln!("Failed to disable watchdog: {}", e),
                        }
                    } else {
                        // Server will self-terminate via parent-pid watchdog when
                        // this process exits. On Unix, also send SIGTERM for
                        // immediate cleanup.
                        println!("RunEvent::Exit - server will self-terminate via watchdog");
                        #[cfg(unix)]
                        {
                            if let Some(pid) = state.server_pid.lock().unwrap().take() {
                                use std::process::Command;
                                // Group kill first, then the PID itself in case
                                // the server detached from its process group.
                                let _ = Command::new("kill")
                                    .args(["-TERM", "--", &format!("-{}", pid)])
                                    .output();
                                std::thread::sleep(std::time::Duration::from_millis(100));
                                let _ = Command::new("kill")
                                    .args(["-9", "--", &format!("-{}", pid)])
                                    .output();
                                let _ = Command::new("kill")
                                    .args(["-9", &pid.to_string()])
                                    .output();
                            }
                        }
                    }
                }
                RunEvent::ExitRequested { api, .. } => {
                    println!("RunEvent::ExitRequested received");
                    // Don't prevent exit, just log it
                    let _ = api;
                }
                _ => {}
            }
        });
}
/// Binary entry point; delegates to the shared `run()` builder so the same
/// code path serves both this binary and the mobile entry point.
fn main() {
    run();
}

View File

@@ -0,0 +1,66 @@
{
"$schema": "https://schema.tauri.app/config/2",
"productName": "Voicebox",
"version": "0.4.5",
"identifier": "sh.voicebox.app",
"build": {
"beforeDevCommand": "bun run dev",
"beforeBuildCommand": "bun run build",
"frontendDist": "../dist",
"devUrl": "http://localhost:5173"
},
"bundle": {
"active": true,
"targets": "all",
"createUpdaterArtifacts": "v1Compatible",
"externalBin": ["binaries/voicebox-server"],
"icon": [
"icons/32x32.png",
"icons/128x128.png",
"icons/128x128@2x.png",
"icons/icon.icns",
"icons/icon.ico"
],
"macOS": {
"frameworks": [],
"minimumSystemVersion": "11.0",
"infoPlist": "Info.plist",
"entitlements": "Entitlements.plist"
},
"resources": {
"gen/Assets.car": "./",
"gen/voicebox.icns": "./",
"gen/partial.plist": "./"
}
},
"app": {
"security": {
"csp": null,
"capabilities": ["default"]
},
"windows": [
{
"title": "",
"width": 1200,
"height": 800,
"minWidth": 800,
"minHeight": 600,
"resizable": true,
"fullscreen": false,
"devtools": true,
"userAgent": null,
"titleBarStyle": "Overlay"
}
],
"withGlobalTauri": true
},
"plugins": {
"shell": {
"open": ".*"
},
"updater": {
"pubkey": "dW50cnVzdGVkIGNvbW1lbnQ6IG1pbmlzaWduIHB1YmxpYyBrZXk6IEUxRENBQkRBQjdBNTM1OTIKUldTU05hVzMycXZjNGJGcUxmcVVocll2QjdSaTJNdlFxR2M3VDJsMnVvbDdyZGRPMmRlOW9aWTcK",
"endpoints": ["https://github.com/jamiepine/voicebox/releases/latest/download/latest.json"]
}
}
}

View File

@@ -0,0 +1,59 @@
// NOTE: This test requires system audio to be playing during execution.
// To run this test successfully:
// 1. Start playing audio (music, video, etc.)
// 2. Run: cargo test --test audio_capture_test -- --nocapture
// 3. The test will capture audio for 5 seconds and verify the output
use voicebox::audio_capture::{AudioCaptureState, start_capture, stop_capture};
use base64::Engine;
/// Integration test: capture ~5 s of system audio and validate the result.
///
/// Requires audible system audio during the run (see file header). Fixes vs.
/// the previous version: the redundant `len() > 0` assert after `is_empty()`
/// is gone, panicky `match` arms are `expect`s, and the decoded payload is
/// now actually checked for a RIFF/WAVE header rather than just non-emptiness.
#[tokio::test]
async fn test_system_audio_capture() {
    let state = AudioCaptureState::new();

    println!("Starting system audio capture with 5 second max duration...");
    start_capture(&state, 5)
        .await
        .expect("Failed to start capture");

    println!("Capture started, waiting 5 seconds...");
    tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;

    println!("Stopping capture...");
    let base64_wav = stop_capture(&state)
        .await
        .expect("Failed to stop capture or get audio data");
    println!("Capture stopped successfully");

    // Decode the base64 payload and verify it is a plausible WAV file.
    println!("Validating base64 WAV data...");
    let decoded_bytes = base64::engine::general_purpose::STANDARD
        .decode(&base64_wav)
        .expect("Failed to decode base64 data");
    assert!(!decoded_bytes.is_empty(), "Decoded bytes array is empty");
    println!("WAV data length: {} bytes", decoded_bytes.len());

    // A WAV file starts with a 12-byte RIFF chunk header: "RIFF", a
    // little-endian size, then the "WAVE" form type.
    assert!(decoded_bytes.len() >= 12, "Data too short to be a WAV file");
    assert_eq!(&decoded_bytes[0..4], b"RIFF", "Missing RIFF magic");
    assert_eq!(&decoded_bytes[8..12], b"WAVE", "Missing WAVE form type");

    println!("✓ Test passed: Audio capture produced valid WAV data");
}