{ "openapi": "3.1.0", "info": { "title": "voicebox API", "description": "Production-quality Qwen3-TTS voice cloning API", "version": "0.1.0" }, "servers": [ { "url": "http://localhost:8000", "description": "Local development server" } ], "paths": { "/": { "get": { "summary": "Root", "description": "Root endpoint.", "operationId": "root__get", "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": {} } } } }, "tags": [ "General" ] } }, "/health": { "get": { "summary": "Health", "description": "Health check endpoint.", "operationId": "health_health_get", "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HealthResponse" } } } } }, "tags": [ "General" ] } }, "/profiles": { "get": { "summary": "List Profiles", "description": "List all voice profiles.", "operationId": "list_profiles_profiles_get", "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "items": { "$ref": "#/components/schemas/VoiceProfileResponse" }, "type": "array", "title": "Response List Profiles Profiles Get" } } } } }, "tags": [ "Profiles" ] }, "post": { "summary": "Create Profile", "description": "Create a new voice profile.", "operationId": "create_profile_profiles_post", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/VoiceProfileCreate" } } }, "required": true }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/VoiceProfileResponse" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Profiles" ] } }, "/profiles/{profile_id}": { "get": { "summary": "Get Profile", "description": "Get a voice profile by ID.", "operationId": 
"get_profile_profiles__profile_id__get", "parameters": [ { "name": "profile_id", "in": "path", "required": true, "schema": { "type": "string", "title": "Profile Id" } } ], "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/VoiceProfileResponse" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Profiles" ] }, "put": { "summary": "Update Profile", "description": "Update a voice profile.", "operationId": "update_profile_profiles__profile_id__put", "parameters": [ { "name": "profile_id", "in": "path", "required": true, "schema": { "type": "string", "title": "Profile Id" } } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "$ref": "#/components/schemas/VoiceProfileCreate" } } } }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/VoiceProfileResponse" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Profiles" ] }, "delete": { "summary": "Delete Profile", "description": "Delete a voice profile.", "operationId": "delete_profile_profiles__profile_id__delete", "parameters": [ { "name": "profile_id", "in": "path", "required": true, "schema": { "type": "string", "title": "Profile Id" } } ], "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": {} } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Profiles" ] } }, "/profiles/{profile_id}/samples": { "post": { "summary": "Add Profile Sample", "description": "Add a sample to a voice profile.", "operationId": 
"add_profile_sample_profiles__profile_id__samples_post", "parameters": [ { "name": "profile_id", "in": "path", "required": true, "schema": { "type": "string", "title": "Profile Id" } } ], "requestBody": { "required": true, "content": { "multipart/form-data": { "schema": { "$ref": "#/components/schemas/Body_add_profile_sample_profiles__profile_id__samples_post" } } } }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ProfileSampleResponse" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Profiles" ] }, "get": { "summary": "Get Profile Samples", "description": "Get all samples for a profile.", "operationId": "get_profile_samples_profiles__profile_id__samples_get", "parameters": [ { "name": "profile_id", "in": "path", "required": true, "schema": { "type": "string", "title": "Profile Id" } } ], "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "type": "array", "items": { "$ref": "#/components/schemas/ProfileSampleResponse" }, "title": "Response Get Profile Samples Profiles Profile Id Samples Get" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Profiles" ] } }, "/profiles/samples/{sample_id}": { "delete": { "summary": "Delete Profile Sample", "description": "Delete a profile sample.", "operationId": "delete_profile_sample_profiles_samples__sample_id__delete", "parameters": [ { "name": "sample_id", "in": "path", "required": true, "schema": { "type": "string", "title": "Sample Id" } } ], "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": {} } } }, "422": { "description": "Validation Error", "content": { "application/json": { 
"schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Profiles" ] } }, "/generate": { "post": { "summary": "Generate Speech", "description": "Generate speech from text using a voice profile.", "operationId": "generate_speech_generate_post", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GenerationRequest" } } }, "required": true }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/GenerationResponse" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Generation" ] } }, "/history": { "get": { "summary": "List History", "description": "List generation history with optional filters.", "operationId": "list_history_history_get", "parameters": [ { "name": "profile_id", "in": "query", "required": false, "schema": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Profile Id" } }, { "name": "search", "in": "query", "required": false, "schema": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Search" } }, { "name": "limit", "in": "query", "required": false, "schema": { "type": "integer", "default": 50, "title": "Limit" } }, { "name": "offset", "in": "query", "required": false, "schema": { "type": "integer", "default": 0, "title": "Offset" } } ], "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HistoryListResponse" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "History" ] } }, "/history/{generation_id}": { "get": { "summary": "Get Generation", "description": "Get a generation by ID.", "operationId": "get_generation_history__generation_id__get", 
"parameters": [ { "name": "generation_id", "in": "path", "required": true, "schema": { "type": "string", "title": "Generation Id" } } ], "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HistoryResponse" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "History" ] }, "delete": { "summary": "Delete Generation", "description": "Delete a generation.", "operationId": "delete_generation_history__generation_id__delete", "parameters": [ { "name": "generation_id", "in": "path", "required": true, "schema": { "type": "string", "title": "Generation Id" } } ], "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": {} } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "History" ] } }, "/history/stats": { "get": { "summary": "Get Stats", "description": "Get generation statistics.", "operationId": "get_stats_history_stats_get", "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": {} } } } }, "tags": [ "History" ] } }, "/transcribe": { "post": { "summary": "Transcribe Audio", "description": "Transcribe an audio file to text.", "operationId": "transcribe_audio_transcribe_post", "requestBody": { "content": { "multipart/form-data": { "schema": { "$ref": "#/components/schemas/Body_transcribe_audio_transcribe_post" } } }, "required": true }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/TranscriptionResponse" } } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, 
"tags": [ "Generation" ] } }, "/audio/{generation_id}": { "get": { "summary": "Get Audio", "description": "Serve the generated audio file.", "operationId": "get_audio_audio__generation_id__get", "parameters": [ { "name": "generation_id", "in": "path", "required": true, "schema": { "type": "string", "title": "Generation Id" } } ], "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": {} } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Generation" ] } }, "/models/load": { "post": { "summary": "Load Model", "description": "Manually load the TTS model.", "operationId": "load_model_models_load_post", "parameters": [ { "name": "model_size", "in": "query", "required": false, "schema": { "type": "string", "default": "1.7B", "title": "Model Size" } } ], "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": {} } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Models" ] } }, "/models/unload": { "post": { "summary": "Unload Model", "description": "Unload the TTS model to free memory.", "operationId": "unload_model_models_unload_post", "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": {} } } } }, "tags": [ "Models" ] } }, "/models/progress/{model_name}": { "get": { "summary": "Get Model Progress", "description": "Get model download progress via Server-Sent Events.", "operationId": "get_model_progress_models_progress__model_name__get", "parameters": [ { "name": "model_name", "in": "path", "required": true, "schema": { "type": "string", "title": "Model Name" } } ], "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": {} } } }, "422": { 
"description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Models" ] } }, "/models/status": { "get": { "summary": "Get Model Status", "description": "Get status of all available models.", "operationId": "get_model_status_models_status_get", "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ModelStatusListResponse" } } } } }, "tags": [ "Models" ] } }, "/models/download": { "post": { "summary": "Trigger Model Download", "description": "Trigger download of a specific model.", "operationId": "trigger_model_download_models_download_post", "requestBody": { "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ModelDownloadRequest" } } }, "required": true }, "responses": { "200": { "description": "Successful Response", "content": { "application/json": { "schema": {} } } }, "422": { "description": "Validation Error", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HTTPValidationError" } } } } }, "tags": [ "Models" ] } } }, "components": { "schemas": { "Body_add_profile_sample_profiles__profile_id__samples_post": { "properties": { "file": { "type": "string", "format": "binary", "title": "File" }, "reference_text": { "type": "string", "title": "Reference Text" } }, "type": "object", "required": [ "file", "reference_text" ], "title": "Body_add_profile_sample_profiles__profile_id__samples_post" }, "Body_transcribe_audio_transcribe_post": { "properties": { "file": { "type": "string", "format": "binary", "title": "File" }, "language": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Language" } }, "type": "object", "required": [ "file" ], "title": "Body_transcribe_audio_transcribe_post" }, "GenerationRequest": { "properties": { "profile_id": { "type": "string", "title": "Profile Id" }, "text": { "type": "string", "maxLength": 
5000, "minLength": 1, "title": "Text" }, "language": { "type": "string", "pattern": "^(en|zh)$", "title": "Language", "default": "en" }, "seed": { "anyOf": [ { "type": "integer", "minimum": 0.0 }, { "type": "null" } ], "title": "Seed" }, "model_size": { "anyOf": [ { "type": "string", "pattern": "^(1\\.7B|0\\.6B)$" }, { "type": "null" } ], "title": "Model Size", "default": "1.7B" } }, "type": "object", "required": [ "profile_id", "text" ], "title": "GenerationRequest", "description": "Request model for voice generation." }, "GenerationResponse": { "properties": { "id": { "type": "string", "title": "Id" }, "profile_id": { "type": "string", "title": "Profile Id" }, "text": { "type": "string", "title": "Text" }, "language": { "type": "string", "title": "Language" }, "audio_path": { "type": "string", "title": "Audio Path" }, "duration": { "type": "number", "title": "Duration" }, "seed": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "title": "Seed" }, "created_at": { "type": "string", "format": "date-time", "title": "Created At" } }, "type": "object", "required": [ "id", "profile_id", "text", "language", "audio_path", "duration", "seed", "created_at" ], "title": "GenerationResponse", "description": "Response model for voice generation." 
}, "HTTPValidationError": { "properties": { "detail": { "items": { "$ref": "#/components/schemas/ValidationError" }, "type": "array", "title": "Detail" } }, "type": "object", "title": "HTTPValidationError" }, "HealthResponse": { "properties": { "status": { "type": "string", "title": "Status" }, "model_loaded": { "type": "boolean", "title": "Model Loaded" }, "model_downloaded": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "title": "Model Downloaded" }, "model_size": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Model Size" }, "gpu_available": { "type": "boolean", "title": "Gpu Available" }, "vram_used_mb": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "title": "Vram Used Mb" } }, "type": "object", "required": [ "status", "model_loaded", "gpu_available" ], "title": "HealthResponse", "description": "Response model for health check." }, "HistoryListResponse": { "properties": { "items": { "items": { "$ref": "#/components/schemas/HistoryResponse" }, "type": "array", "title": "Items" }, "total": { "type": "integer", "title": "Total" } }, "type": "object", "required": [ "items", "total" ], "title": "HistoryListResponse", "description": "Response model for history list." 
}, "HistoryResponse": { "properties": { "id": { "type": "string", "title": "Id" }, "profile_id": { "type": "string", "title": "Profile Id" }, "profile_name": { "type": "string", "title": "Profile Name" }, "text": { "type": "string", "title": "Text" }, "language": { "type": "string", "title": "Language" }, "audio_path": { "type": "string", "title": "Audio Path" }, "duration": { "type": "number", "title": "Duration" }, "seed": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "title": "Seed" }, "created_at": { "type": "string", "format": "date-time", "title": "Created At" } }, "type": "object", "required": [ "id", "profile_id", "profile_name", "text", "language", "audio_path", "duration", "seed", "created_at" ], "title": "HistoryResponse", "description": "Response model for history entry (includes profile name)." }, "ModelDownloadRequest": { "properties": { "model_name": { "type": "string", "title": "Model Name" } }, "type": "object", "required": [ "model_name" ], "title": "ModelDownloadRequest", "description": "Request model for triggering model download." }, "ModelStatus": { "properties": { "model_name": { "type": "string", "title": "Model Name" }, "display_name": { "type": "string", "title": "Display Name" }, "downloaded": { "type": "boolean", "title": "Downloaded" }, "size_mb": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "title": "Size Mb" }, "loaded": { "type": "boolean", "title": "Loaded", "default": false } }, "type": "object", "required": [ "model_name", "display_name", "downloaded" ], "title": "ModelStatus", "description": "Response model for model status." }, "ModelStatusListResponse": { "properties": { "models": { "items": { "$ref": "#/components/schemas/ModelStatus" }, "type": "array", "title": "Models" } }, "type": "object", "required": [ "models" ], "title": "ModelStatusListResponse", "description": "Response model for model status list." 
}, "ProfileSampleResponse": { "properties": { "id": { "type": "string", "title": "Id" }, "profile_id": { "type": "string", "title": "Profile Id" }, "audio_path": { "type": "string", "title": "Audio Path" }, "reference_text": { "type": "string", "title": "Reference Text" } }, "type": "object", "required": [ "id", "profile_id", "audio_path", "reference_text" ], "title": "ProfileSampleResponse", "description": "Response model for profile sample." }, "TranscriptionResponse": { "properties": { "text": { "type": "string", "title": "Text" }, "duration": { "type": "number", "title": "Duration" } }, "type": "object", "required": [ "text", "duration" ], "title": "TranscriptionResponse", "description": "Response model for transcription." }, "ValidationError": { "properties": { "loc": { "items": { "anyOf": [ { "type": "string" }, { "type": "integer" } ] }, "type": "array", "title": "Location" }, "msg": { "type": "string", "title": "Message" }, "type": { "type": "string", "title": "Error Type" } }, "type": "object", "required": [ "loc", "msg", "type" ], "title": "ValidationError" }, "VoiceProfileCreate": { "properties": { "name": { "type": "string", "maxLength": 100, "minLength": 1, "title": "Name" }, "description": { "anyOf": [ { "type": "string", "maxLength": 500 }, { "type": "null" } ], "title": "Description" }, "language": { "type": "string", "pattern": "^(en|zh)$", "title": "Language", "default": "en" } }, "type": "object", "required": [ "name" ], "title": "VoiceProfileCreate", "description": "Request model for creating a voice profile." 
}, "VoiceProfileResponse": { "properties": { "id": { "type": "string", "title": "Id" }, "name": { "type": "string", "title": "Name" }, "description": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Description" }, "language": { "type": "string", "title": "Language" }, "created_at": { "type": "string", "format": "date-time", "title": "Created At" }, "updated_at": { "type": "string", "format": "date-time", "title": "Updated At" } }, "type": "object", "required": [ "id", "name", "description", "language", "created_at", "updated_at" ], "title": "VoiceProfileResponse", "description": "Response model for voice profile." } } }, "tags": [ { "name": "General", "description": "Root and health check endpoints" }, { "name": "Profiles", "description": "Voice profile management" }, { "name": "Generation", "description": "Speech generation, transcription, and audio retrieval" }, { "name": "History", "description": "Generation history and statistics" }, { "name": "Models", "description": "Model loading, unloading, and status management" } ] }