From 4b5797575d56696b7b6dcfe1644dd3a4b89c5675 Mon Sep 17 00:00:00 2001 From: TTS Service Date: Fri, 27 Mar 2026 13:52:11 +0800 Subject: [PATCH] feat: support both JSON and form-urlencoded body; change default port to 3333 --- Dockerfile | 4 ++-- app/config.py | 2 +- app/main.py | 44 +++++++++++++++++++++++++++----------------- docker-compose.yml | 4 ++-- 4 files changed, 32 insertions(+), 22 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4cecacb..75b209b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,6 @@ COPY app/ . RUN mkdir -p /app/data /app/audio -EXPOSE 17200 +EXPOSE 3333 -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "17200"] +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "3333"] diff --git a/app/config.py b/app/config.py index 3e231a9..e321555 100644 --- a/app/config.py +++ b/app/config.py @@ -12,7 +12,7 @@ MIMO_VOICE = os.environ.get("MIMO_VOICE", "mimo_default") # 服务器配置 SERVER_HOST = os.environ.get("SERVER_HOST", "0.0.0.0") -SERVER_PORT = int(os.environ.get("SERVER_PORT", "17200")) +SERVER_PORT = int(os.environ.get("SERVER_PORT", "3333")) # 音频存储目录 AUDIO_DIR = os.environ.get("AUDIO_DIR", os.path.join(BASE_DIR, "audio")) diff --git a/app/main.py b/app/main.py index a32b839..ed50bd1 100644 --- a/app/main.py +++ b/app/main.py @@ -219,27 +219,43 @@ async def get_chapter_audio(book_id: str, chapter_id: str): return FileResponse(chapter.audio_file, media_type="audio/mpeg", filename=f"{chapter_id}.mp3") -# ── 实时 TTS 接口(兼容百度/阿里云风格)────────────────────────────────── +# ── 实时 TTS 接口(兼容听书 App 格式)───────────────────────────────────── from fastapi.responses import Response +from urllib.parse import parse_qs @app.post("/api/tts") async def realtime_tts(request: Request): """ 实时 TTS 生成接口 - 请求体 (JSON): - - text: 要合成的文本(必填) - - style: 风格(可选,如"开心"、"东北话") - - speed: 语速调整(可选,暂未使用,MiMo 通过 style 控制语速) - 返回: MP3 音频二进制流 (audio/mpeg) + 兼容两种 App 发送格式: + 1. JSON body: {"text": "内容", "style": "开心"} + 2. Form body: tex=内容&spd=5 (百度风格) + 返回: MP3 音频二进制流 (audio/mpeg),失败返回 JSON """ - data = await request.json() - text = data.get("text", "").strip() - style = data.get("style", "").strip() + text = "" + style = "" + content_type = request.headers.get("content-type", "") + + try: + if "json" in content_type: + data = await request.json() + text = data.get("text", "").strip() + style = data.get("style", "").strip() + else: + # form-urlencoded (百度风格) + body_bytes = await request.body() + params = parse_qs(body_bytes.decode("utf-8")) + text = (params.get("tex", [""])[0]).strip() + # URL 解码(百度会 double-encode) + from urllib.parse import unquote + text = unquote(unquote(text)) + except Exception: + pass if not text: return Response( - content=json.dumps({"status": 40000001, "message": "text 不能为空"}, ensure_ascii=False), + content=json.dumps({"status": 40000001, "message": "text/tex 不能为空"}, ensure_ascii=False), media_type="application/json", status_code=400, ) @@ -248,7 +264,7 @@ async def realtime_tts(request: Request): # MiMo TTS 生成 WAV wav_bytes = await call_mimo_tts(text, style) - # WAV → MP3(内存中完成) + # WAV → MP3(临时文件) tmp_dir = Path(config.AUDIO_DIR) / "_tmp" tmp_dir.mkdir(parents=True, exist_ok=True) tmp_id = uuid.uuid4().hex @@ -270,12 +286,6 @@ async def realtime_tts(request: Request): return Response(content=mp3_bytes, media_type="audio/mpeg") - except HTTPException: - return Response( - content=json.dumps({"status": 50000001, "message": "TTS 生成失败"}, ensure_ascii=False), - media_type="application/json", - status_code=502, - ) except Exception as e: return Response( content=json.dumps({"status": 50000002, "message": str(e)[:300]}, ensure_ascii=False), diff --git a/docker-compose.yml b/docker-compose.yml index e64efb2..02a4a76 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,13 +5,13 @@ services: build: . container_name: tts-book-service ports: - - "17200:17200" + - "3333:3333" environment: - MIMO_API_KEY=${MIMO_API_KEY} - MIMO_API_ENDPOINT=${MIMO_API_ENDPOINT:-https://api.xiaomimimo.com/v1/chat/completions} - MIMO_TTS_MODEL=${MIMO_TTS_MODEL:-mimo-v2-audio-tts} - MIMO_VOICE=${MIMO_VOICE:-mimo_default} - - SERVER_PORT=17200 + - SERVER_PORT=3333 volumes: - tts-data:/app/data - tts-audio:/app/audio