feat: support both JSON and form-urlencoded body; change default port to 3333

This commit is contained in:
TTS Service
2026-03-27 13:52:11 +08:00
parent 6c5caa308d
commit 4b5797575d
4 changed files with 32 additions and 22 deletions

View File

@@ -12,6 +12,6 @@ COPY app/ .
RUN mkdir -p /app/data /app/audio RUN mkdir -p /app/data /app/audio
EXPOSE 17200 EXPOSE 3333
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "17200"] CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "3333"]

View File

@@ -12,7 +12,7 @@ MIMO_VOICE = os.environ.get("MIMO_VOICE", "mimo_default")
# 服务器配置 # 服务器配置
SERVER_HOST = os.environ.get("SERVER_HOST", "0.0.0.0") SERVER_HOST = os.environ.get("SERVER_HOST", "0.0.0.0")
SERVER_PORT = int(os.environ.get("SERVER_PORT", "17200")) SERVER_PORT = int(os.environ.get("SERVER_PORT", "3333"))
# 音频存储目录 # 音频存储目录
AUDIO_DIR = os.environ.get("AUDIO_DIR", os.path.join(BASE_DIR, "audio")) AUDIO_DIR = os.environ.get("AUDIO_DIR", os.path.join(BASE_DIR, "audio"))

View File

@@ -219,27 +219,43 @@ async def get_chapter_audio(book_id: str, chapter_id: str):
return FileResponse(chapter.audio_file, media_type="audio/mpeg", filename=f"{chapter_id}.mp3") return FileResponse(chapter.audio_file, media_type="audio/mpeg", filename=f"{chapter_id}.mp3")
# ── 实时 TTS 接口(兼容百度/阿里云风格)────────────────────────────────── # ── 实时 TTS 接口(兼容听书 App 格式)─────────────────────────────────────
from fastapi.responses import Response from fastapi.responses import Response
from urllib.parse import parse_qs
@app.post("/api/tts") @app.post("/api/tts")
async def realtime_tts(request: Request): async def realtime_tts(request: Request):
""" """
实时 TTS 生成接口 实时 TTS 生成接口
请求体 (JSON): 兼容两种 App 发送格式:
- text: 要合成的文本(必填) 1. JSON body: {"text": "内容", "style": "开心"}
- style: 风格(可选,如"开心"、"东北话") 2. Form body: tex=内容&spd=5 (百度风格)
- speed: 语速调整(可选,暂未使用,MiMo 通过 style 控制语速) 返回: MP3 音频二进制流 (audio/mpeg),失败返回 JSON
返回: MP3 音频二进制流 (audio/mpeg)
""" """
text = ""
style = ""
content_type = request.headers.get("content-type", "")
try:
if "json" in content_type:
data = await request.json() data = await request.json()
text = data.get("text", "").strip() text = data.get("text", "").strip()
style = data.get("style", "").strip() style = data.get("style", "").strip()
else:
# form-urlencoded (百度风格)
body_bytes = await request.body()
params = parse_qs(body_bytes.decode("utf-8"))
text = (params.get("tex", [""])[0]).strip()
# URL 解码(百度会 double-encode)
from urllib.parse import unquote
text = unquote(unquote(text))
except Exception:
pass
if not text: if not text:
return Response( return Response(
content=json.dumps({"status": 40000001, "message": "text 不能为空"}, ensure_ascii=False), content=json.dumps({"status": 40000001, "message": "text/tex 不能为空"}, ensure_ascii=False),
media_type="application/json", media_type="application/json",
status_code=400, status_code=400,
) )
@@ -248,7 +264,7 @@ async def realtime_tts(request: Request):
# MiMo TTS 生成 WAV # MiMo TTS 生成 WAV
wav_bytes = await call_mimo_tts(text, style) wav_bytes = await call_mimo_tts(text, style)
# WAV → MP3(内存中完成) # WAV → MP3(临时文件)
tmp_dir = Path(config.AUDIO_DIR) / "_tmp" tmp_dir = Path(config.AUDIO_DIR) / "_tmp"
tmp_dir.mkdir(parents=True, exist_ok=True) tmp_dir.mkdir(parents=True, exist_ok=True)
tmp_id = uuid.uuid4().hex tmp_id = uuid.uuid4().hex
@@ -270,12 +286,6 @@ async def realtime_tts(request: Request):
return Response(content=mp3_bytes, media_type="audio/mpeg") return Response(content=mp3_bytes, media_type="audio/mpeg")
except HTTPException:
return Response(
content=json.dumps({"status": 50000001, "message": "TTS 生成失败"}, ensure_ascii=False),
media_type="application/json",
status_code=502,
)
except Exception as e: except Exception as e:
return Response( return Response(
content=json.dumps({"status": 50000002, "message": str(e)[:300]}, ensure_ascii=False), content=json.dumps({"status": 50000002, "message": str(e)[:300]}, ensure_ascii=False),

View File

@@ -5,13 +5,13 @@ services:
build: . build: .
container_name: tts-book-service container_name: tts-book-service
ports: ports:
- "17200:17200" - "3333:3333"
environment: environment:
- MIMO_API_KEY=${MIMO_API_KEY} - MIMO_API_KEY=${MIMO_API_KEY}
- MIMO_API_ENDPOINT=${MIMO_API_ENDPOINT:-https://api.xiaomimimo.com/v1/chat/completions} - MIMO_API_ENDPOINT=${MIMO_API_ENDPOINT:-https://api.xiaomimimo.com/v1/chat/completions}
- MIMO_TTS_MODEL=${MIMO_TTS_MODEL:-mimo-v2-audio-tts} - MIMO_TTS_MODEL=${MIMO_TTS_MODEL:-mimo-v2-audio-tts}
- MIMO_VOICE=${MIMO_VOICE:-mimo_default} - MIMO_VOICE=${MIMO_VOICE:-mimo_default}
- SERVER_PORT=17200 - SERVER_PORT=3333
volumes: volumes:
- tts-data:/app/data - tts-data:/app/data
- tts-audio:/app/audio - tts-audio:/app/audio