diff --git a/API.md b/API.md new file mode 100644 index 0000000..f763ae6 --- /dev/null +++ b/API.md @@ -0,0 +1,449 @@ +# TTS Book Service API 文档 + +> 基于小米 MiMo TTS 的听书音频转换服务 API 参考 + +## 概览 + +| 分类 | 前缀 | 说明 | +|------|------|------| +| 听书 App 接口 | `/api/` | 供听书 App 调用的音频接入接口 | +| 实时 TTS | `/api/tts` | 实时生成并返回音频流 | +| 管理接口 | `/admin/api/` | Web 管理界面使用的 CRUD 接口 | +| 配置文件 | `/httpTts.json` | 听书 App 导入用配置 | + +**Base URL**: `http://<host>:3333` + +--- + +## 一、实时 TTS 接口 + +### POST `/api/tts` + +实时调用 MiMo TTS 生成音频,直接返回 MP3 二进制流。 + +> 长文本会自动分段生成并拼接(每段 ≤ 2000 字符,在句末/段落边界智能切分)。 + +#### 请求格式 + +支持两种格式: + +**1. JSON 格式**(推荐) + +```http +POST /api/tts +Content-Type: application/json + +{ + "text": "要合成的文本内容", + "style": "开心", + "voice": "" +} +``` + +**2. Form-urlencoded 格式**(兼容百度风格) + +```http +POST /api/tts +Content-Type: application/x-www-form-urlencoded + +tex=%E8%A6%81%E5%90%88%E6%88%90%E7%9A%84%E6%96%87%E6%9C%AC +``` + +#### 参数说明 + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `text` / `tex` | string | ✅ | 要合成的文本(JSON 用 `text`,Form 用 `tex`) | +| `style` | string | ❌ | 说话风格,如 `开心`、`语速慢`、`东北话`、`像个大将军` 等(见风格参考) | +| `voice` | string | ❌ | 音色名称,留空使用默认音色(`mimo_default`) | + +#### 响应 + +- **成功**: `200 OK`,`Content-Type: audio/mpeg`,响应体为 MP3 二进制数据 +- **失败**: `400` / `500`,`Content-Type: application/json` + +```json +{ + "status": 40000001, + "message": "text/tex 不能为空" +} +``` + +#### 示例 + +```bash +# cURL - JSON 格式 +curl -X POST http://localhost:3333/api/tts \ + -H "Content-Type: application/json" \ + -d '{"text": "你好,今天天气真好!", "style": "开心"}' \ + -o output.mp3 + +# cURL - 带音色指定 +curl -X POST http://localhost:3333/api/tts \ + -H "Content-Type: application/json" \ + -d '{"text": "从前有座山,山里有座庙。", "voice": "mimo_male_01"}' \ + -o output.mp3 +``` + +--- + +## 二、听书 App 音频接入接口 + +### GET `/api/book/{book_id}` + +获取书籍信息及章节列表,供听书 App 调用。 + +#### 路径参数 + +| 参数 | 类型 | 说明 | +|------|------|------| +| `book_id` | string | 书籍唯一标识 | + +#### 响应 + +```json +{ + "book_id": "book_9", + 
"title": "三体", + "author": "刘慈欣", + "chapters": [ + { + "chapter_id": "chapter_1", + "app_chapter_id": "chapter1", + "title": "第一章 疯狂年代", + "status": "ready", + "audio_url": "/api/book/book_9/chapter/chapter_1/audio" + }, + { + "chapter_id": "chapter_2", + "app_chapter_id": "chapter2", + "title": "第二章 寂静的春天", + "status": "pending", + "audio_url": null + } + ] +} +``` + +| 字段 | 说明 | +|------|------| +| `status` | 章节音频状态:`pending`(待生成) / `generating`(生成中) / `ready`(就绪) / `error`(失败) | +| `audio_url` | 音频下载地址,仅 `status=ready` 时有值 | + +#### 错误响应 + +```json +{"detail": "书籍 book_9 不存在"} +``` +`404 Not Found` + +--- + +### GET `/api/book/{book_id}/chapter/{chapter_id}/audio` + +下载章节 MP3 音频文件。 + +#### 路径参数 + +| 参数 | 类型 | 说明 | +|------|------|------| +| `book_id` | string | 书籍 ID | +| `chapter_id` | string | 章节 ID | + +#### 响应 + +- **成功**: `200 OK`,`Content-Type: audio/mpeg`,MP3 文件流 +- **未生成**: `404`,`{"detail": "音频尚未生成,当前状态: pending"}` +- **不存在**: `404`,`{"detail": "章节不存在"}` + +--- + +## 三、管理接口 + +### 书籍管理 + +#### GET `/admin/api/books` + +获取所有书籍列表。 + +```json +[ + {"book_id": "book_9", "title": "三体", "author": "刘慈欣"}, + {"book_id": "book_12", "title": "活着", "author": "余华"} +] +``` + +#### POST `/admin/api/books` + +创建新书籍。 + +```http +Content-Type: application/json + +{ + "book_id": "book_9", + "title": "三体", + "author": "刘慈欣" +} +``` + +| 字段 | 必填 | 说明 | +|------|------|------| +| `book_id` | ✅ | 唯一标识 | +| `title` | ✅ | 书名 | +| `author` | ❌ | 作者 | + +**响应**: `{"ok": true, "book_id": "book_9"}` + +**错误**: `409 Conflict`(book_id 已存在)、`400`(缺少必填字段) + +#### DELETE `/admin/api/books/{book_id}` + +删除书籍及其所有章节。 + +**响应**: `{"ok": true}` + +--- + +### 章节管理 + +#### GET `/admin/api/books/{book_id}/chapters` + +获取书籍下的章节列表。 + +```json +[ + { + "chapter_id": "chapter_1", + "app_chapter_id": "chapter1", + "title": "第一章 疯狂年代", + "text_content": "这是章节文本的前200个字符...", + "text_length": 15000, + "status": "ready", + "error_msg": "", + "has_audio": true + } +] +``` + +> 注意: `text_content` 只返回前 
200 个字符用于预览。完整文本需通过 PUT 接口编辑时获取。 + +#### POST `/admin/api/books/{book_id}/chapters` + +创建新章节。 + +```json +{ + "chapter_id": "chapter_1", + "app_chapter_id": "chapter1", + "title": "第一章", + "text_content": "章节正文内容..." +} +``` + +| 字段 | 必填 | 说明 | +|------|------|------| +| `chapter_id` | ✅ | 章节唯一标识 | +| `app_chapter_id` | ❌ | 听书 App 中的章节 ID,默认同 chapter_id | +| `title` | ❌ | 章节标题 | +| `text_content` | ❌ | TTS 文本内容 | + +#### PUT `/admin/api/books/{book_id}/chapters/{chapter_id}` + +更新章节信息(部分更新)。 + +```json +{ + "text_content": "更新后的文本内容...", + "title": "新标题" +} +``` + +所有字段均为可选,只更新传入的字段。 + +#### DELETE `/admin/api/books/{book_id}/chapters/{chapter_id}` + +删除单个章节。 + +--- + +### 音频生成 + +#### POST `/admin/api/books/{book_id}/chapters/{chapter_id}/generate` + +为单个章节生成音频。 + +> 长文本自动分段生成并拼接(每段 ≤ 2000 字符)。 + +**响应**: + +```json +{"ok": true, "status": "ready", "error_msg": ""} +``` + +或生成失败时: + +```json +{"ok": true, "status": "error", "error_msg": "MiMo TTS API 错误: HTTP 502"} +``` + +#### POST `/admin/api/books/{book_id}/generate-all` + +批量生成书籍下所有**未就绪**章节的音频。 + +> 最多 3 个章节并发生成,可能需要较长时间。 + +**响应**: + +```json +{"ok": true, "total": 5, "success": 5, "failed": 0, "errors": [], "chapter_ids": ["ch_1", "ch_2", "ch_3", "ch_4", "ch_5"]} +``` + +--- + +### TTS 试听 + +#### POST `/admin/api/tts/preview` + +试听 TTS 效果,返回生成的音频 URL。 + +```json +{ + "text": "你好,世界!", + "style": "开心", + "voice": "" +} +``` + +| 字段 | 必填 | 说明 | +|------|------|------| +| `text` | ✅ | 试听文本 | +| `style` | ❌ | 说话风格 | +| `voice` | ❌ | 音色名称 | + +**响应**: + +```json +{"ok": true, "url": "/audio/_preview/a1b2c3d4.mp3"} +``` + +--- + +### 配置查看 + +#### GET `/admin/api/config` + +查看当前服务配置。 + +```json +{ + "endpoint": "https://api.xiaomimimo.com/v1/chat/completions", + "model": "mimo-v2-audio-tts", + "voice": "mimo_default", + "api_key_masked": "sk-mi****", + "max_chunk_chars": 2000 +} +``` + +--- + +## 四、配置文件 + +### GET `/httpTts.json` + +提供听书 App 导入用的音频源配置文件(MiMo TTS 单条目)。 + +用户可直接在听书 App 中通过 URL 导入此配置。 + +--- + +## 五、MiMo TTS 风格参考 + +在 `style` 参数中填写风格关键词,MiMo TTS 
支持以下类别: + +### 情感类 +| 风格 | 示例文本 | +|------|----------| +| 开心 | 今天真是太棒了!| +| 悲伤 | 他默默地离开了... | +| 生气 | 你怎么能这样做!| +| 平静 | 让我们慢慢来。| +| 惊讶 | 什么?这不可能!| +| 温柔 | 没关系,我在这里。| + +### 语速类 +| 风格 | 效果 | +|------|------| +| 语速慢 | 适合冥想、教学 | +| 语速快 | 适合新闻、解说 | +| 悄悄话 | 轻声细语 | + +### 角色类 +| 风格 | 效果 | +|------|------| +| 像个大将军 | 威严有力 | +| 像个小孩 | 稚嫩可爱 | +| 孙悟空 | 经典猴王 | +| 像个诗人 | 文艺优雅 | +| 像个老人 | 沧桑稳重 | + +### 方言类 +| 风格 | 说明 | +|------|------| +| 东北话 | 东北方言 | +| 四川话 | 四川方言 | +| 台湾腔 | 台湾口音 | +| 粤语 | 广东话 | +| 河南话 | 河南方言 | + +### 组合使用 + +风格可以组合,例如: + +```json +{ + "text": "今天天气真好", + "style": "开心 语速快" +} +``` + +--- + +## 六、错误码参考 + +| HTTP 状态码 | status 字段 | 含义 | +|-------------|-------------|------| +| 400 | 40000001 | 请求参数缺失或无效 | +| 404 | - | 资源不存在(书籍/章节/音频文件) | +| 409 | - | 资源冲突(ID 已存在) | +| 500 | 50000002 | 服务端错误(未配置 API Key 等) | +| 502 | - | MiMo TTS API 调用失败 | + +--- + +## 七、文本自动分段机制 + +当文本超过 **2000 字符**时,服务自动进行智能分段: + +### 分段策略 + +按优先级尝试在以下位置切分: + +1. **段落边界** — `\n\n` +2. **换行符** — `\n` +3. **中文句末标点** — `。!?…` +4. **英文句末标点** — `.!?` +5. **分号** — `;;` +6. **逗号** — `,,` +7. **硬切** — 以上都不匹配时按长度截断 + +### 处理流程 + +``` +原始文本 (10000字) + → 智能分段 → [段1(1800字), 段2(1950字), 段3(2000字), 段4(1900字), 段5(1950字), 段6(400字)] + → 逐段调用 MiMo TTS → [wav1, wav2, wav3, wav4, wav5, wav6] + → ffmpeg 拼接 → 最终 MP3 +``` + +此过程对用户完全透明,`/api/tts` 和章节音频生成均自动支持。 diff --git a/README.md b/README.md index 993fe65..69b4200 100644 --- a/README.md +++ b/README.md @@ -30,10 +30,16 @@ docker compose up -d ### Web 管理界面 (`/`) - 📖 书籍管理(添加/删除) - 📑 章节管理(添加/编辑/删除) -- 🎙️ TTS 试听(支持风格设置) -- ⚡ 单章/批量音频生成 +- 🎙️ TTS 试听(支持风格 + 音色设置) +- ⚡ 单章/批量音频生成(自动分段拼接) - ⚙️ 配置查看 +### 核心特性 +- **智能文本分段**: 长文本自动在句末/段落边界切分(≤2000字/段),逐段生成后拼接 +- **多风格支持**: 开心、悲伤、东北话、像个大将军... 
任意组合 +- **音色切换**: 支持指定不同音色(voice 参数) +- **并发批量生成**: 限制 3 路并发,快速完成整本书 + ### 听书 App 接口 | 接口 | 方法 | 说明 | @@ -72,7 +78,16 @@ docker compose up -d | `MIMO_API_ENDPOINT` | ❌ | `https://api.xiaomimimo.com/v1/chat/completions` | API 地址 | | `MIMO_TTS_MODEL` | ❌ | `mimo-v2-audio-tts` | 模型名称 | | `MIMO_VOICE` | ❌ | `mimo_default` | 默认音色 | -| `SERVER_PORT` | ❌ | `17200` | 服务端口 | +| `SERVER_PORT` | ❌ | `3333` | 服务端口 | + +## 📖 API 文档 + +完整 API 文档见 [**API.md**](./API.md),包含: + +- 所有接口的请求/响应格式 +- 参数说明与错误码 +- MiMo TTS 风格参考大全 +- 文本自动分段机制说明 ## MiMo TTS 风格参考 diff --git a/app/main.py b/app/main.py index 37a58a0..9bf80a7 100644 --- a/app/main.py +++ b/app/main.py @@ -16,11 +16,12 @@ from pathlib import Path import httpx from fastapi import FastAPI, HTTPException, Query, Request -from fastapi.responses import FileResponse, HTMLResponse, JSONResponse +from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, Response from fastapi.staticfiles import StaticFiles from sqlalchemy import Column, Integer, String, Text, DateTime, func, select, delete from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker from sqlalchemy.orm import DeclarativeBase +from urllib.parse import parse_qs, unquote import config @@ -33,6 +34,83 @@ logging.basicConfig( ) logger = logging.getLogger("tts-service") +# ── Text Segmentation ───────────────────────────────────────────────────── + +# MiMo TTS 单次请求文本上限(保守值,实际约 5000) +MAX_CHUNK_CHARS = 2000 + +# 分割优先级: 段落 > 句子 > 逗号/分号 > 按长度硬切 +_SEGMENT_PATTERNS = [ + "\n\n", # 段落 + "\n", # 换行 + "。", "!", "?", "…", # 中文句末 + ".", "!", "?", # 英文句末 + ";", ";", # 分号 + ",", ",", # 逗号 +] + + +def split_text(text: str, max_chars: int = MAX_CHUNK_CHARS) -> list[str]: + """ + 智能分段:尽量在自然边界(段落/句子/标点)处切分, + 保证每段不超过 max_chars 字符。 + """ + text = text.strip() + if len(text) <= max_chars: + return [text] + + chunks: list[str] = [] + remaining = text + + while remaining: + if len(remaining) <= max_chars: + chunks.append(remaining) + break + + # 在 
max_chars 范围内找最佳切割点 + window = remaining[:max_chars] + cut_pos = -1 + + for sep in _SEGMENT_PATTERNS: + idx = window.rfind(sep) + if idx > 0: + cut_pos = idx + len(sep) + break + + if cut_pos <= 0: + # 没找到任何分隔符,硬切 + cut_pos = max_chars + + chunk = remaining[:cut_pos].strip() + if chunk: + chunks.append(chunk) + remaining = remaining[cut_pos:].strip() + + return chunks + + +# ── Audio Concatenation ─────────────────────────────────────────────────── + +def concat_mp3_files(mp3_paths: list[str], output_path: str): + """用 ffmpeg 将多个 MP3 文件拼接为一个""" + # 创建 ffmpeg concat 文件列表 + list_path = output_path + ".concat_list.txt" + with open(list_path, "w") as f: + for p in mp3_paths: + f.write(f"file '{p}'\n") + + try: + result = subprocess.run( + ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", list_path, + "-codec:a", "libmp3lame", "-qscale:a", "2", output_path], + capture_output=True, text=True, + ) + if result.returncode != 0: + raise RuntimeError(f"ffmpeg 拼接失败: {result.stderr[:300]}") + finally: + os.remove(list_path) + + # ── Database ────────────────────────────────────────────────────────────── engine = create_async_engine(config.DATABASE_URL, echo=False) @@ -69,16 +147,17 @@ class Chapter(Base): # ── TTS Service ─────────────────────────────────────────────────────────── -async def call_mimo_tts(text: str, style: str = "") -> bytes: +async def call_mimo_tts(text: str, style: str = "", voice: str = "") -> bytes: """调用小米 MiMo TTS API,返回 WAV 音频字节""" if not config.MIMO_API_KEY: raise HTTPException(500, "MIMO_API_KEY 未配置,请设置环境变量") content = f"{text}" if style else text + use_voice = voice or config.MIMO_VOICE payload = { "model": config.MIMO_TTS_MODEL, - "audio": {"format": "wav", "voice": config.MIMO_VOICE}, + "audio": {"format": "wav", "voice": use_voice}, "messages": [{"role": "assistant", "content": content}], } @@ -125,7 +204,7 @@ def wav_to_mp3(wav_path: str, mp3_path: str): async def generate_chapter_audio(chapter_id_str: str): - """为指定章节生成音频(WAV → MP3)""" + 
"""为指定章节生成音频(支持长文本自动分段拼接)""" async with async_session() as db: result = await db.execute(select(Chapter).where(Chapter.chapter_id == chapter_id_str)) chapter = result.scalar_one_or_none() @@ -145,20 +224,41 @@ async def generate_chapter_audio(chapter_id_str: str): audio_dir = Path(config.AUDIO_DIR) / chapter.book_id audio_dir.mkdir(parents=True, exist_ok=True) - wav_path = str(audio_dir / f"{chapter.chapter_id}.wav") mp3_path = str(audio_dir / f"{chapter.chapter_id}.mp3") + chunks = split_text(chapter.text_content) - # MiMo TTS 生成 WAV - wav_bytes = await call_mimo_tts(chapter.text_content) - with open(wav_path, "wb") as f: - f.write(wav_bytes) + if len(chunks) == 1: + # 单段:直接生成 + wav_bytes = await call_mimo_tts(chapter.text_content) + wav_path = str(audio_dir / f"{chapter.chapter_id}.wav") + with open(wav_path, "wb") as f: + f.write(wav_bytes) + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, wav_to_mp3, wav_path, mp3_path) + os.remove(wav_path) + else: + # 多段:逐段生成 → 拼接 + logger.info(f"章节 {chapter_id_str}: 文本 {len(chapter.text_content)} 字, 分 {len(chunks)} 段生成") + tmp_mp3_paths = [] + for i, chunk in enumerate(chunks): + wav_bytes = await call_mimo_tts(chunk) + tmp_id = f"{chapter.chapter_id}_part{i}" + wav_path = str(audio_dir / f"{tmp_id}.wav") + tmp_mp3 = str(audio_dir / f"{tmp_id}.mp3") - # WAV → MP3 - loop = asyncio.get_event_loop() - await loop.run_in_executor(None, wav_to_mp3, wav_path, mp3_path) + with open(wav_path, "wb") as f: + f.write(wav_bytes) - # 删除 WAV 源文件,只保留 MP3 - os.remove(wav_path) + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, wav_to_mp3, wav_path, tmp_mp3) + os.remove(wav_path) + tmp_mp3_paths.append(tmp_mp3) + + # 拼接 + await loop.run_in_executor(None, concat_mp3_files, tmp_mp3_paths, mp3_path) + for p in tmp_mp3_paths: + os.remove(p) + logger.info(f"章节 {chapter_id_str}: {len(chunks)} 段拼接完成") chapter.audio_file = mp3_path chapter.status = "ready" @@ -166,6 +266,7 @@ async def 
generate_chapter_audio(chapter_id_str: str): except Exception as e: chapter.status = "error" chapter.error_msg = str(e)[:500] + logger.error(f"章节 {chapter_id_str} 生成失败: {e}") await db.commit() @@ -184,6 +285,12 @@ async def lifespan(app: FastAPI): app = FastAPI(title="TTS Book Service", lifespan=lifespan) +@app.get("/health") +async def health_check(): + """健康检查""" + return {"status": "ok", "service": "TTS Book Service", "api_key_configured": bool(config.MIMO_API_KEY)} + + # ── 听书 App 音频接入接口 ───────────────────────────────────────────────── @app.get("/api/book/{book_id}") @@ -242,9 +349,6 @@ async def get_chapter_audio(book_id: str, chapter_id: str): # ── 实时 TTS 接口(兼容听书 App 格式)───────────────────────────────────── -from fastapi.responses import Response -from urllib.parse import parse_qs - @app.post("/api/tts") async def realtime_tts(request: Request): """ @@ -256,6 +360,7 @@ async def realtime_tts(request: Request): """ text = "" style = "" + voice = "" content_type = request.headers.get("content-type", "") try: @@ -263,13 +368,13 @@ async def realtime_tts(request: Request): data = await request.json() text = data.get("text", "").strip() style = data.get("style", "").strip() + voice = data.get("voice", "").strip() else: # form-urlencoded (百度风格) body_bytes = await request.body() params = parse_qs(body_bytes.decode("utf-8")) text = (params.get("tex", [""])[0]).strip() # URL 解码(百度会 double-encode) - from urllib.parse import unquote text = unquote(unquote(text)) except Exception: pass @@ -282,28 +387,60 @@ async def realtime_tts(request: Request): ) try: - # MiMo TTS 生成 WAV - wav_bytes = await call_mimo_tts(text, style) + # 文本分段 + chunks = split_text(text) + logger.info(f"实时 TTS: text_len={len(text)}, chunks={len(chunks)}, style={style or '(默认)'}, voice={voice or '(默认)'}") - # WAV → MP3(临时文件) tmp_dir = Path(config.AUDIO_DIR) / "_tmp" tmp_dir.mkdir(parents=True, exist_ok=True) - tmp_id = uuid.uuid4().hex - wav_path = str(tmp_dir / f"{tmp_id}.wav") - mp3_path = 
str(tmp_dir / f"{tmp_id}.mp3") - with open(wav_path, "wb") as f: - f.write(wav_bytes) + if len(chunks) == 1: + # 单段:直接生成 + wav_bytes = await call_mimo_tts(text, style, voice) + tmp_id = uuid.uuid4().hex + wav_path = str(tmp_dir / f"{tmp_id}.wav") + mp3_path = str(tmp_dir / f"{tmp_id}.mp3") - loop = asyncio.get_event_loop() - await loop.run_in_executor(None, wav_to_mp3, wav_path, mp3_path) + with open(wav_path, "wb") as f: + f.write(wav_bytes) - with open(mp3_path, "rb") as f: - mp3_bytes = f.read() + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, wav_to_mp3, wav_path, mp3_path) - # 清理临时文件 - os.remove(wav_path) - os.remove(mp3_path) + with open(mp3_path, "rb") as f: + mp3_bytes = f.read() + + os.remove(wav_path) + os.remove(mp3_path) + else: + # 多段:逐段生成 → 拼接 + mp3_paths = [] + for i, chunk in enumerate(chunks): + wav_bytes = await call_mimo_tts(chunk, style, voice) + chunk_id = uuid.uuid4().hex + wav_path = str(tmp_dir / f"{chunk_id}.wav") + mp3_path = str(tmp_dir / f"{chunk_id}.mp3") + + with open(wav_path, "wb") as f: + f.write(wav_bytes) + + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, wav_to_mp3, wav_path, mp3_path) + os.remove(wav_path) + mp3_paths.append(mp3_path) + + # 拼接所有 MP3 + merged_id = uuid.uuid4().hex + merged_path = str(tmp_dir / f"{merged_id}.mp3") + await loop.run_in_executor(None, concat_mp3_files, mp3_paths, merged_path) + + with open(merged_path, "rb") as f: + mp3_bytes = f.read() + + # 清理 + for p in mp3_paths: + os.remove(p) + os.remove(merged_path) return Response(content=mp3_bytes, media_type="audio/mpeg") @@ -459,7 +596,7 @@ async def generate_audio(book_id: str, chapter_id: str): @app.post("/admin/api/books/{book_id}/generate-all") async def generate_all_chapters(book_id: str): - """批量生成书籍所有章节音频""" + """批量生成书籍所有章节音频(并发,限制 3 路)""" async with async_session() as db: result = await db.execute( select(Chapter).where(Chapter.book_id == book_id, Chapter.status != "ready") @@ -467,10 +604,31 @@ async def 
generate_all_chapters(book_id: str): chapters = result.scalars().all() chapter_ids = [ch.chapter_id for ch in chapters] - for cid in chapter_ids: - await generate_chapter_audio(cid) + if not chapter_ids: + return {"ok": True, "total": 0, "chapter_ids": [], "message": "没有需要生成的章节"} - return {"ok": True, "total": len(chapter_ids), "chapter_ids": chapter_ids} + # 并发生成,限制同时 3 个请求避免过载 + sem = asyncio.Semaphore(3) + + async def _gen(cid: str): + async with sem: + await generate_chapter_audio(cid) + + tasks = [_gen(cid) for cid in chapter_ids] + results = await asyncio.gather(*tasks, return_exceptions=True) + + # 统计结果 + errors = [str(r) for r in results if isinstance(r, Exception)] + success_count = len(chapter_ids) - len(errors) + + return { + "ok": True, + "total": len(chapter_ids), + "success": success_count, + "failed": len(errors), + "errors": errors[:10] if errors else [], + "chapter_ids": chapter_ids, + } # --- TTS 试听 --- @@ -481,11 +639,12 @@ async def tts_preview(request: Request): data = await request.json() text = data.get("text", "").strip() style = data.get("style", "").strip() + voice = data.get("voice", "").strip() if not text: raise HTTPException(400, "文本不能为空") - wav_bytes = await call_mimo_tts(text, style) + wav_bytes = await call_mimo_tts(text, style, voice) audio_dir = Path(config.AUDIO_DIR) / "_preview" audio_dir.mkdir(parents=True, exist_ok=True) @@ -510,6 +669,7 @@ async def get_config(): "model": config.MIMO_TTS_MODEL, "voice": config.MIMO_VOICE, "api_key_masked": config.MIMO_API_KEY[:6] + "****" if config.MIMO_API_KEY else "未配置", + "max_chunk_chars": MAX_CHUNK_CHARS, } diff --git a/app/static/index.html b/app/static/index.html index c69cf89..413235f 100644 --- a/app/static/index.html +++ b/app/static/index.html @@ -113,9 +113,15 @@ audio{width:100%;margin-top:8px}
🎙️ TTS 试听
-
- - +
+
+ + +
+
+ + +
@@ -435,6 +441,7 @@ async function generateAll() { async function doPreview() { const text = document.getElementById('preview-text').value.trim(); const style = document.getElementById('preview-style').value.trim(); + const voice = document.getElementById('preview-voice').value.trim(); if (!text) { toast('请输入文本', 'error'); return; } const btn = document.getElementById('preview-btn'); btn.disabled = true; @@ -443,7 +450,7 @@ async function doPreview() { const res = await fetch('/admin/api/tts/preview', { method: 'POST', headers: {'Content-Type': 'application/json'}, - body: JSON.stringify({text, style}) + body: JSON.stringify({text, style, voice}) }); const data = await res.json(); if (data.ok) { diff --git a/httpTts-mimo.json b/httpTts-mimo.json index d40c2f2..1307c62 100644 --- a/httpTts-mimo.json +++ b/httpTts-mimo.json @@ -8,5 +8,5 @@ "loginUi": "[]", "loginUrl": "", "name": "3.小米MiMo TTS", - "url": "http://39.101.77.56:3333/api/tts,{\"method\": \"POST\", \"body\": {\"text\": \"{{speakText}}\"}}" + "url": "http://39.101.77.56:3333/api/tts,{\"method\": \"POST\", \"body\": {\"text\": \"{{speakText}}\", \"style\": \"\", \"voice\": \"\"}}" } diff --git a/httpTts.json b/httpTts.json index 41cb2d5..76b6280 100644 --- a/httpTts.json +++ b/httpTts.json @@ -27,7 +27,9 @@ "id": -31, "lastUpdateTime": 1774590209945, "loginCheckJs": "var response = result;\nif (response.headers().get(\"Content-Type\") != \"audio/mpeg\") {\n throw \"TTS生成失败: \" + response.body().string()\n}\nresponse", + "loginUi": "[]", + "loginUrl": "", "name": "3.小米MiMo TTS", - "url": "http://39.101.77.56:3333/api/tts,{\"method\": \"POST\", \"body\": {\"text\": \"{{speakText}}\"}}" + "url": "http://39.101.77.56:3333/api/tts,{\"method\": \"POST\", \"body\": {\"text\": \"{{speakText}}\", \"style\": \"\", \"voice\": \"\"}}" } ]