From 3129d45b2540d478a370489ad65549f4a4d6a4a6 Mon Sep 17 00:00:00 2001
From: Kevin Wong <lamnickdavid@gmail.com>
Date: Mon, 9 Feb 2026 14:47:19 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Docs/BACKEND_README.md                        |   2 +
 Docs/DevLogs/Day21.md                         | 132 ++++++++
 Docs/DevLogs/Day22.md                         | 221 +++++++++++++
 Docs/task_complete.md                         |   7 +-
 backend/app/modules/ai/router.py              |  27 ++
 backend/app/modules/videos/schemas.py         |   4 +-
 backend/app/modules/videos/workflow.py        | 296 +++++++++++++++--
 backend/app/services/glm_service.py           |  44 +++
 backend/app/services/video_service.py         | 106 ++++++
 backend/app/services/voice_clone_service.py   |   2 +-
 backend/app/services/whisper_service.py       |  39 ++-
 frontend/package-lock.json                    |  56 ++++
 frontend/package.json                         |   3 +
 .../features/home/model/useHomeController.ts  | 201 +++++++++---
 .../features/home/model/useHomePersistence.ts |  52 +--
 .../src/features/home/model/useMaterials.ts   |  88 +++--
 .../features/home/ui/GenerateActionBar.tsx    |  77 +++--
 frontend/src/features/home/ui/HomePage.tsx    |  25 +-
 .../src/features/home/ui/MaterialSelector.tsx | 306 +++++++++++++-----
 .../src/features/home/ui/ScriptEditor.tsx     |  98 +++++-
 .../features/home/ui/TitleSubtitlePanel.tsx   |  24 +-
 frontend/src/shared/types/material.ts         |   7 +
 models/Qwen3-TTS/qwen_tts_server.py           |   6 +-
 23 files changed, 1529 insertions(+), 294 deletions(-)
 create mode 100644 Docs/DevLogs/Day22.md
 create mode 100644 frontend/src/shared/types/material.ts

diff --git a/Docs/BACKEND_README.md b/Docs/BACKEND_README.md
index 9889472..7c4b697 100644
--- a/Docs/BACKEND_README.md
+++ b/Docs/BACKEND_README.md
@@ -108,6 +108,8 @@ backend/
 
 `POST /api/videos/generate` 支持以下可选字段：
 
+- `material_path`: 视频素材路径（单素材模式）
+- `material_paths`: 多素材路径数组（多机位模式，≥2 个素材时按素材数量均分音频时长、对齐 Whisper 字边界后自动切换）
 - `tts_mode`: TTS 模式 (`edgetts` / `voiceclone`)
 - `voice`: EdgeTTS 音色 ID（edgetts 模式）
 - `ref_audio_id` / `ref_text`: 参考音频 ID 与文本（voiceclone 模式）
diff --git a/Docs/DevLogs/Day21.md b/Docs/DevLogs/Day21.md
index 558cfe4..50f7285 100644
--- a/Docs/DevLogs/Day21.md
+++ b/Docs/DevLogs/Day21.md
@@ -315,3 +315,135 @@ npm run build && pm2 restart vigent2-frontend  # 刷脸验证UI
 pm2 restart vigent2-backend
 npm run build && pm2 restart vigent2-frontend
 ```
+
+---
+
+## 🎬 多素材视频生成（多机位效果）
+
+### 概述
+支持用户上传多个不同角度的自拍视频，生成视频时按素材数量均分音频时长（分割点对齐 Whisper 字边界）自动切换素材，最终效果类似多机位拍摄。单素材时走原有流程，无额外开销。
+
+### 核心架构
+
+#### 流水线变更
+```
+【单素材（不变）】
+text → TTS → audio → LatentSync(1个素材+完整audio) → Whisper字幕 → Remotion → 成片
+
+【多素材（新增）】
+text → TTS → audio → Whisper字幕(提前) → 按素材数量均分时长(对齐字边界)
+  → 对每段: 切分audio + LatentSync(素材[i]+音频片段[i])
+  → FFmpeg拼接所有片段 → Remotion(完整字幕时间戳) → 成片
+```
+
+#### 素材切换逻辑（均分方案）
+1. Whisper 对完整音频转录，得到字级别时间戳
+2. 按素材数量**均分音频总时长**（`total_duration / N`）
+3. 每个分割点对齐到最近的 Whisper 字边界，避免在字中间切分
+4. 首段 start 扩展为 0.0，末段 end 扩展为音频结尾，确保完整覆盖
+
+> **设计决策**：最初方案基于原始文案标点分句，但用户文案往往不含句号（只有逗号），导致只产生 1 段。改为均分方案后不依赖文案标点，对任何输入都能正确切分。
+
+---
+
+### 一、后端改动
+
+#### 1. `backend/app/modules/videos/schemas.py`
+- 新增 `material_paths: Optional[List[str]]` 字段
+- 保留 `material_path: str` 向后兼容
+
+#### 2. `backend/app/modules/videos/workflow.py`（核心改动）
+
+**新增函数**：
+- `_split_equal(segments, material_paths)`: 按素材数量均分音频时长，对齐到最近的 Whisper 字边界
+
+**修改 `process_video_generation()`**：
+- `is_multi = len(material_paths) > 1` 判断走多素材/单素材分支
+- 多素材分支：Whisper 提前 → 均分切分 → 音频切分 → 逐段 LatentSync → FFmpeg 拼接
+
+#### 3. `backend/app/services/video_service.py`
+- 新增 `concat_videos()`: FFmpeg concat demuxer (`-c copy`) 拼接视频片段
+- 新增 `split_audio()`: FFmpeg 按时间范围切分音频 (`-ss` + `-t` + `-c copy`)
+
+#### 4. `backend/scripts/watchdog.py`
+- 健康检查阈值从 3 次提高到 5 次（容忍期 2.5 分钟）
+- 新增重启后 120 秒冷却期，避免模型加载期间被误判为故障
+- 启动时给所有服务 60 秒初始冷却期
+
+---
+
+### 二、前端改动
+
+#### 1. 新增依赖
+```bash
+npm install @dnd-kit/core @dnd-kit/sortable @dnd-kit/utilities
+```
+
+#### 2. `frontend/src/features/home/model/useMaterials.ts`
+- `selectedMaterial: string` → `selectedMaterials: string[]`（多选）
+- 新增 `toggleMaterial(id)`: 切换选中/取消（至少保留1个）
+- 新增 `reorderMaterials(activeId, overId)`: 拖拽排序
+- 上传格式扩展：新增 `.mkv/.webm/.flv/.wmv/.m4v/.ts/.mts`
+
+#### 3. `frontend/src/features/home/ui/MaterialSelector.tsx`（重写）
+- 素材列表每行增加复选框 + 序号徽标（①②③）
+- 选中 ≥2 个时显示拖拽排序区（@dnd-kit `SortableContext`）
+- 每个排序项：拖拽把手 + 序号 + 素材名 + 移除按钮
+- HTML input accept 改为 `video/*`
+
+#### 4. `frontend/src/features/home/model/useHomeController.ts`
+- 多素材 payload：`material_paths` 数组 + `material_path` 向后兼容
+- `enable_subtitles` 硬编码为 `true`（移除开关）
+- 验证：至少选中 1 个素材
+
+#### 5. `frontend/src/features/home/model/useHomePersistence.ts`
+- 素材持久化改为 JSON 数组，向后兼容旧格式（单字符串）
+- 移除 `enableSubtitles` 持久化
+
+#### 6. `frontend/src/features/home/ui/TitleSubtitlePanel.tsx`
+- 移除"逐字高亮字幕"开关，字幕样式区始终显示
+
+#### 7. `frontend/src/features/home/ui/HomePage.tsx`
+- 更新 props 传递（`selectedMaterials`, `toggleMaterial`, `reorderMaterials`）
+
+---
+
+### 三、Bug 修复记录
+
+#### BUG-1: 多素材只使用第一个视频（基于标点的分句方案失败）
+- **现象**: 选了 2 个素材但生成的视频只使用第 1 个，日志显示 `Multi-material: 1 segments, 2 materials`。
+- **根因 v1**: 最初通过正则 `[。！？!?]` 在 Whisper 输出中分句，但 Whisper 不输出标点。
+- **修复 v1**: 改为用原始文案标点分句——但用户文案往往只含逗号（，），无句末标点（。！？），仍退化为 1 段。
+- **最终修复**: 彻底放弃基于标点的分句方案，改为 `_split_equal()` **按素材数量均分音频时长**，对齐到最近的 Whisper 字边界。不依赖任何标点符号，对所有文案均有效。
+
+#### BUG-2: 口型对不上（音频时间偏移）
+- **根因**: `split_audio` 用 Whisper 的 start/end 时间（如 0.11~7.21）切分音频，但 `compose()` 用完整原始音频（0.0~结尾）合成，导致时间偏移。
+- **修复**: 强制首段 start=0.0，末段 end=音频实际时长，确保切分音频完整覆盖。
+
+#### BUG-3: min_segment_sec 过度合并导致退化（已随方案切换移除）
+- **根因**: 旧方案中 2 个句子第 2 句不足 3 秒时，最短时长检查合并为 1 段，多素材退化为单素材。
+- **状态**: 均分方案不存在此问题，相关代码已移除。
+
+---
+
+### 涉及文件汇总
+
+| 文件 | 变更类型 | 说明 |
+|------|----------|------|
+| `backend/app/modules/videos/schemas.py` | 修改 | 新增 material_paths 字段 |
+| `backend/app/modules/videos/workflow.py` | 修改 | 多素材流水线核心逻辑 + 3个 Bug 修复 |
+| `backend/app/services/video_service.py` | 修改 | 新增 concat_videos / split_audio |
+| `backend/scripts/watchdog.py` | 修改 | 阈值优化 + 冷却期机制 |
+| `frontend/package.json` | 修改 | 新增 @dnd-kit 依赖 |
+| `frontend/src/features/home/model/useMaterials.ts` | 修改 | 多选 + 排序状态管理 |
+| `frontend/src/features/home/ui/MaterialSelector.tsx` | 重写 | 多选复选框 + 拖拽排序 UI |
+| `frontend/src/features/home/model/useHomeController.ts` | 修改 | 多素材 payload + 移除字幕开关 |
+| `frontend/src/features/home/model/useHomePersistence.ts` | 修改 | JSON 数组持久化 |
+| `frontend/src/features/home/ui/TitleSubtitlePanel.tsx` | 修改 | 移除字幕开关 |
+| `frontend/src/features/home/ui/HomePage.tsx` | 修改 | 更新 props 传递 |
+
+### 重启要求
+```bash
+pm2 restart vigent2-backend
+npm run build && pm2 restart vigent2-frontend
+```
diff --git a/Docs/DevLogs/Day22.md b/Docs/DevLogs/Day22.md
new file mode 100644
index 0000000..8bf33b6
--- /dev/null
+++ b/Docs/DevLogs/Day22.md
@@ -0,0 +1,221 @@
+## 🔧 多素材生成优化与健壮性加固 (Day 22)
+
+### 概述
+对 Day 21 实现的多素材视频生成（多机位）功能进行全面审查，修复 6 个高优先级 Bug、完成 8 项体验优化，并将多素材流水线从"逐段 LatentSync"重构为"先拼接再推理"架构，推理次数从 N 次降为 1 次。
+
+---
+
+### 一、后端高优 Bug 修复
+
+#### 1. `_split_equal()` 素材数 > 字符数边界溢出
+- **问题**: 5 个素材但只有 2 个 Whisper 字符时，边界索引重复，部分素材被跳过
+- **修复**: 加入 `n = min(n, len(all_chars))` 上限保护
+- **文件**: `backend/app/modules/videos/workflow.py`
+
+#### 2. 多素材 LatentSync 单段失败无 fallback
+- **问题**: 单素材模式下 LatentSync 失败会 fallback 到原始素材，但多素材模式直接抛异常，整个任务失败
+- **修复**: 多素材循环中加 try-except，失败时 fallback 到原始素材片段（第三节重构为"先拼接再推理"后，对应为单次 LatentSync 失败时 fallback 到未做唇形同步的拼接视频）
+- **文件**: `backend/app/modules/videos/workflow.py`
+
+#### 3. `num_segments == 0` 时 ZeroDivisionError
+- **问题**: 所有 assignments 被跳过后 `i / num_segments` 触发除零
+- **修复**: 循环前加 `if num_segments == 0` 检查并抛出明确错误
+- **文件**: `backend/app/modules/videos/workflow.py`
+
+#### 4. `split_audio` 未校验 duration > 0
+- **问题**: `end <= start` 时 FFmpeg 行为异常
+- **修复**: 加入 `if duration <= 0: raise ValueError(...)`
+- **文件**: `backend/app/services/video_service.py`
+
+#### 5. Whisper 失败时按时长均分兜底
+- **问题**: Whisper 失败后直接退化为单素材，其他素材被浪费
+- **修复**: 按 `audio_duration / len(material_paths)` 均分，不依赖字符对齐
+- **文件**: `backend/app/modules/videos/workflow.py`
+
+#### 6. `concat_videos` 空列表未检查
+- **问题**: 传入空 `video_paths` 时 FFmpeg 报错
+- **修复**: 加入 `if not video_paths: raise ValueError(...)`
+- **文件**: `backend/app/services/video_service.py`
+
+---
+
+### 二、前端优化
+
+#### 1. payload 构建非空断言修复
+- `m!.path` → `m?.path` + `.filter(Boolean)`，防止素材被删后 crash
+- **文件**: `frontend/src/features/home/model/useHomeController.ts`
+
+#### 2. 生成按钮展示后端进度消息
+- 新增 `message` prop，生成中显示如"(正在处理片段 2/3...)"
+- **文件**: `frontend/src/features/home/ui/GenerateActionBar.tsx`, `HomePage.tsx`
+
+#### 3. 新上传素材自动选中
+- 上传成功后对比前后素材列表，新增的 ID 自动追加到 `selectedMaterials`
+- **文件**: `frontend/src/features/home/model/useMaterials.ts`
+
+#### 4. Material 接口统一
+- 三处 `interface Material` 重复定义提取到 `shared/types/material.ts`
+- **文件**: `frontend/src/shared/types/material.ts` (新建), `useMaterials.ts`, `useHomeController.ts`, `MaterialSelector.tsx`
+
+#### 5. 拖拽排序修复
+- 移除 `DragOverlay`（`backdrop-blur` 创建新 containing block 导致定位错乱）
+- 改为 `useSortable` 原生拖拽 + `CSS.Translate`，拖拽中元素高亮加阴影
+- **文件**: `frontend/src/features/home/ui/MaterialSelector.tsx`
+
+#### 6. 素材选择上限 4 个
+- `toggleMaterial` 新增 `MAX_MATERIALS = 4` 限制
+- UI 选满后未选中项变半透明禁用，提示文字改为"可多选，最多4个"
+- **文件**: `useMaterials.ts`, `MaterialSelector.tsx`
+
+#### 7. 移动端排序区域响应式
+- 素材列表 `max-h-64` → `max-h-48 sm:max-h-64`
+- **文件**: `MaterialSelector.tsx`
+
+#### 8. 多素材耗时提示
+- 选中 ≥2 素材时生成按钮下方显示"多素材模式 (N 个机位)，生成耗时较长"
+- **文件**: `GenerateActionBar.tsx`, `HomePage.tsx`
+
+---
+
+### 三、核心架构重构：先拼接再推理
+
+#### V1 (Day 21): 逐段 LatentSync
+```
+素材A → LatentSync(素材A, 音频片段1) → lipsync_A
+素材B → LatentSync(素材B, 音频片段2) → lipsync_B
+FFmpeg concat(lipsync_A, lipsync_B) → 最终视频
+```
+- 缺点：N 个素材 = N 次 LatentSync 推理（每次 ~30s）
+
+#### V2 (Day 22): 先拼接再推理
+```
+素材A → prepare_segment(裁剪到3.67s) → prepared_A
+素材B → prepare_segment(裁剪到4.00s) → prepared_B
+FFmpeg concat(prepared_A, prepared_B) → concat_video (7.67s)
+LatentSync(concat_video, 完整音频) → 最终视频
+```
+- 优点：只需 **1 次** LatentSync 推理，时间从 N×30s 降为 1×30s
+
+#### 新增 `prepare_segment()` 方法
+```python
+def prepare_segment(self, video_path, target_duration, output_path, target_resolution=None):
+    # 素材时长 > 目标: 裁剪 (-t)
+    # 素材时长 < 目标: 循环 (-stream_loop) + 裁剪
+    # 分辨率一致: -c copy 无损 (不重编码)
+    # 分辨率不一致: scale + pad 统一到第一个素材分辨率
+```
+
+#### 分辨率处理策略
+- 新增 `get_resolution()` 方法检测各素材分辨率
+- 所有素材分辨率相同时：`-c copy` 无损裁剪（保持原画质）
+- 分辨率不一致时：统一到第一个素材的分辨率，`force_original_aspect_ratio=decrease` + `pad` 居中
+- LatentSync 只处理嘴部 512×512 区域，输出保持原分辨率
+
+#### 时间对齐验证
+
+| 环节 | 时间基准 | 对齐关系 |
+|------|---------|---------|
+| TTS 音频 | 原始时长 (7.67s) | 基准 |
+| Whisper 字幕 | 基于 TTS 音频 | 时间戳对齐音频 |
+| 均分切分 | assignments 总时长 = 音频时长 | 首段 start=0, 末段 end=audio_duration |
+| prepare 各段 | `-t seg_dur` 精确截断 | 总和 ≈ 音频时长 |
+| LatentSync | concat_video + 完整音频 | 内部 0.5s 容差 |
+| compose | lipsync_video + 音频/BGM | `-shortest` 保证同步 |
+| Remotion | 基于 captions_path 渲染字幕 | 时间戳对齐音频 |
+
+---
+
+### 涉及文件汇总
+
+| 文件 | 变更类型 | 说明 |
+|------|----------|------|
+| `backend/app/modules/videos/workflow.py` | 修改 | 6 个 Bug 修复 + 流水线重构（先拼接再推理）|
+| `backend/app/services/video_service.py` | 修改 | 新增 `prepare_segment()`、`get_resolution()`，`split_audio` 校验，`concat_videos` 空列表检查 |
+| `frontend/src/shared/types/material.ts` | 新建 | 统一 Material 接口 |
+| `frontend/src/features/home/model/useMaterials.ts` | 修改 | 上传自动选中、素材上限 4 个 |
+| `frontend/src/features/home/model/useHomeController.ts` | 修改 | payload 非空断言修复、Material 接口引用 |
+| `frontend/src/features/home/ui/MaterialSelector.tsx` | 修改 | 拖拽修复、上限 4 个 UI、移动端响应式 |
+| `frontend/src/features/home/ui/GenerateActionBar.tsx` | 修改 | 进度消息展示、多素材耗时提示 |
+| `frontend/src/features/home/ui/HomePage.tsx` | 修改 | 传递 message、materialCount prop |
+
+---
+
+### 四、AI 多语言翻译
+
+#### 功能
+在文案编辑区新增「AI多语言」按钮，支持将中文口播文案一键翻译为 9 种语言，并可随时还原原文。
+
+#### 支持语言
+英语 English、日语 日本語、韩语 한국어、法语 Français、德语 Deutsch、西班牙语 Español、俄语 Русский、意大利语 Italiano、葡萄牙语 Português
+
+#### 实现
+
+##### 后端
+- **`backend/app/services/glm_service.py`** — 新增 `translate_text()` 方法，调用智谱 GLM API（temperature=0.3），prompt 要求只返回译文、保持语气风格
+- **`backend/app/modules/ai/router.py`** — 新增 `POST /api/ai/translate` 接口，接收 `{text, target_lang}`，返回 `{translated_text}`
+
+##### 前端
+- **`frontend/src/features/home/ui/ScriptEditor.tsx`** — 新增 `LANGUAGES` 列表（9 种语言）、语言下拉菜单（点击外部自动关闭）、翻译中 loading 状态、「还原原文」按钮（翻译过后出现在菜单顶部）
+- **`frontend/src/features/home/model/useHomeController.ts`** — 新增 `handleTranslate`（调用翻译 API、首次翻译保存原文）、`originalText` 状态、`handleRestoreOriginal`（恢复原文）
+
+#### 涉及文件
+
+| 文件 | 变更 | 说明 |
+|------|------|------|
+| `backend/app/services/glm_service.py` | 修改 | 新增 `translate_text()` 方法 |
+| `backend/app/modules/ai/router.py` | 修改 | 新增 `/api/ai/translate` 接口 |
+| `frontend/src/features/home/ui/ScriptEditor.tsx` | 修改 | 语言菜单 UI、翻译 loading、还原原文按钮 |
+| `frontend/src/features/home/model/useHomeController.ts` | 修改 | `handleTranslate`、`originalText`、`handleRestoreOriginal` |
+
+---
+
+### 五、TTS 多语言支持
+
+#### 背景
+翻译功能实现后，用户可将中文文案翻译为其他语言。但翻译后生成视频时 TTS 仍只支持中文：
+- **EdgeTTS**：声音列表只有 5 个 `zh-CN-*` 中文声音
+- **声音克隆 (Qwen3-TTS)**：`language` 参数硬编码为 `"Chinese"`
+
+#### 实现方案
+
+##### 1. 前端：语言感知的声音列表
+- `VOICES` 从扁平数组扩展为 `Record<string, VoiceOption[]>`，覆盖 10 种语言（zh-CN / en-US / ja-JP / ko-KR / fr-FR / de-DE / es-ES / ru-RU / it-IT / pt-BR），每种语言 2 个声音（男/女）
+- 新增 `LANG_TO_LOCALE` 映射：翻译目标语言名 → EdgeTTS locale（如 `"English" → "en-US"`）
+- 新增 `textLang` 状态，跟踪当前文案语言，默认 `"zh-CN"`
+
+##### 2. 翻译时自动切换声音
+- `handleTranslate` 成功后：根据目标语言设置 `textLang`，EdgeTTS 模式下自动切换 `voice` 为目标语言的默认声音
+- `handleRestoreOriginal` 还原时：重置 `textLang` 为 `"zh-CN"`，恢复中文默认声音
+- `VoiceSelector` 根据 `textLang` 动态显示对应语言的声音列表
+
+##### 3. 声音克隆语言透传
+- 前端：新增 `LOCALE_TO_QWEN_LANG` 映射（`zh-CN→"Chinese"`, `en-US→"English"`, 其他→`"Auto"`）
+- 生成请求 payload 加入 `language` 字段（仅声音克隆模式）
+- 后端 `GenerateRequest` schema 新增 `language: str = "zh-CN"` 字段（locale 形式）
+- `workflow.py`：`language="Chinese"` 硬编码改为 `language=_locale_to_qwen_lang(req.language)`；Whisper 对齐同时传入 `language=_locale_to_whisper_lang(req.language)`
+
+##### 4. Bug 修复：textLang 持久化
+- **问题**: `voice` 已持久化但 `textLang` 未持久化，刷新页面后 `voice` 恢复为英文声音但 `textLang` 默认回中文，导致 VoiceSelector 显示中文声音列表却选中英文声音，无高亮按钮
+- **修复**: 在 `useHomePersistence` 中加入 `textLang` 的 localStorage 读写
+
+#### 数据流
+
+```
+用户翻译 "English"
+  → ScriptEditor.onTranslate("English")
+  → LANG_TO_LOCALE["English"] = "en-US"
+  → setTextLang("en-US"), setVoice("en-US-GuyNeural")
+  → VoiceSelector 显示 VOICES["en-US"] = [Guy, Jenny]
+  → 生成时:
+      EdgeTTS: payload.voice = "en-US-GuyNeural"
+      声音克隆: payload.language = "English" (via getQwenLanguage)
+```
+
+#### 涉及文件
+
+| 文件 | 变更 | 说明 |
+|------|------|------|
+| `frontend/src/features/home/model/useHomeController.ts` | 修改 | VOICES 多语言 Record、textLang 状态、LANG_TO_LOCALE / LOCALE_TO_QWEN_LANG 映射、翻译自动切换 voice |
+| `frontend/src/features/home/model/useHomePersistence.ts` | 修改 | textLang 持久化读写 |
+| `backend/app/modules/videos/schemas.py` | 修改 | GenerateRequest 加 `language` 字段 |
+| `backend/app/modules/videos/workflow.py` | 修改 | 声音克隆调用处用 `req.language` 替代硬编码 |
diff --git a/Docs/task_complete.md b/Docs/task_complete.md
index 875c449..e72ac9b 100644
--- a/Docs/task_complete.md
+++ b/Docs/task_complete.md
@@ -10,7 +10,7 @@
 
 > 这里记录了每一天的核心开发内容与 milestone。
 
-### Day 21: 缺陷修复 + 浮动预览 + 发布重构 + 架构优化 (Current)
+### Day 21: 缺陷修复 + 浮动预览 + 发布重构 + 架构优化 + 多素材生成 (Current)
 - [x] **Remotion 崩溃容错**: 渲染进程 SIGABRT 退出时检查输出文件，避免误判失败导致标题/字幕丢失。
 - [x] **首页作品选择持久化**: 修复 `fetchGeneratedVideos` 无条件覆盖恢复值的问题，新增 `preferVideoId` 参数控制选中逻辑。
 - [x] **发布页作品选择持久化**: 根因为签名 URL 不稳定，全面改用 `video.id` 替代 `path` 进行选择/持久化/比较。
@@ -22,6 +22,11 @@
 - [x] **后端模块分层**: materials/tools/ref_audios 三个模块补全 router+schemas+service 分层。
 - [x] **开发规范更新**: BACKEND_DEV.md 新增渐进原则、DOC_RULES.md 取消 TASK_COMPLETE.md 手动触发约束。
 - [x] **文档全面更新**: BACKEND_DEV/README、FRONTEND_DEV、DEPLOY_MANUAL、README.md 同步更新。
+- [x] **多素材视频生成（多机位效果）**: 支持多选素材 + 拖拽排序，按素材数量均分音频时长（对齐 Whisper 字边界）自动切换机位。逐段 LatentSync + FFmpeg 拼接。前端 @dnd-kit 拖拽排序 UI。
+- [x] **字幕开关移除**: 默认启用逐字高亮字幕，移除开关及相关死代码。
+- [x] **视频格式扩展**: 上传支持 mkv/webm/flv/wmv/m4v/ts/mts 等常见格式。
+- [x] **Watchdog 优化**: 健康检查阈值提高到 5 次，新增重启冷却期 120 秒，避免误重启。
+- [x] **多素材 Bug 修复**: 修复标点分句方案对无句末标点文案无效（改为均分方案）、音频时间偏移导致口型不对齐等缺陷。
 
 ### Day 20: 代码质量与安全优化
 - [x] **功能性修复**: LatentSync 回退逻辑、任务状态接口认证、User 类型统一。
diff --git a/backend/app/modules/ai/router.py b/backend/app/modules/ai/router.py
index 6e075dd..5d4731b 100644
--- a/backend/app/modules/ai/router.py
+++ b/backend/app/modules/ai/router.py
@@ -24,6 +24,33 @@ class GenerateMetaResponse(BaseModel):
     tags: list[str]
 
 
+class TranslateRequest(BaseModel):
+    """翻译请求"""
+    text: str
+    target_lang: str
+
+
+@router.post("/translate")
+async def translate_text(req: TranslateRequest):
+    """
+    AI 翻译文案
+
+    将文案翻译为指定目标语言
+    """
+    if not req.text or not req.text.strip():
+        raise HTTPException(status_code=400, detail="文案不能为空")
+    if not req.target_lang or not req.target_lang.strip():
+        raise HTTPException(status_code=400, detail="目标语言不能为空")
+
+    try:
+        logger.info(f"Translating text to {req.target_lang}: {req.text[:50]}...")
+        translated = await glm_service.translate_text(req.text.strip(), req.target_lang.strip())
+        return success_response({"translated_text": translated})
+    except Exception as e:
+        logger.error(f"Translate failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
 @router.post("/generate-meta")
 async def generate_meta(req: GenerateMetaRequest):
     """
diff --git a/backend/app/modules/videos/schemas.py b/backend/app/modules/videos/schemas.py
index de27491..f2f15bb 100644
--- a/backend/app/modules/videos/schemas.py
+++ b/backend/app/modules/videos/schemas.py
@@ -1,14 +1,16 @@
 from pydantic import BaseModel
-from typing import Optional
+from typing import Optional, List
 
 
 class GenerateRequest(BaseModel):
     text: str
     voice: str = "zh-CN-YunxiNeural"
     material_path: str
+    material_paths: Optional[List[str]] = None
     tts_mode: str = "edgetts"
     ref_audio_id: Optional[str] = None
     ref_text: Optional[str] = None
+    language: str = "zh-CN"
     title: Optional[str] = None
     enable_subtitles: bool = True
     subtitle_style_id: Optional[str] = None
diff --git a/backend/app/modules/videos/workflow.py b/backend/app/modules/videos/workflow.py
index 0166224..60c6193 100644
--- a/backend/app/modules/videos/workflow.py
+++ b/backend/app/modules/videos/workflow.py
@@ -1,4 +1,4 @@
-from typing import Optional, Any
+from typing import Optional, Any, List
 from pathlib import Path
 import time
 import traceback
@@ -24,6 +24,17 @@ from .schemas import GenerateRequest
 from .task_store import task_store
 
 
+def _locale_to_whisper_lang(locale: str) -> str:
+    """'en-US' → 'en', 'zh-CN' → 'zh'"""
+    return locale.split("-")[0] if "-" in locale else locale
+
+
+def _locale_to_qwen_lang(locale: str) -> str:
+    """'zh-CN' → 'Chinese', 'en-US' → 'English', 其他 → 'Auto'"""
+    mapping = {"zh": "Chinese", "en": "English"}
+    return mapping.get(locale.split("-")[0], "Auto")
+
+
 _lipsync_service: Optional[LipSyncService] = None
 _lipsync_ready: Optional[bool] = None
 _lipsync_last_check: float = 0
@@ -79,19 +90,107 @@ def _update_task(task_id: str, **updates: Any) -> None:
     task_store.update(task_id, updates)
 
 
+# ── 多素材辅助函数 ──
+
+
+def _split_equal(segments: List[dict], material_paths: List[str]) -> List[dict]:
+    """按素材数量均分音频时长，对齐到最近的 Whisper 字边界。
+
+    Args:
+        segments: Whisper 产出的 segment 列表, 每个包含 words (字级时间戳)
+        material_paths: 素材路径列表
+
+    Returns:
+        [{"material_path": "...", "start": 0.0, "end": 5.2, "index": 0}, ...]
+    """
+    # 展平所有 Whisper 字符
+    all_chars: List[dict] = []
+    for seg in segments:
+        for w in seg.get("words", []):
+            all_chars.append(w)
+
+    n = len(material_paths)
+
+    if not all_chars or n == 0:
+        return [{"material_path": material_paths[0] if material_paths else "",
+                 "start": 0.0, "end": 99999.0, "index": 0}]
+
+    # 素材数不能超过字符数，否则边界会重复
+    if n > len(all_chars):
+        logger.warning(f"[MultiMat] 素材数({n}) > 字符数({len(all_chars)})，裁剪为 {len(all_chars)}")
+        n = len(all_chars)
+
+    total_start = all_chars[0]["start"]
+    total_end = all_chars[-1]["end"]
+    seg_dur = (total_end - total_start) / n
+
+    # 计算 N-1 个分割点，对齐到最近的字边界
+    boundaries = [0]  # 第一段从第 0 个字开始
+    for i in range(1, n):
+        target_time = total_start + i * seg_dur
+        # 找到 start 时间最接近 target_time 的字
+        best_idx = boundaries[-1] + 1  # 至少比上一个边界后移 1
+        best_diff = float("inf")
+        for j in range(boundaries[-1] + 1, len(all_chars)):
+            diff = abs(all_chars[j]["start"] - target_time)
+            if diff < best_diff:
+                best_diff = diff
+                best_idx = j
+            elif diff > best_diff:
+                break  # 时间递增，差值开始变大后可以停了
+        boundaries.append(min(best_idx, len(all_chars) - 1))
+    boundaries.append(len(all_chars))  # 最后一段到末尾
+
+    # 按边界生成分配结果
+    assignments: List[dict] = []
+    for i in range(n):
+        s_idx = boundaries[i]
+        e_idx = boundaries[i + 1]
+        if s_idx >= len(all_chars) or s_idx >= e_idx:
+            continue
+        assignments.append({
+            "material_path": material_paths[i],
+            "start": all_chars[s_idx]["start"],
+            "end": all_chars[e_idx - 1]["end"],
+            "text": "".join(c["word"] for c in all_chars[s_idx:e_idx]),
+            "index": len(assignments),
+        })
+
+    if not assignments:
+        return [{"material_path": material_paths[0], "start": 0.0, "end": 99999.0, "index": 0}]
+
+    logger.info(f"[MultiMat] 均分 {len(all_chars)} 字为 {len(assignments)} 段")
+    for a in assignments:
+        dur = a["end"] - a["start"]
+        logger.info(f"  段{a['index']}: [{a['start']:.2f}-{a['end']:.2f}s] ({dur:.1f}s) {a['text'][:20]}")
+
+    return assignments
+
+
 async def process_video_generation(task_id: str, req: GenerateRequest, user_id: str):
     temp_files = []
     try:
         start_time = time.time()
+
+        # ── 确定素材列表 ──
+        material_paths: List[str] = []
+        if req.material_paths and len(req.material_paths) > 1:
+            material_paths = req.material_paths
+        else:
+            material_paths = [req.material_path]
+
+        is_multi = len(material_paths) > 1
+
         _update_task(task_id, status="processing", progress=5, message="正在下载素材...")
 
         temp_dir = settings.UPLOAD_DIR / "temp"
         temp_dir.mkdir(parents=True, exist_ok=True)
 
-        input_material_path = temp_dir / f"{task_id}_input.mp4"
-        temp_files.append(input_material_path)
-
-        await _download_material(req.material_path, input_material_path)
+        # 单素材模式：下载主素材
+        if not is_multi:
+            input_material_path = temp_dir / f"{task_id}_input.mp4"
+            temp_files.append(input_material_path)
+            await _download_material(material_paths[0], input_material_path)
 
         _update_task(task_id, message="正在生成语音...", progress=10)
 
@@ -119,7 +218,7 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
                 ref_audio_path=str(ref_audio_local),
                 ref_text=req.ref_text,
                 output_path=str(audio_path),
-                language="Chinese"
+                language=_locale_to_qwen_lang(req.language)
             )
         else:
             _update_task(task_id, message="正在生成语音 (EdgeTTS)...")
@@ -128,52 +227,183 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
 
         tts_time = time.time() - start_time
         print(f"[Pipeline] TTS completed in {tts_time:.1f}s")
-        _update_task(task_id, progress=25)
-
-        _update_task(task_id, message="正在合成唇形 (LatentSync)...", progress=30)
 
         lipsync = _get_lipsync_service()
         lipsync_video_path = temp_dir / f"{task_id}_lipsync.mp4"
         temp_files.append(lipsync_video_path)
 
-        lipsync_start = time.time()
-        is_ready = await _check_lipsync_ready()
-
-        if is_ready:
-            print(f"[LipSync] Starting LatentSync inference...")
-            _update_task(task_id, progress=35, message="正在运行 LatentSync 推理...")
-            await lipsync.generate(str(input_material_path), str(audio_path), str(lipsync_video_path))
-        else:
-            print(f"[LipSync] LatentSync not ready, copying original video")
-            _update_task(task_id, message="唇形同步不可用，使用原始视频...")
-            import shutil
-            shutil.copy(str(input_material_path), lipsync_video_path)
-
-        lipsync_time = time.time() - lipsync_start
-        print(f"[Pipeline] LipSync completed in {lipsync_time:.1f}s")
-        _update_task(task_id, progress=80)
-
+        video = VideoService()
         captions_path = None
-        if req.enable_subtitles:
-            _update_task(task_id, message="正在生成字幕 (Whisper)...", progress=82)
+
+        if is_multi:
+            # ══════════════════════════════════════
+            # 多素材流水线
+            # ══════════════════════════════════════
+            _update_task(task_id, progress=12, message="正在生成字幕 (Whisper)...")
 
             captions_path = temp_dir / f"{task_id}_captions.json"
             temp_files.append(captions_path)
 
             try:
-                await whisper_service.align(
+                captions_data = await whisper_service.align(
                     audio_path=str(audio_path),
                     text=req.text,
-                    output_path=str(captions_path)
+                    output_path=str(captions_path),
+                    language=_locale_to_whisper_lang(req.language),
                 )
-                print(f"[Pipeline] Whisper alignment completed")
+                print(f"[Pipeline] Whisper alignment completed (multi-material)")
             except Exception as e:
-                logger.warning(f"Whisper alignment failed, skipping subtitles: {e}")
+                logger.warning(f"Whisper alignment failed: {e}")
+                captions_data = None
                 captions_path = None
 
+            _update_task(task_id, progress=15, message="正在分配素材...")
+
+            if captions_data and captions_data.get("segments"):
+                assignments = _split_equal(captions_data["segments"], material_paths)
+            else:
+                # Whisper 失败 → 按时长均分（不依赖字符对齐）
+                logger.warning("[MultiMat] Whisper 无数据，按时长均分")
+                audio_dur = video._get_duration(str(audio_path))
+                if audio_dur <= 0:
+                    audio_dur = 30.0  # 安全兜底
+                seg_dur = audio_dur / len(material_paths)
+                assignments = [
+                    {"material_path": material_paths[i], "start": i * seg_dur,
+                     "end": (i + 1) * seg_dur, "index": i}
+                    for i in range(len(material_paths))
+                ]
+
+            # 扩展段覆盖完整音频范围：首段从0开始，末段到音频结尾
+            audio_duration = video._get_duration(str(audio_path))
+            if assignments and audio_duration > 0:
+                assignments[0]["start"] = 0.0
+                assignments[-1]["end"] = audio_duration
+
+            num_segments = len(assignments)
+            print(f"[Pipeline] Multi-material: {num_segments} segments, {len(material_paths)} materials")
+
+            if num_segments == 0:
+                raise RuntimeError("Multi-material: no valid segments after splitting")
+
+            lipsync_start = time.time()
+
+            # ── 第一步：下载所有素材并检测分辨率 ──
+            material_locals: List[Path] = []
+            resolutions = []
+
+            for i, assignment in enumerate(assignments):
+                material_local = temp_dir / f"{task_id}_material_{i}.mp4"
+                temp_files.append(material_local)
+                await _download_material(assignment["material_path"], material_local)
+                material_locals.append(material_local)
+                resolutions.append(video.get_resolution(str(material_local)))
+
+            # 分辨率不一致时，统一到第一个素材的分辨率
+            base_res = resolutions[0] if resolutions else (0, 0)
+            need_scale = any(r != base_res for r in resolutions) and base_res[0] > 0
+            if need_scale:
+                logger.info(f"[MultiMat] 素材分辨率不一致，统一到 {base_res[0]}x{base_res[1]}")
+
+            # ── 第二步：裁剪每段素材到对应时长 ──
+            prepared_segments: List[Path] = []
+
+            for i, assignment in enumerate(assignments):
+                seg_progress = 15 + int((i / num_segments) * 30)  # 15% → 45%
+                seg_dur = assignment["end"] - assignment["start"]
+                _update_task(
+                    task_id,
+                    progress=seg_progress,
+                    message=f"正在准备素材 {i+1}/{num_segments}..."
+                )
+
+                prepared_path = temp_dir / f"{task_id}_prepared_{i}.mp4"
+                temp_files.append(prepared_path)
+                video.prepare_segment(
+                    str(material_locals[i]), seg_dur, str(prepared_path),
+                    target_resolution=base_res if need_scale else None
+                )
+                prepared_segments.append(prepared_path)
+
+            # ── 第三步：拼接所有素材片段 ──
+            _update_task(task_id, progress=50, message="正在拼接素材片段...")
+            concat_path = temp_dir / f"{task_id}_concat.mp4"
+            temp_files.append(concat_path)
+            video.concat_videos(
+                [str(p) for p in prepared_segments],
+                str(concat_path)
+            )
+
+            # ── 第四步：一次 LatentSync 推理 ──
+            is_ready = await _check_lipsync_ready()
+
+            if is_ready:
+                _update_task(task_id, progress=55, message="正在合成唇形 (LatentSync)...")
+                print(f"[LipSync] Multi-material: single LatentSync on concatenated video")
+                try:
+                    await lipsync.generate(str(concat_path), str(audio_path), str(lipsync_video_path))
+                except Exception as e:
+                    logger.warning(f"[LipSync] Failed, fallback to concat without lipsync: {e}")
+                    import shutil
+                    shutil.copy(str(concat_path), str(lipsync_video_path))
+            else:
+                print(f"[LipSync] Not ready, using concatenated video without lipsync")
+                import shutil
+                shutil.copy(str(concat_path), str(lipsync_video_path))
+
+            lipsync_time = time.time() - lipsync_start
+            print(f"[Pipeline] Multi-material prepare + concat + LipSync completed in {lipsync_time:.1f}s")
+            _update_task(task_id, progress=80)
+
+            # 如果用户关闭了字幕，清除 captions_path（此时 Whisper 仅用于素材均分时的字边界对齐）
+            if not req.enable_subtitles:
+                captions_path = None
+
+        else:
+            # ══════════════════════════════════════
+            # 单素材流水线（原有逻辑）
+            # ══════════════════════════════════════
+            _update_task(task_id, progress=25)
+            _update_task(task_id, message="正在合成唇形 (LatentSync)...", progress=30)
+
+            lipsync_start = time.time()
+            is_ready = await _check_lipsync_ready()
+
+            if is_ready:
+                print(f"[LipSync] Starting LatentSync inference...")
+                _update_task(task_id, progress=35, message="正在运行 LatentSync 推理...")
+                await lipsync.generate(str(input_material_path), str(audio_path), str(lipsync_video_path))
+            else:
+                print(f"[LipSync] LatentSync not ready, copying original video")
+                _update_task(task_id, message="唇形同步不可用，使用原始视频...")
+                import shutil
+                shutil.copy(str(input_material_path), lipsync_video_path)
+
+            lipsync_time = time.time() - lipsync_start
+            print(f"[Pipeline] LipSync completed in {lipsync_time:.1f}s")
+            _update_task(task_id, progress=80)
+
+            # 单素材模式：Whisper 在 LatentSync 之后
+            if req.enable_subtitles:
+                _update_task(task_id, message="正在生成字幕 (Whisper)...", progress=82)
+
+                captions_path = temp_dir / f"{task_id}_captions.json"
+                temp_files.append(captions_path)
+
+                try:
+                    await whisper_service.align(
+                        audio_path=str(audio_path),
+                        text=req.text,
+                        output_path=str(captions_path),
+                        language=_locale_to_whisper_lang(req.language),
+                    )
+                    print(f"[Pipeline] Whisper alignment completed")
+                except Exception as e:
+                    logger.warning(f"Whisper alignment failed, skipping subtitles: {e}")
+                    captions_path = None
+
         _update_task(task_id, progress=85)
 
-        video = VideoService()
         final_audio_path = audio_path
         if req.bgm_id:
             _update_task(task_id, message="正在合成背景音乐...", progress=86)
diff --git a/backend/app/services/glm_service.py b/backend/app/services/glm_service.py
index 05a2e5e..78d7e75 100644
--- a/backend/app/services/glm_service.py
+++ b/backend/app/services/glm_service.py
@@ -43,6 +43,7 @@ class GLMService:
 要求：
 1. 标题要简洁有力，能吸引观众点击，不超过10个字
 2. 标签要与内容相关，便于搜索和推荐，只要3个
+3. 标题和标签必须使用与口播文案相同的语言（如文案是英文就用英文，日文就用日文）
 
 请严格按以下JSON格式返回（不要包含其他内容）：
 {{"title": "标题", "tags": ["标签1", "标签2", "标签3"]}}"""
@@ -120,6 +121,49 @@ class GLMService:
 
 
 
+    async def translate_text(self, text: str, target_lang: str) -> str:
+        """
+        Translate a script into the requested language via the GLM chat API.
+
+        Args:
+            text: source script to translate
+            target_lang: target language name as written into the prompt (e.g. English, 日本語)
+
+        Returns:
+            The translated script with surrounding whitespace stripped.
+
+        Raises:
+            Exception: on any failure (including a reply without content), prefixed "AI 翻译失败".
+        """
+        prompt = f"""请将以下文案翻译为{target_lang}。
+
+原文：
+{text}
+
+要求：
+1. 只返回翻译后的文案，不要添加任何解释或说明
+2. 保持原文的语气和风格
+3. 翻译要自然流畅，符合目标语言的表达习惯"""
+
+        try:
+            client = self._get_client()
+            logger.info(f"Using GLM to translate text to {target_lang}")
+            import asyncio
+            # asyncio.to_thread runs the synchronous create() call in a worker thread.
+            response = await asyncio.to_thread(
+                client.chat.completions.create, model=settings.GLM_MODEL,
+                messages=[{"role": "user", "content": prompt}],
+                thinking={"type": "disabled"}, max_tokens=2000, temperature=0.3,
+            )
+            content = response.choices[0].message.content
+            if content is None:
+                raise ValueError("GLM returned no content")
+            logger.info("GLM translation completed")
+            return content.strip()
+        except Exception as e:
+            logger.error(f"GLM translate error: {e}")
+            raise Exception(f"AI 翻译失败: {str(e)}") from e
+
     def _parse_json_response(self, content: str) -> dict:
         """解析 GLM 返回的 JSON 内容"""
         # 尝试直接解析
diff --git a/backend/app/services/video_service.py b/backend/app/services/video_service.py
index f098225..d0ef6aa 100644
--- a/backend/app/services/video_service.py
+++ b/backend/app/services/video_service.py
@@ -138,3 +138,109 @@ class VideoService:
             return output_path
         else:
             raise RuntimeError("FFmpeg composition failed")
+
+    def concat_videos(self, video_paths: list, output_path: str) -> str:
+        """Join video segments, in order, with the FFmpeg concat demuxer (stream copy).
+
+        Returns output_path. Raises ValueError when video_paths is empty and
+        RuntimeError when the FFmpeg run reports failure.
+        """
+        if not video_paths:
+            raise ValueError("No video segments to concat")
+
+        out_dir = Path(output_path).parent
+        out_dir.mkdir(parents=True, exist_ok=True)
+        list_path = out_dir / f"{Path(output_path).stem}_concat.txt"
+
+        cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
+               "-i", str(list_path), "-c", "copy", output_path]
+
+        try:
+            with open(list_path, "w", encoding="utf-8") as f:
+                for vp in video_paths:
+                    # Inside a single-quoted concat entry a literal ' must be written
+                    # as '\'' or the demuxer mis-parses the path.
+                    escaped = str(vp).replace("'", "'\\''")
+                    f.write(f"file '{escaped}'\n")
+
+            if not self._run_ffmpeg(cmd):
+                raise RuntimeError("FFmpeg concat failed")
+            return output_path
+        finally:
+            try:
+                list_path.unlink(missing_ok=True)
+            except OSError:
+                pass  # best-effort cleanup of the temporary list file
+
+    def split_audio(self, audio_path: str, start: float, end: float, output_path: str) -> str:
+        """Extract the start..end time range (seconds) of an audio file with FFmpeg stream copy.
+
+        Returns output_path. Raises ValueError when end - start is not positive and
+        RuntimeError when the FFmpeg run reports failure.
+        """
+        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+
+        span = end - start
+        if span <= 0:
+            raise ValueError(f"Invalid audio split range: start={start}, end={end}, duration={span}")
+
+        # -ss/-t come before -i, so FFmpeg treats them as options of that input.
+        seek_args = ["-ss", str(start), "-t", str(span)]
+        copy_args = ["-i", audio_path, "-c", "copy", output_path]
+        ffmpeg_cmd = ["ffmpeg", "-y", *seek_args, *copy_args]
+
+        if not self._run_ffmpeg(ffmpeg_cmd):
+            raise RuntimeError(f"FFmpeg audio split failed: {start}-{end}")
+        return output_path
+
+    def get_resolution(self, file_path: str) -> tuple:
+        """Probe the first video stream with ffprobe and return (width, height).
+
+        Best-effort: any failure (probe error, timeout, unparsable output) yields (0, 0).
+        """
+        probe_cmd = ['ffprobe', '-v', 'error', '-select_streams', 'v:0']
+        probe_cmd += ['-show_entries', 'stream=width,height', '-of', 'csv=p=0', file_path]
+        try:
+            probe = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=10)
+            # The first two comma-separated fields of the output are width and height.
+            fields = probe.stdout.strip().split(',')
+            width, height = int(fields[0]), int(fields[1])
+        except Exception:
+            return (0, 0)
+        return (width, height)
+
+    def prepare_segment(self, video_path: str, target_duration: float, output_path: str,
+                        target_resolution: tuple = None) -> str:
+        """Trim or loop a source clip to ``target_duration`` seconds, dropping its audio.
+
+        target_resolution: optional (width, height); when given the clip is scaled to fit
+        and padded to that size, otherwise the source resolution is kept.
+        Returns output_path; raises RuntimeError when the FFmpeg run reports failure.
+        """
+        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+
+        # Use the requested length when the reported source duration is not positive.
+        source_dur = self._get_duration(video_path)
+        if source_dur <= 0:
+            source_dur = target_duration
+
+        must_loop = target_duration > source_dur
+        must_scale = target_resolution is not None
+
+        # -stream_loop is an input option, so it has to precede -i.
+        loop_args = ["-stream_loop", str(int(target_duration / source_dur) + 1)] if must_loop else []
+        ffmpeg_cmd = ["ffmpeg", "-y", *loop_args, "-i", video_path, "-t", str(target_duration), "-an"]
+
+        if must_scale:
+            w, h = target_resolution
+            ffmpeg_cmd += ["-vf", f"scale={w}:{h}:force_original_aspect_ratio=decrease,pad={w}:{h}:(ow-iw)/2:(oh-ih)/2"]
+
+        # Looping or scaling forces a re-encode; otherwise stream copy keeps the original quality.
+        if must_loop or must_scale:
+            ffmpeg_cmd += ["-c:v", "libx264", "-preset", "fast", "-crf", "18"]
+        else:
+            ffmpeg_cmd += ["-c:v", "copy"]
+        ffmpeg_cmd.append(output_path)
+        if not self._run_ffmpeg(ffmpeg_cmd):
+            raise RuntimeError(f"FFmpeg prepare_segment failed: {video_path}")
+        return output_path
diff --git a/backend/app/services/voice_clone_service.py b/backend/app/services/voice_clone_service.py
index 37e5def..018d056 100644
--- a/backend/app/services/voice_clone_service.py
+++ b/backend/app/services/voice_clone_service.py
@@ -48,7 +48,7 @@ class VoiceCloneService:
         """
         # 使用锁确保串行执行，避免 GPU 显存溢出
         async with self._lock:
-            logger.info(f"🎤 Voice Clone: {text[:30]}...")
+            logger.info(f"🎤 Voice Clone: {text[:30]}... (language={language})")
             Path(output_path).parent.mkdir(parents=True, exist_ok=True)
 
             # 读取参考音频
diff --git a/backend/app/services/whisper_service.py b/backend/app/services/whisper_service.py
index 35ad219..207ce3a 100644
--- a/backend/app/services/whisper_service.py
+++ b/backend/app/services/whisper_service.py
@@ -20,16 +20,23 @@ MAX_CHARS_PER_LINE = 12
 
 def split_word_to_chars(word: str, start: float, end: float) -> list:
     """
-    将词拆分成单个字符，时间戳线性插值
+    将词拆分成单个字符，时间戳线性插值。
+    保留英文词前的空格（Whisper 输出如 " Hello"），用于正确重建英文字幕。
 
     Args:
-        word: 词文本
+        word: 词文本（可能含前导空格）
         start: 词开始时间
         end: 词结束时间
 
     Returns:
         单字符列表，每个包含 word/start/end
     """
+    # 保留前导空格（英文 Whisper 输出常见 " Hello" 形式）
+    leading_space = ""
+    if word and not word[0].strip():
+        leading_space = " "
+        word = word.lstrip()
+
     tokens = []
     ascii_buffer = ""
 
@@ -54,7 +61,8 @@ def split_word_to_chars(word: str, start: float, end: float) -> list:
         return []
 
     if len(tokens) == 1:
-        return [{"word": tokens[0], "start": start, "end": end}]
+        w = leading_space + tokens[0] if leading_space else tokens[0]
+        return [{"word": w, "start": start, "end": end}]
 
     # 线性插值时间戳
     duration = end - start
@@ -64,8 +72,11 @@ def split_word_to_chars(word: str, start: float, end: float) -> list:
     for i, token in enumerate(tokens):
         token_start = start + i * token_duration
         token_end = start + (i + 1) * token_duration
+        w = token
+        if i == 0 and leading_space:
+            w = leading_space + w
         result.append({
-            "word": token,
+            "word": w,
             "start": round(token_start, 3),
             "end": round(token_end, 3)
         })
@@ -108,7 +119,7 @@ def split_segment_to_lines(words: List[dict], max_chars: int = MAX_CHARS_PER_LIN
 
         if should_break and current_words:
             segments.append({
-                "text": current_text,
+                "text": current_text.strip(),
                 "start": current_words[0]["start"],
                 "end": current_words[-1]["end"],
                 "words": current_words.copy()
@@ -119,7 +130,7 @@ def split_segment_to_lines(words: List[dict], max_chars: int = MAX_CHARS_PER_LIN
     # 处理剩余的字
     if current_words:
         segments.append({
-            "text": current_text,
+            "text": current_text.strip(),
             "start": current_words[0]["start"],
             "end": current_words[-1]["end"],
             "words": current_words.copy()
@@ -162,7 +173,8 @@ class WhisperService:
         self,
         audio_path: str,
         text: str,
-        output_path: Optional[str] = None
+        output_path: Optional[str] = None,
+        language: str = "zh",
     ) -> dict:
         """
         对音频进行转录，生成字级别时间戳
@@ -171,12 +183,16 @@ class WhisperService:
             audio_path: 音频文件路径
             text: 原始文本（用于参考，但实际使用 whisper 转录结果）
             output_path: 可选，输出 JSON 文件路径
+            language: 语言代码 (zh/en 等)
 
         Returns:
             包含字级别时间戳的字典
         """
         import asyncio
 
+        # 英文等西文需要更大的每行字数
+        max_chars = 40 if language != "zh" else MAX_CHARS_PER_LINE
+
         def _do_transcribe():
             model = self._load_model()
 
@@ -185,7 +201,7 @@ class WhisperService:
             # 转录并获取字级别时间戳
             segments_iter, info = model.transcribe(
                 audio_path,
-                language="zh",
+                language=language,
                 word_timestamps=True,  # 启用字级别时间戳
                 vad_filter=True,  # 启用 VAD 过滤静音
             )
@@ -198,9 +214,10 @@ class WhisperService:
                 all_words = []
                 if segment.words:
                     for word_info in segment.words:
-                        word_text = word_info.word.strip()
-                        if word_text:
+                        word_text = word_info.word
+                        if word_text.strip():
                             # 将词拆分成单字，时间戳线性插值
+                            # 保留前导空格用于英文词间距
                             chars = split_word_to_chars(
                                 word_text,
                                 word_info.start,
@@ -210,7 +227,7 @@ class WhisperService:
 
                 # 将长段落按标点和字数拆分成多行
                 if all_words:
-                    line_segments = split_segment_to_lines(all_words, MAX_CHARS_PER_LINE)
+                    line_segments = split_segment_to_lines(all_words, max_chars)
                     all_segments.extend(line_segments)
 
             logger.info(f"Generated {len(all_segments)} subtitle segments")
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index cf7cd42..bd37513 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -8,6 +8,9 @@
       "name": "frontend",
       "version": "0.1.0",
       "dependencies": {
+        "@dnd-kit/core": "^6.3.1",
+        "@dnd-kit/sortable": "^10.0.0",
+        "@dnd-kit/utilities": "^3.2.2",
         "@supabase/supabase-js": "^2.93.1",
         "axios": "^1.13.4",
         "lucide-react": "^0.563.0",
@@ -281,6 +284,59 @@
         "node": ">=6.9.0"
       }
     },
+    "node_modules/@dnd-kit/accessibility": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/@dnd-kit/accessibility/-/accessibility-3.1.1.tgz",
+      "integrity": "sha512-2P+YgaXF+gRsIihwwY1gCsQSYnu9Zyj2py8kY5fFvUM1qm2WA2u639R6YNVfU4GWr+ZM5mqEsfHZZLoRONbemw==",
+      "license": "MIT",
+      "dependencies": {
+        "tslib": "^2.0.0"
+      },
+      "peerDependencies": {
+        "react": ">=16.8.0"
+      }
+    },
+    "node_modules/@dnd-kit/core": {
+      "version": "6.3.1",
+      "resolved": "https://registry.npmjs.org/@dnd-kit/core/-/core-6.3.1.tgz",
+      "integrity": "sha512-xkGBRQQab4RLwgXxoqETICr6S5JlogafbhNsidmrkVv2YRs5MLwpjoF2qpiGjQt8S9AoxtIV603s0GIUpY5eYQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@dnd-kit/accessibility": "^3.1.1",
+        "@dnd-kit/utilities": "^3.2.2",
+        "tslib": "^2.0.0"
+      },
+      "peerDependencies": {
+        "react": ">=16.8.0",
+        "react-dom": ">=16.8.0"
+      }
+    },
+    "node_modules/@dnd-kit/sortable": {
+      "version": "10.0.0",
+      "resolved": "https://registry.npmjs.org/@dnd-kit/sortable/-/sortable-10.0.0.tgz",
+      "integrity": "sha512-+xqhmIIzvAYMGfBYYnbKuNicfSsk4RksY2XdmJhT+HAC01nix6fHCztU68jooFiMUB01Ky3F0FyOvhG/BZrWkg==",
+      "license": "MIT",
+      "dependencies": {
+        "@dnd-kit/utilities": "^3.2.2",
+        "tslib": "^2.0.0"
+      },
+      "peerDependencies": {
+        "@dnd-kit/core": "^6.3.0",
+        "react": ">=16.8.0"
+      }
+    },
+    "node_modules/@dnd-kit/utilities": {
+      "version": "3.2.2",
+      "resolved": "https://registry.npmjs.org/@dnd-kit/utilities/-/utilities-3.2.2.tgz",
+      "integrity": "sha512-+MKAJEOfaBe5SmV6t34p80MMKhjvUz0vRrvVJbPT0WElzaOJ/1xs+D+KDv+tD/NE5ujfrChEcshd4fLn0wpiqg==",
+      "license": "MIT",
+      "dependencies": {
+        "tslib": "^2.0.0"
+      },
+      "peerDependencies": {
+        "react": ">=16.8.0"
+      }
+    },
     "node_modules/@emnapi/core": {
       "version": "1.8.1",
       "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.8.1.tgz",
diff --git a/frontend/package.json b/frontend/package.json
index 476aadb..ebf5efb 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -9,6 +9,9 @@
     "lint": "eslint"
   },
   "dependencies": {
+    "@dnd-kit/core": "^6.3.1",
+    "@dnd-kit/sortable": "^10.0.0",
+    "@dnd-kit/utilities": "^3.2.2",
     "@supabase/supabase-js": "^2.93.1",
     "axios": "^1.13.4",
     "lucide-react": "^0.563.0",
diff --git a/frontend/src/features/home/model/useHomeController.ts b/frontend/src/features/home/model/useHomeController.ts
index f53533a..c6b6675 100644
--- a/frontend/src/features/home/model/useHomeController.ts
+++ b/frontend/src/features/home/model/useHomeController.ts
@@ -25,13 +25,64 @@ import { useRefAudios } from "@/features/home/model/useRefAudios";
 import { useTitleSubtitleStyles } from "@/features/home/model/useTitleSubtitleStyles";
 import { ApiResponse, unwrap } from "@/shared/api/types";
 
-const VOICES = [
-  { id: "zh-CN-YunxiNeural", name: "云溪 (男声-年轻)" },
-  { id: "zh-CN-YunjianNeural", name: "云健 (男声-新闻)" },
-  { id: "zh-CN-YunyangNeural", name: "云扬 (男声-专业)" },
-  { id: "zh-CN-XiaoxiaoNeural", name: "晓晓 (女声-活泼)" },
-  { id: "zh-CN-XiaoyiNeural", name: "晓伊 (女声-温柔)" },
-];
+const VOICES: Record<string, { id: string; name: string }[]> = {
+  "zh-CN": [
+    { id: "zh-CN-YunxiNeural", name: "云溪 (男声-年轻)" },
+    { id: "zh-CN-YunjianNeural", name: "云健 (男声-新闻)" },
+    { id: "zh-CN-YunyangNeural", name: "云扬 (男声-专业)" },
+    { id: "zh-CN-XiaoxiaoNeural", name: "晓晓 (女声-活泼)" },
+    { id: "zh-CN-XiaoyiNeural", name: "晓伊 (女声-温柔)" },
+  ],
+  "en-US": [
+    { id: "en-US-GuyNeural", name: "Guy (Male)" },
+    { id: "en-US-JennyNeural", name: "Jenny (Female)" },
+  ],
+  "ja-JP": [
+    { id: "ja-JP-KeitaNeural", name: "圭太 (男声)" },
+    { id: "ja-JP-NanamiNeural", name: "七海 (女声)" },
+  ],
+  "ko-KR": [
+    { id: "ko-KR-InJoonNeural", name: "인준 (男声)" },
+    { id: "ko-KR-SunHiNeural", name: "선히 (女声)" },
+  ],
+  "fr-FR": [
+    { id: "fr-FR-HenriNeural", name: "Henri (Male)" },
+    { id: "fr-FR-DeniseNeural", name: "Denise (Female)" },
+  ],
+  "de-DE": [
+    { id: "de-DE-ConradNeural", name: "Conrad (Male)" },
+    { id: "de-DE-KatjaNeural", name: "Katja (Female)" },
+  ],
+  "es-ES": [
+    { id: "es-ES-AlvaroNeural", name: "Álvaro (Male)" },
+    { id: "es-ES-ElviraNeural", name: "Elvira (Female)" },
+  ],
+  "ru-RU": [
+    { id: "ru-RU-DmitryNeural", name: "Дмитрий (Male)" },
+    { id: "ru-RU-SvetlanaNeural", name: "Светлана (Female)" },
+  ],
+  "it-IT": [
+    { id: "it-IT-DiegoNeural", name: "Diego (Male)" },
+    { id: "it-IT-ElsaNeural", name: "Elsa (Female)" },
+  ],
+  "pt-BR": [
+    { id: "pt-BR-AntonioNeural", name: "Antonio (Male)" },
+    { id: "pt-BR-FranciscaNeural", name: "Francisca (Female)" },
+  ],
+};
+
+const LANG_TO_LOCALE: Record<string, string> = {
+  "中文": "zh-CN",
+  "English": "en-US",
+  "日本語": "ja-JP",
+  "한국어": "ko-KR",
+  "Français": "fr-FR",
+  "Deutsch": "de-DE",
+  "Español": "es-ES",
+  "Русский": "ru-RU",
+  "Italiano": "it-IT",
+  "Português": "pt-BR",
+};
 
 
 
@@ -70,22 +121,17 @@ interface RefAudio {
   created_at: number;
 }
 
-interface Material {
-  id: string;
-  name: string;
-  path: string;
-  size_mb: number;
-  scene?: string;
-}
+import type { Material } from "@/shared/types/material";
 
 export const useHomeController = () => {
   const apiBase = getApiBaseUrl();
 
-  const [selectedMaterial, setSelectedMaterial] = useState<string>("");
+  const [selectedMaterials, setSelectedMaterials] = useState<string[]>([]);
   const [previewMaterial, setPreviewMaterial] = useState<string | null>(null);
 
   const [text, setText] = useState<string>("");
   const [voice, setVoice] = useState<string>("zh-CN-YunxiNeural");
+  const [textLang, setTextLang] = useState<string>("zh-CN");
 
   // 使用全局任务状态
   const { currentTask, isGenerating, startTask } = useTask();
@@ -96,7 +142,6 @@ export const useHomeController = () => {
 
   // 字幕和标题相关状态
   const [videoTitle, setVideoTitle] = useState<string>("");
-  const [enableSubtitles, setEnableSubtitles] = useState<boolean>(true);
   const [selectedSubtitleStyleId, setSelectedSubtitleStyleId] = useState<string>("");
   const [selectedTitleStyleId, setSelectedTitleStyleId] = useState<string>("");
   const [subtitleFontSize, setSubtitleFontSize] = useState<number>(80);
@@ -181,8 +226,8 @@ export const useHomeController = () => {
         { new_name: editMaterialName.trim() }
       );
       const payload = unwrap(res);
-      if (selectedMaterial === materialId && payload?.id) {
-        setSelectedMaterial(payload.id);
+      if (selectedMaterials.includes(materialId) && payload?.id) {
+        setSelectedMaterials((prev) => prev.map((x) => (x === materialId ? payload.id : x)));
       }
       setEditingMaterialId(null);
       setEditMaterialName("");
@@ -197,6 +242,10 @@ export const useHomeController = () => {
   // AI 生成标题标签
   const [isGeneratingMeta, setIsGeneratingMeta] = useState(false);
 
+  // AI 多语言翻译
+  const [isTranslating, setIsTranslating] = useState(false);
+  const [originalText, setOriginalText] = useState<string | null>(null);
+
   // 在线录音相关
   const [isRecording, setIsRecording] = useState(false);
   const [recordedBlob, setRecordedBlob] = useState<Blob | null>(null);
@@ -226,11 +275,13 @@ export const useHomeController = () => {
     uploadError,
     setUploadError,
     fetchMaterials,
+    toggleMaterial,
+    reorderMaterials,
     deleteMaterial,
     handleUpload,
   } = useMaterials({
-    selectedMaterial,
-    setSelectedMaterial,
+    selectedMaterials,
+    setSelectedMaterials,
   });
 
   const {
@@ -338,14 +389,14 @@ export const useHomeController = () => {
     setText,
     videoTitle,
     setVideoTitle,
-    enableSubtitles,
-    setEnableSubtitles,
     ttsMode,
     setTtsMode,
     voice,
     setVoice,
-    selectedMaterial,
-    setSelectedMaterial,
+    textLang,
+    setTextLang,
+    selectedMaterials,
+    setSelectedMaterials,
     selectedSubtitleStyleId,
     setSelectedSubtitleStyleId,
     selectedTitleStyleId,
@@ -410,7 +461,8 @@ export const useHomeController = () => {
   }, [isGenerating, currentTask, fetchGeneratedVideos]);
 
   useEffect(() => {
-    const material = materials.find((item) => item.id === selectedMaterial);
+    const firstSelected = selectedMaterials[0];
+    const material = materials.find((item) => item.id === firstSelected);
     if (!material?.path) {
       setMaterialDimensions(null);
       return;
@@ -450,7 +502,7 @@ export const useHomeController = () => {
       video.removeEventListener("loadedmetadata", handleLoaded);
       video.removeEventListener("error", handleError);
     };
-  }, [materials, selectedMaterial]);
+  }, [materials, selectedMaterials]);
 
 
   useEffect(() => {
@@ -486,12 +538,13 @@ export const useHomeController = () => {
   }, [selectedBgmId, bgmList]);
 
   useEffect(() => {
-    if (!selectedMaterial) return;
-    const target = materialItemRefs.current[selectedMaterial];
+    const firstSelected = selectedMaterials[0];
+    if (!firstSelected) return;
+    const target = materialItemRefs.current[firstSelected];
     if (target) {
       target.scrollIntoView({ block: "nearest", behavior: "smooth" });
     }
-  }, [selectedMaterial, materials]);
+  }, [selectedMaterials, materials]);
 
   // 【修复】历史视频默认选中逻辑
   // 当持久化恢复完成，且列表加载完毕，如果没选中任何视频，默认选中第一个
@@ -639,9 +692,58 @@ export const useHomeController = () => {
     }
   };
 
+  // AI 多语言翻译
+  const handleTranslate = async (targetLang: string) => {
+    // Translate the current script via /api/ai/translate, then sync language and voice.
+    const sourceText = text;
+    if (!sourceText.trim()) {
+      toast.error("请先输入口播文案");
+      return;
+    }
+
+    setIsTranslating(true);
+    try {
+      const { data: res } = await api.post<ApiResponse<{ translated_text: string }>>(
+        "/api/ai/translate",
+        { text: sourceText.trim(), target_lang: targetLang }
+      );
+      const payload = unwrap(res);
+
+      // Remember the pre-translation text only once a translation has actually succeeded,
+      // and only the first time, so "restore" is never offered after a failed request.
+      setOriginalText((saved) => (saved === null ? sourceText : saved));
+      setText(payload.translated_text || "");
+      // Follow the target language: update textLang and, for EdgeTTS, pick its first voice.
+      const locale = LANG_TO_LOCALE[targetLang] || "zh-CN";
+      setTextLang(locale);
+      if (ttsMode === "edgetts") {
+        const langVoices = VOICES[locale] || VOICES["zh-CN"];
+        setVoice(langVoices[0].id);
+      }
+    } catch (err: unknown) {
+      console.error("AI translate failed:", err);
+      const axiosErr = err as { response?: { data?: { message?: string } }; message?: string };
+      const errorMsg = axiosErr.response?.data?.message || axiosErr.message || String(err);
+      toast.error(`AI 翻译失败: ${errorMsg}`);
+    } finally {
+      setIsTranslating(false);
+    }
+  };
+
+  const handleRestoreOriginal = () => {
+    // Nothing to restore while no original text has been stored.
+    if (originalText === null) return;
+    setText(originalText);
+    setOriginalText(null);
+    // Restoring resets the script language to zh-CN and, for EdgeTTS, its first voice.
+    setTextLang("zh-CN");
+    if (ttsMode !== "edgetts") return;
+    setVoice(VOICES["zh-CN"][0].id);
+  };
+
   // 生成视频
   const handleGenerate = async () => {
-    if (!selectedMaterial || !text.trim()) {
+    if (selectedMaterials.length === 0 || !text.trim()) {
       toast.error("请先选择素材并填写文案");
       return;
     }
@@ -663,26 +765,33 @@ export const useHomeController = () => {
 
     try {
       // 查找选中的素材对象以获取路径
-      const materialObj = materials.find((m) => m.id === selectedMaterial);
-      if (!materialObj) {
+      const firstMaterialObj = materials.find((m) => m.id === selectedMaterials[0]);
+      if (!firstMaterialObj) {
         toast.error("素材数据异常");
         return;
       }
 
       // 构建请求参数
       const payload: Record<string, unknown> = {
-        material_path: materialObj.path,
+        material_path: firstMaterialObj.path,
         text: text,
         tts_mode: ttsMode,
         title: videoTitle.trim() || undefined,
-        enable_subtitles: enableSubtitles,
+        enable_subtitles: true,
       };
 
-      if (enableSubtitles && selectedSubtitleStyleId) {
+      // 多素材
+      if (selectedMaterials.length > 1) {
+        payload.material_paths = selectedMaterials
+          .map((id) => materials.find((x) => x.id === id)?.path)
+          .filter((path): path is string => !!path);
+      }
+
+      if (selectedSubtitleStyleId) {
         payload.subtitle_style_id = selectedSubtitleStyleId;
       }
 
-      if (enableSubtitles && subtitleFontSize) {
+      if (subtitleFontSize) {
         payload.subtitle_font_size = Math.round(subtitleFontSize);
       }
 
@@ -698,15 +807,15 @@ export const useHomeController = () => {
         payload.title_top_margin = Math.round(titleTopMargin);
       }
 
-      if (enableSubtitles) {
-        payload.subtitle_bottom_margin = Math.round(subtitleBottomMargin);
-      }
+      payload.subtitle_bottom_margin = Math.round(subtitleBottomMargin);
 
       if (enableBgm && selectedBgmId) {
         payload.bgm_id = selectedBgmId;
         payload.bgm_volume = bgmVolume;
       }
 
+      payload.language = textLang;
+
       if (ttsMode === "edgetts") {
         payload.voice = voice;
       } else {
@@ -774,8 +883,9 @@ export const useHomeController = () => {
     fetchMaterials,
     deleteMaterial,
     handleUpload,
-    selectedMaterial,
-    setSelectedMaterial,
+    selectedMaterials,
+    toggleMaterial,
+    reorderMaterials,
     handlePreviewMaterial,
     editingMaterialId,
     editMaterialName,
@@ -789,6 +899,10 @@ export const useHomeController = () => {
     setExtractModalOpen,
     handleGenerateMeta,
     isGeneratingMeta,
+    handleTranslate,
+    isTranslating,
+    originalText,
+    handleRestoreOriginal,
     showStylePreview,
     setShowStylePreview,
     videoTitle,
@@ -809,17 +923,16 @@ export const useHomeController = () => {
     setTitleTopMargin,
     subtitleBottomMargin,
     setSubtitleBottomMargin,
-    enableSubtitles,
-    setEnableSubtitles,
     resolveAssetUrl,
     getFontFormat,
     buildTextShadow,
     materialDimensions,
     ttsMode,
     setTtsMode,
-    voices: VOICES,
+    voices: VOICES[textLang] || VOICES["zh-CN"],
     voice,
     setVoice,
+    textLang,
     refAudios,
     selectedRefAudio,
     handleSelectRefAudio,
diff --git a/frontend/src/features/home/model/useHomePersistence.ts b/frontend/src/features/home/model/useHomePersistence.ts
index 78cc99d..a9012ba 100644
--- a/frontend/src/features/home/model/useHomePersistence.ts
+++ b/frontend/src/features/home/model/useHomePersistence.ts
@@ -17,14 +17,14 @@ interface UseHomePersistenceOptions {
   setText: React.Dispatch<React.SetStateAction<string>>;
   videoTitle: string;
   setVideoTitle: React.Dispatch<React.SetStateAction<string>>;
-  enableSubtitles: boolean;
-  setEnableSubtitles: React.Dispatch<React.SetStateAction<boolean>>;
   ttsMode: 'edgetts' | 'voiceclone';
   setTtsMode: React.Dispatch<React.SetStateAction<'edgetts' | 'voiceclone'>>;
   voice: string;
   setVoice: React.Dispatch<React.SetStateAction<string>>;
-  selectedMaterial: string;
-  setSelectedMaterial: React.Dispatch<React.SetStateAction<string>>;
+  textLang: string;
+  setTextLang: React.Dispatch<React.SetStateAction<string>>;
+  selectedMaterials: string[];
+  setSelectedMaterials: React.Dispatch<React.SetStateAction<string[]>>;
   selectedSubtitleStyleId: string;
   setSelectedSubtitleStyleId: React.Dispatch<React.SetStateAction<string>>;
   selectedTitleStyleId: string;
@@ -57,14 +57,14 @@ export const useHomePersistence = ({
   setText,
   videoTitle,
   setVideoTitle,
-  enableSubtitles,
-  setEnableSubtitles,
   ttsMode,
   setTtsMode,
   voice,
   setVoice,
-  selectedMaterial,
-  setSelectedMaterial,
+  textLang,
+  setTextLang,
+  selectedMaterials,
+  setSelectedMaterials,
   selectedSubtitleStyleId,
   setSelectedSubtitleStyleId,
   selectedTitleStyleId,
@@ -96,9 +96,9 @@ export const useHomePersistence = ({
 
     const savedText = localStorage.getItem(`vigent_${storageKey}_text`);
     const savedTitle = localStorage.getItem(`vigent_${storageKey}_title`);
-    const savedSubtitles = localStorage.getItem(`vigent_${storageKey}_subtitles`);
     const savedTtsMode = localStorage.getItem(`vigent_${storageKey}_ttsMode`);
     const savedVoice = localStorage.getItem(`vigent_${storageKey}_voice`);
+    const savedTextLang = localStorage.getItem(`vigent_${storageKey}_textLang`);
     const savedMaterial = localStorage.getItem(`vigent_${storageKey}_material`);
     const savedSubtitleStyle = localStorage.getItem(`vigent_${storageKey}_subtitleStyle`);
     const savedTitleStyle = localStorage.getItem(`vigent_${storageKey}_titleStyle`);
@@ -113,11 +113,23 @@ export const useHomePersistence = ({
 
     setText(savedText || "大家好，欢迎来到我的频道，今天给大家分享一些有趣的内容。");
     setVideoTitle(savedTitle ? clampTitle(savedTitle) : "");
-    setEnableSubtitles(savedSubtitles !== null ? savedSubtitles === 'true' : true);
     setTtsMode((savedTtsMode as 'edgetts' | 'voiceclone') || 'edgetts');
     setVoice(savedVoice || "zh-CN-YunxiNeural");
+    if (savedTextLang) setTextLang(savedTextLang);
 
-    if (savedMaterial) setSelectedMaterial(savedMaterial);
+    if (savedMaterial) {
+      try {
+        const parsed = JSON.parse(savedMaterial);
+        if (Array.isArray(parsed)) {
+          setSelectedMaterials(parsed);
+        } else {
+          setSelectedMaterials([savedMaterial]);
+        }
+      } catch {
+        // 旧格式: 单字符串
+        setSelectedMaterials([savedMaterial]);
+      }
+    }
     if (savedSubtitleStyle) setSelectedSubtitleStyleId(savedSubtitleStyle);
     if (savedTitleStyle) setSelectedTitleStyleId(savedTitleStyle);
 
@@ -157,15 +169,15 @@ export const useHomePersistence = ({
     isAuthLoading,
     setBgmVolume,
     setEnableBgm,
-    setEnableSubtitles,
     setSelectedBgmId,
-    setSelectedMaterial,
+    setSelectedMaterials,
     setSelectedSubtitleStyleId,
     setSelectedTitleStyleId,
     setSelectedVideoId,
     setSubtitleFontSize,
     setSubtitleSizeLocked,
     setText,
+    setTextLang,
     setTitleFontSize,
     setTitleSizeLocked,
     setTitleTopMargin,
@@ -192,10 +204,6 @@ export const useHomePersistence = ({
     return () => clearTimeout(timeout);
   }, [videoTitle, storageKey, isRestored]);
 
-  useEffect(() => {
-    if (isRestored) localStorage.setItem(`vigent_${storageKey}_subtitles`, String(enableSubtitles));
-  }, [enableSubtitles, storageKey, isRestored]);
-
   useEffect(() => {
     if (isRestored) localStorage.setItem(`vigent_${storageKey}_ttsMode`, ttsMode);
   }, [ttsMode, storageKey, isRestored]);
@@ -205,10 +213,14 @@ export const useHomePersistence = ({
   }, [voice, storageKey, isRestored]);
 
   useEffect(() => {
-    if (isRestored && selectedMaterial) {
-      localStorage.setItem(`vigent_${storageKey}_material`, selectedMaterial);
+    if (isRestored) localStorage.setItem(`vigent_${storageKey}_textLang`, textLang);
+  }, [textLang, storageKey, isRestored]);
+
+  useEffect(() => {
+    if (isRestored && selectedMaterials.length > 0) {
+      localStorage.setItem(`vigent_${storageKey}_material`, JSON.stringify(selectedMaterials));
     }
-  }, [selectedMaterial, storageKey, isRestored]);
+  }, [selectedMaterials, storageKey, isRestored]);
 
   useEffect(() => {
     if (isRestored && selectedSubtitleStyleId) {
diff --git a/frontend/src/features/home/model/useMaterials.ts b/frontend/src/features/home/model/useMaterials.ts
index 18957a6..f514166 100644
--- a/frontend/src/features/home/model/useMaterials.ts
+++ b/frontend/src/features/home/model/useMaterials.ts
@@ -2,23 +2,16 @@ import { useCallback, useState } from "react";
 import api from "@/shared/api/axios";
 import { ApiResponse, unwrap } from "@/shared/api/types";
 import { toast } from "sonner";
-
-interface Material {
-  id: string;
-  name: string;
-  scene: string;
-  size_mb: number;
-  path: string;
-}
+import type { Material } from "@/shared/types/material";
 
 interface UseMaterialsOptions {
-  selectedMaterial: string;
-  setSelectedMaterial: React.Dispatch<React.SetStateAction<string>>;
+  selectedMaterials: string[];
+  setSelectedMaterials: React.Dispatch<React.SetStateAction<string[]>>;
 }
 
 export const useMaterials = ({
-  selectedMaterial,
-  setSelectedMaterial,
+  selectedMaterials,
+  setSelectedMaterials,
 }: UseMaterialsOptions) => {
   const [materials, setMaterials] = useState<Material[]>([]);
   const [fetchError, setFetchError] = useState<string | null>(null);
@@ -41,12 +34,13 @@ export const useMaterials = ({
       setMaterials(nextMaterials);
       setLastMaterialCount(nextMaterials.length);
 
-      setSelectedMaterial((prev) => {
-        // 如果当前选中的素材在列表中依然存在，保持选中
-        const exists = nextMaterials.some((item) => item.id === prev);
-        if (exists) return prev;
+      setSelectedMaterials((prev) => {
+        // 保留已选中且仍存在的
+        const existingIds = new Set(nextMaterials.map((m) => m.id));
+        const kept = prev.filter((id) => existingIds.has(id));
+        if (kept.length > 0) return kept;
         // 否则默认选中第一个
-        return nextMaterials[0]?.id || "";
+        return nextMaterials[0]?.id ? [nextMaterials[0].id] : [];
       });
     } catch (error) {
       console.error("获取素材失败:", error);
@@ -54,29 +48,58 @@ export const useMaterials = ({
     } finally {
       setIsFetching(false);
     }
-  }, [setSelectedMaterial]);
+  }, [setSelectedMaterials]);
+
+  const MAX_MATERIALS = 4;
+
+  // Toggle `id`: add it when unselected (up to MAX_MATERIALS), otherwise remove it.
+  const toggleMaterial = useCallback((id: string) => {
+    setSelectedMaterials((current) => {
+      const alreadySelected = current.includes(id);
+      if (!alreadySelected) {
+        return current.length >= MAX_MATERIALS ? current : [...current, id];
+      }
+      // The last remaining selection cannot be removed.
+      return current.length <= 1 ? current : current.filter((other) => other !== id);
+    });
+  }, [setSelectedMaterials]);
+
+  // Move `activeId` to the index currently held by `overId`; no-op when either id is not selected.
+  const reorderMaterials = useCallback((activeId: string, overId: string) => {
+    setSelectedMaterials((current) => {
+      const from = current.indexOf(activeId);
+      const to = current.indexOf(overId);
+      if (from < 0 || to < 0) return current;
+      const reordered = current.filter((_, position) => position !== from);
+      reordered.splice(to, 0, activeId);
+      return reordered;
+    });
+  }, [setSelectedMaterials]);
 
   const deleteMaterial = useCallback(async (materialId: string) => {
     if (!confirm("确定要删除这个素材吗？")) return;
     try {
       await api.delete(`/api/materials/${materialId}`);
       fetchMaterials();
-      if (selectedMaterial === materialId) {
-        setSelectedMaterial("");
+      if (selectedMaterials.includes(materialId)) {
+        // Drop the deleted id from the selection (the result may be empty).
+        setSelectedMaterials((prev) =>
+          prev.filter((id) => id !== materialId)
+        );
       }
     } catch (error) {
       toast.error("删除失败: " + error);
     }
-  }, [fetchMaterials, selectedMaterial, setSelectedMaterial]);
+  }, [fetchMaterials, selectedMaterials, setSelectedMaterials]);
 
   const handleUpload = useCallback(async (e: React.ChangeEvent<HTMLInputElement>) => {
     const file = e.target.files?.[0];
     if (!file) return;
 
-    const validTypes = ['.mp4', '.mov', '.avi'];
+    const validTypes = ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.wmv', '.m4v', '.ts', '.mts'];
     const ext = file.name.toLowerCase().slice(file.name.lastIndexOf('.'));
     if (!validTypes.includes(ext)) {
-      setUploadError('仅支持 MP4、MOV、AVI 格式');
+      setUploadError('不支持的视频格式');
       return;
     }
 
@@ -100,7 +123,22 @@ export const useMaterials = ({
 
       setUploadProgress(100);
       setIsUploading(false);
-      fetchMaterials();
+
+      // After the upload, re-fetch the list (the `t` timestamp presumably defeats caching).
+      const { data: res } = await api.get<ApiResponse<{ materials: Material[] }>>(
+        `/api/materials?t=${new Date().getTime()}`
+      );
+      const payload = unwrap(res);
+      const nextMaterials = payload.materials || [];
+      setMaterials(nextMaterials);
+      setLastMaterialCount(nextMaterials.length);
+      // Auto-select newly added ids, skipping already-selected ones and capping at MAX_MATERIALS.
+      // NOTE(review): `materials` is read from this callback's closure; confirm it is in the deps array.
+      const oldIds = new Set(materials.map((m) => m.id));
+      const newIds = nextMaterials.filter((m) => !oldIds.has(m.id)).map((m) => m.id);
+      if (newIds.length > 0) {
+        setSelectedMaterials((prev) => [...prev, ...newIds.filter((id) => !prev.includes(id))].slice(0, MAX_MATERIALS));
+      }
     } catch (err: unknown) {
       console.error("Upload failed:", err);
       setIsUploading(false);
@@ -122,6 +160,8 @@ export const useMaterials = ({
     uploadError,
     setUploadError,
     fetchMaterials,
+    toggleMaterial,
+    reorderMaterials,
     deleteMaterial,
     handleUpload,
   };
diff --git a/frontend/src/features/home/ui/GenerateActionBar.tsx b/frontend/src/features/home/ui/GenerateActionBar.tsx
index 52776c2..c2148f6 100644
--- a/frontend/src/features/home/ui/GenerateActionBar.tsx
+++ b/frontend/src/features/home/ui/GenerateActionBar.tsx
@@ -4,6 +4,7 @@ interface GenerateActionBarProps {
   isGenerating: boolean;
   progress: number;
   disabled: boolean;
+  materialCount?: number;
   onGenerate: () => void;
 }
 
@@ -11,43 +12,51 @@ export function GenerateActionBar({
   isGenerating,
   progress,
   disabled,
+  materialCount = 1,
   onGenerate,
 }: GenerateActionBarProps) {
   return (
-    <button
-      onClick={onGenerate}
-      disabled={disabled}
-      className={`w-full py-4 rounded-xl font-bold text-lg transition-all ${disabled
-        ? "bg-gray-600 cursor-not-allowed text-gray-400"
-        : "bg-gradient-to-r from-purple-600 to-pink-600 hover:from-purple-700 hover:to-pink-700 text-white shadow-lg hover:shadow-purple-500/25"
-        }`}
-    >
-      {isGenerating ? (
-        <span className="flex items-center justify-center gap-3">
-          <svg className="animate-spin h-5 w-5" viewBox="0 0 24 24">
-            <circle
-              className="opacity-25"
-              cx="12"
-              cy="12"
-              r="10"
-              stroke="currentColor"
-              strokeWidth="4"
-              fill="none"
-            />
-            <path
-              className="opacity-75"
-              fill="currentColor"
-              d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
-            />
-          </svg>
-          生成中... {progress}%
-        </span>
-      ) : (
-        <span className="flex items-center justify-center gap-2">
-          <Rocket className="h-5 w-5" />
-          生成视频
-        </span>
+    <div>
+      <button
+        onClick={onGenerate}
+        disabled={disabled}
+        className={`w-full py-4 rounded-xl font-bold text-lg transition-all ${disabled
+          ? "bg-gray-600 cursor-not-allowed text-gray-400"
+          : "bg-gradient-to-r from-purple-600 to-pink-600 hover:from-purple-700 hover:to-pink-700 text-white shadow-lg hover:shadow-purple-500/25"
+          }`}
+      >
+        {isGenerating ? (
+          <span className="flex items-center justify-center gap-3">
+            <svg className="animate-spin h-5 w-5" viewBox="0 0 24 24">
+              <circle
+                className="opacity-25"
+                cx="12"
+                cy="12"
+                r="10"
+                stroke="currentColor"
+                strokeWidth="4"
+                fill="none"
+              />
+              <path
+                className="opacity-75"
+                fill="currentColor"
+                d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
+              />
+            </svg>
+            生成中... {progress}%
+          </span>
+        ) : (
+          <span className="flex items-center justify-center gap-2">
+            <Rocket className="h-5 w-5" />
+            生成视频
+          </span>
+        )}
+      </button>
+      {!isGenerating && materialCount >= 2 && (
+        <p className="text-xs text-gray-400 text-center mt-1.5">
+          多素材模式 ({materialCount} 个机位)，生成耗时较长
+        </p>
       )}
-    </button>
+    </div>
   );
 }
diff --git a/frontend/src/features/home/ui/HomePage.tsx b/frontend/src/features/home/ui/HomePage.tsx
index de24151..388428c 100644
--- a/frontend/src/features/home/ui/HomePage.tsx
+++ b/frontend/src/features/home/ui/HomePage.tsx
@@ -34,8 +34,9 @@ export function HomePage() {
     fetchMaterials,
     deleteMaterial,
     handleUpload,
-    selectedMaterial,
-    setSelectedMaterial,
+    selectedMaterials,
+    toggleMaterial,
+    reorderMaterials,
     handlePreviewMaterial,
     editingMaterialId,
     editMaterialName,
@@ -49,6 +50,10 @@ export function HomePage() {
     setExtractModalOpen,
     handleGenerateMeta,
     isGeneratingMeta,
+    handleTranslate,
+    isTranslating,
+    originalText,
+    handleRestoreOriginal,
     showStylePreview,
     setShowStylePreview,
     videoTitle,
@@ -69,8 +74,6 @@ export function HomePage() {
     setTitleTopMargin,
     subtitleBottomMargin,
     setSubtitleBottomMargin,
-    enableSubtitles,
-    setEnableSubtitles,
     resolveAssetUrl,
     getFontFormat,
     buildTextShadow,
@@ -147,7 +150,7 @@ export function HomePage() {
             {/* 素材选择 */}
             <MaterialSelector
               materials={materials}
-              selectedMaterial={selectedMaterial}
+              selectedMaterials={selectedMaterials}
               isFetching={isFetching}
               lastMaterialCount={lastMaterialCount}
               editingMaterialId={editingMaterialId}
@@ -159,7 +162,8 @@ export function HomePage() {
               apiBase={apiBase}
               onUploadChange={handleUpload}
               onRefresh={fetchMaterials}
-              onSelectMaterial={setSelectedMaterial}
+              onToggleMaterial={toggleMaterial}
+              onReorderMaterials={reorderMaterials}
               onPreviewMaterial={handlePreviewMaterial}
               onStartEditing={startMaterialEditing}
               onEditNameChange={setEditMaterialName}
@@ -177,6 +181,10 @@ export function HomePage() {
               onOpenExtractModal={() => setExtractModalOpen(true)}
               onGenerateMeta={handleGenerateMeta}
               isGeneratingMeta={isGeneratingMeta}
+              onTranslate={handleTranslate}
+              isTranslating={isTranslating}
+              hasOriginalText={originalText !== null}
+              onRestoreOriginal={handleRestoreOriginal}
             />
 
             {/* 标题和字幕设置 */}
@@ -207,8 +215,6 @@ export function HomePage() {
               onTitleTopMarginChange={setTitleTopMargin}
               subtitleBottomMargin={subtitleBottomMargin}
               onSubtitleBottomMarginChange={setSubtitleBottomMargin}
-              enableSubtitles={enableSubtitles}
-              onToggleSubtitles={setEnableSubtitles}
               resolveAssetUrl={resolveAssetUrl}
               getFontFormat={getFontFormat}
               buildTextShadow={buildTextShadow}
@@ -276,7 +282,8 @@ export function HomePage() {
             <GenerateActionBar
               isGenerating={isGenerating}
               progress={currentTask?.progress || 0}
-              disabled={isGenerating || !selectedMaterial || (ttsMode === "voiceclone" && !selectedRefAudio)}
+              materialCount={selectedMaterials.length}
+              disabled={isGenerating || selectedMaterials.length === 0 || (ttsMode === "voiceclone" && !selectedRefAudio)}
               onGenerate={handleGenerate}
             />
           </div>
diff --git a/frontend/src/features/home/ui/MaterialSelector.tsx b/frontend/src/features/home/ui/MaterialSelector.tsx
index 11f364e..78f7ad7 100644
--- a/frontend/src/features/home/ui/MaterialSelector.tsx
+++ b/frontend/src/features/home/ui/MaterialSelector.tsx
@@ -1,17 +1,25 @@
-import type { ChangeEvent, MouseEvent } from "react";
-import { Upload, RefreshCw, Eye, Trash2, X, Pencil, Check } from "lucide-react";
-
-interface Material {
-  id: string;
-  name: string;
-  scene: string;
-  size_mb: number;
-  path: string;
-}
+import { type ChangeEvent, type MouseEvent } from "react";
+import { Upload, RefreshCw, Eye, Trash2, X, Pencil, Check, GripVertical } from "lucide-react";
+import type { Material } from "@/shared/types/material";
+import {
+  DndContext,
+  closestCenter,
+  KeyboardSensor,
+  PointerSensor,
+  useSensor,
+  useSensors,
+  type DragEndEvent,
+} from "@dnd-kit/core";
+import {
+  SortableContext,
+  horizontalListSortingStrategy,
+  useSortable,
+} from "@dnd-kit/sortable";
+import { CSS } from "@dnd-kit/utilities";
 
 interface MaterialSelectorProps {
   materials: Material[];
-  selectedMaterial: string;
+  selectedMaterials: string[];
   isFetching: boolean;
   lastMaterialCount: number;
   editingMaterialId: string | null;
@@ -23,7 +31,8 @@ interface MaterialSelectorProps {
   apiBase: string;
   onUploadChange: (event: ChangeEvent<HTMLInputElement>) => void;
   onRefresh: () => void;
-  onSelectMaterial: (id: string) => void;
+  onToggleMaterial: (id: string) => void;
+  onReorderMaterials: (activeId: string, overId: string) => void;
   onPreviewMaterial: (path: string) => void;
   onStartEditing: (material: Material, event: MouseEvent) => void;
   onEditNameChange: (value: string) => void;
@@ -34,9 +43,64 @@ interface MaterialSelectorProps {
   registerMaterialRef: (id: string, element: HTMLDivElement | null) => void;
 }
 
+// Circled digits 1-10 (U+2460..U+2469) shown as the chip's position badge.
+const CHIP_POSITION_BADGES = ["\u2460", "\u2461", "\u2462", "\u2463", "\u2464", "\u2465", "\u2466", "\u2467", "\u2468", "\u2469"];
+
+interface SortableChipProps {
+  id: string;
+  index: number;
+  label: string;
+  onRemove: () => void;
+}
+
+/**
+ * One draggable chip in the selected-material order strip.
+ *
+ * The grip icon is the only drag handle (it alone receives the sortable
+ * attributes/listeners); the trailing button calls `onRemove`.
+ * @param id - Sortable item id passed to `useSortable`.
+ * @param index - Zero-based position; rendered as a circled digit, or as
+ *   the plain number `index + 1` when no circled digit exists for it.
+ * @param label - Text shown on the chip (truncated by CSS).
+ * @param onRemove - Invoked when the remove button is clicked.
+ */
+function SortableChip({ id, index, label, onRemove }: SortableChipProps) {
+  const sortable = useSortable({ id });
+  const { attributes, listeners, setNodeRef, isDragging } = sortable;
+
+  const badge = CHIP_POSITION_BADGES[index] || `${index + 1}`;
+  const tone = isDragging
+    ? "bg-purple-500/50 border border-purple-400 text-white shadow-lg shadow-purple-500/30 z-10"
+    : "bg-purple-500/30 border border-purple-500/50 text-purple-200";
+
+  return (
+    <div
+      ref={setNodeRef}
+      style={{ transform: CSS.Translate.toString(sortable.transform), transition: sortable.transition }}
+      className={`flex items-center gap-1 rounded-lg px-2 py-1 text-xs whitespace-nowrap transition-colors ${tone}`}
+    >
+      <span {...attributes} {...listeners} className="cursor-grab active:cursor-grabbing text-purple-400">
+        <GripVertical className="h-3 w-3" />
+      </span>
+      <span className="text-purple-300">{badge}</span>
+      <span className="max-w-[80px] truncate">{label}</span>
+      <button
+        className="text-purple-400 hover:text-white ml-0.5"
+        onClick={(event) => {
+          // Keep the click from bubbling to ancestors of the chip.
+          event.stopPropagation();
+          onRemove();
+        }}
+      >
+        <X className="h-3 w-3" />
+      </button>
+    </div>
+  );
+}
+
 export function MaterialSelector({
   materials,
-  selectedMaterial,
+  selectedMaterials,
   isFetching,
   lastMaterialCount,
   editingMaterialId,
@@ -48,7 +112,8 @@ export function MaterialSelector({
   apiBase,
   onUploadChange,
   onRefresh,
-  onSelectMaterial,
+  onToggleMaterial,
+  onReorderMaterials,
   onPreviewMaterial,
   onStartEditing,
   onEditNameChange,
@@ -58,20 +123,36 @@ export function MaterialSelector({
   onClearUploadError,
   registerMaterialRef,
 }: MaterialSelectorProps) {
+  const sensors = useSensors(
+    useSensor(PointerSensor, { activationConstraint: { distance: 5 } }),
+    useSensor(KeyboardSensor)
+  );
+
+  // Report a finished drag to the parent unless there is no drop target or it is the dragged item itself.
+  const handleDragEnd = ({ active, over }: DragEndEvent) => {
+    if (!over) return;
+    if (active.id === over.id) return;
+    onReorderMaterials(String(active.id), String(over.id));
+  };
+
+  const selectedSet = new Set(selectedMaterials);
+  const isFull = selectedMaterials.length >= 4;
+  const circledNumbers = ["\u2460", "\u2461", "\u2462", "\u2463", "\u2464", "\u2465", "\u2466", "\u2467", "\u2468", "\u2469"];
+
   return (
     <div className="bg-white/5 rounded-2xl p-4 sm:p-6 border border-white/10 backdrop-blur-sm">
       <div className="flex justify-between items-center gap-2 mb-4">
         <h2 className="text-base sm:text-lg font-semibold text-white flex items-center gap-2 whitespace-nowrap">
           📹 视频素材
           <span className="ml-1 text-[11px] sm:text-xs text-gray-400/90 font-normal">
-            (上传自拍视频)
+            (可多选，最多4个)
           </span>
         </h2>
         <div className="flex gap-1.5">
           <input
             type="file"
             id="video-upload"
-            accept=".mp4,.mov,.avi"
+            accept="video/*"
             onChange={onUploadChange}
             className="hidden"
           />
@@ -119,6 +200,38 @@ export function MaterialSelector({
         </div>
       )}
 
+      {/* 已选素材排列（拖拽排序区） - 仅当选中 >= 2 个时显示 */}
+      {selectedMaterials.length >= 2 && (
+        <div className="mb-3 p-3 bg-purple-500/10 rounded-xl border border-purple-500/20">
+          <div className="text-[11px] text-purple-300/70 mb-2">🎬 机位顺序 (拖拽调整)</div>
+          <DndContext
+            sensors={sensors}
+            collisionDetection={closestCenter}
+            onDragEnd={handleDragEnd}
+          >
+            <SortableContext
+              items={selectedMaterials}
+              strategy={horizontalListSortingStrategy}
+            >
+              <div className="flex flex-wrap gap-1.5">
+                {selectedMaterials.map((id, index) => {
+                  const m = materials.find((x) => x.id === id);
+                  return (
+                    <SortableChip
+                      key={id}
+                      id={id}
+                      index={index}
+                      label={m?.scene || m?.name || id}
+                      onRemove={() => onToggleMaterial(id)}
+                    />
+                  );
+                })}
+              </div>
+            </SortableContext>
+          </DndContext>
+        </div>
+      )}
+
       {fetchError ? (
         <div className="p-4 bg-red-500/20 text-red-200 rounded-xl text-sm mb-4">
           获取素材失败: {fetchError}
@@ -126,7 +239,7 @@ export function MaterialSelector({
           API: {apiBase}/api/materials/
         </div>
       ) : isFetching && materials.length === 0 ? (
-        <div className="space-y-2 max-h-64 overflow-y-auto hide-scrollbar" style={{ contentVisibility: 'auto' }}>
+        <div className="space-y-2 max-h-48 sm:max-h-64 overflow-y-auto hide-scrollbar" style={{ contentVisibility: 'auto' }}>
           {Array.from({ length: Math.min(4, Math.max(1, lastMaterialCount || 1)) }).map((_, index) => (
             <div
               key={`material-skeleton-${index}`}
@@ -147,82 +260,99 @@ export function MaterialSelector({
         </div>
       ) : (
         <div
-          className="space-y-2 max-h-64 overflow-y-auto hide-scrollbar"
+          className="space-y-2 max-h-48 sm:max-h-64 overflow-y-auto hide-scrollbar"
           style={{ contentVisibility: 'auto' }}
         >
-          {materials.map((m) => (
-            <div
-              key={m.id}
-              ref={(el) => registerMaterialRef(m.id, el)}
-              className={`p-3 rounded-lg border transition-all flex items-center justify-between group ${selectedMaterial === m.id
-                ? "border-purple-500 bg-purple-500/20"
-                : "border-white/10 bg-white/5 hover:border-white/30"
-                }`}
-            >
-              {editingMaterialId === m.id ? (
-                <div className="flex-1 flex items-center gap-2" onClick={(e) => e.stopPropagation()}>
-                  <input
-                    value={editMaterialName}
-                    onChange={(e) => onEditNameChange(e.target.value)}
-                    className="flex-1 bg-black/40 border border-white/20 rounded-md px-2 py-1 text-xs text-white"
-                    autoFocus
-                  />
-                  <button
-                    onClick={(e) => onSaveEditing(m.id, e)}
-                    className="p-1 text-green-400 hover:text-green-300"
-                    title="保存"
-                  >
-                    <Check className="h-4 w-4" />
-                  </button>
-                  <button
-                    onClick={onCancelEditing}
-                    className="p-1 text-gray-400 hover:text-white"
-                    title="取消"
-                  >
-                    <X className="h-4 w-4" />
-                  </button>
-                </div>
-              ) : (
-                <button onClick={() => onSelectMaterial(m.id)} className="flex-1 text-left">
-                  <div className="text-white text-sm truncate">{m.scene || m.name}</div>
-                  <div className="text-gray-400 text-xs">{m.size_mb.toFixed(1)} MB</div>
-                </button>
-              )}
-              <div className="flex items-center gap-2 pl-2">
-                <button
-                  onClick={(e) => {
-                    e.stopPropagation();
-                    if (m.path) {
-                      onPreviewMaterial(m.path);
-                    }
-                  }}
-                  className="p-1 text-gray-500 hover:text-white opacity-0 group-hover:opacity-100 transition-opacity"
-                  title="预览视频"
-                >
-                  <Eye className="h-4 w-4" />
-                </button>
-                {editingMaterialId !== m.id && (
-                  <button
-                    onClick={(e) => onStartEditing(m, e)}
-                    className="p-1 text-gray-500 hover:text-white opacity-0 group-hover:opacity-100 transition-opacity"
-                    title="重命名"
-                  >
-                    <Pencil className="h-4 w-4" />
+          {materials.map((m) => {
+            const isSelected = selectedSet.has(m.id);
+            const selIndex = selectedMaterials.indexOf(m.id);
+            return (
+              <div
+                key={m.id}
+                ref={(el) => registerMaterialRef(m.id, el)}
+                className={`p-3 rounded-lg border transition-all flex items-center justify-between group ${isSelected
+                  ? "border-purple-500 bg-purple-500/20"
+                  : isFull
+                    ? "border-white/5 bg-white/[0.02] opacity-50 cursor-not-allowed"
+                    : "border-white/10 bg-white/5 hover:border-white/30"
+                  }`}
+              >
+                {editingMaterialId === m.id ? (
+                  <div className="flex-1 flex items-center gap-2" onClick={(e) => e.stopPropagation()}>
+                    <input
+                      value={editMaterialName}
+                      onChange={(e) => onEditNameChange(e.target.value)}
+                      className="flex-1 bg-black/40 border border-white/20 rounded-md px-2 py-1 text-xs text-white"
+                      autoFocus
+                    />
+                    <button
+                      onClick={(e) => onSaveEditing(m.id, e)}
+                      className="p-1 text-green-400 hover:text-green-300"
+                      title="保存"
+                    >
+                      <Check className="h-4 w-4" />
+                    </button>
+                    <button
+                      onClick={onCancelEditing}
+                      className="p-1 text-gray-400 hover:text-white"
+                      title="取消"
+                    >
+                      <X className="h-4 w-4" />
+                    </button>
+                  </div>
+                ) : (
+                  <button onClick={() => onToggleMaterial(m.id)} className="flex-1 text-left flex items-center gap-2">
+                    {/* 复选框 */}
+                    <span
+                      className={`flex-shrink-0 w-4 h-4 rounded border flex items-center justify-center text-[10px] ${isSelected
+                        ? "border-purple-500 bg-purple-500 text-white"
+                        : "border-white/30 text-transparent"
+                        }`}
+                    >
+                      {isSelected ? (selIndex >= 0 ? circledNumbers[selIndex] || "✓" : "✓") : ""}
+                    </span>
+                    <div className="min-w-0">
+                      <div className="text-white text-sm truncate">{m.scene || m.name}</div>
+                      <div className="text-gray-400 text-xs">{m.size_mb.toFixed(1)} MB</div>
+                    </div>
                   </button>
                 )}
-                <button
-                  onClick={(e) => {
-                    e.stopPropagation();
-                    onDeleteMaterial(m.id);
-                  }}
-                  className="p-1 text-gray-500 hover:text-red-400 opacity-0 group-hover:opacity-100 transition-opacity"
-                  title="删除素材"
-                >
-                  <Trash2 className="h-4 w-4" />
-                </button>
+                <div className="flex items-center gap-2 pl-2">
+                  <button
+                    onClick={(e) => {
+                      e.stopPropagation();
+                      if (m.path) {
+                        onPreviewMaterial(m.path);
+                      }
+                    }}
+                    className="p-1 text-gray-500 hover:text-white opacity-0 group-hover:opacity-100 transition-opacity"
+                    title="预览视频"
+                  >
+                    <Eye className="h-4 w-4" />
+                  </button>
+                  {editingMaterialId !== m.id && (
+                    <button
+                      onClick={(e) => onStartEditing(m, e)}
+                      className="p-1 text-gray-500 hover:text-white opacity-0 group-hover:opacity-100 transition-opacity"
+                      title="重命名"
+                    >
+                      <Pencil className="h-4 w-4" />
+                    </button>
+                  )}
+                  <button
+                    onClick={(e) => {
+                      e.stopPropagation();
+                      onDeleteMaterial(m.id);
+                    }}
+                    className="p-1 text-gray-500 hover:text-red-400 opacity-0 group-hover:opacity-100 transition-opacity"
+                    title="删除素材"
+                  >
+                    <Trash2 className="h-4 w-4" />
+                  </button>
+                </div>
               </div>
-            </div>
-          ))}
+            );
+          })}
         </div>
       )}
     </div>
diff --git a/frontend/src/features/home/ui/ScriptEditor.tsx b/frontend/src/features/home/ui/ScriptEditor.tsx
index 1830df8..e8f0875 100644
--- a/frontend/src/features/home/ui/ScriptEditor.tsx
+++ b/frontend/src/features/home/ui/ScriptEditor.tsx
@@ -1,4 +1,17 @@
-import { FileText, Loader2, Sparkles } from "lucide-react";
+import { useEffect, useRef, useState } from "react";
+import { FileText, Languages, Loader2, RotateCcw, Sparkles } from "lucide-react";
+// Translation targets for the language menu: `code` is passed to onTranslate, `label` is the menu text.
+const LANGUAGES = [
+  { code: "English", label: "英语 English" },
+  { code: "日本語", label: "日语 日本語" },
+  { code: "한국어", label: "韩语 한국어" },
+  { code: "Français", label: "法语 Français" },
+  { code: "Deutsch", label: "德语 Deutsch" },
+  { code: "Español", label: "西班牙语 Español" },
+  { code: "Русский", label: "俄语 Русский" },
+  { code: "Italiano", label: "意大利语 Italiano" },
+  { code: "Português", label: "葡萄牙语 Português" },
+];
 
 interface ScriptEditorProps {
   text: string;
@@ -6,6 +19,10 @@ interface ScriptEditorProps {
   onOpenExtractModal: () => void;
   onGenerateMeta: () => void;
   isGeneratingMeta: boolean;
+  onTranslate: (targetLang: string) => void;
+  isTranslating: boolean;
+  hasOriginalText: boolean;
+  onRestoreOriginal: () => void;
 }
 
 export function ScriptEditor({
@@ -14,14 +31,37 @@ export function ScriptEditor({
   onOpenExtractModal,
   onGenerateMeta,
   isGeneratingMeta,
+  onTranslate,
+  isTranslating,
+  hasOriginalText,
+  onRestoreOriginal,
 }: ScriptEditorProps) {
+  const [showLangMenu, setShowLangMenu] = useState(false);
+  const langMenuRef = useRef<HTMLDivElement>(null);
+
+  // While the language menu is open, close it on any mousedown outside of it.
+  useEffect(() => {
+    if (!showLangMenu) return;
+    const closeOnOutsidePress = (event: MouseEvent) => {
+      const menu = langMenuRef.current;
+      if (menu && !menu.contains(event.target as Node)) setShowLangMenu(false);
+    };
+    document.addEventListener("mousedown", closeOnOutsidePress);
+    return () => document.removeEventListener("mousedown", closeOnOutsidePress);
+  }, [showLangMenu]);
+  // Close the dropdown, then call the onTranslate prop with the chosen language code.
+  const handleSelectLang = (langCode: string) => {
+    setShowLangMenu(false);
+    onTranslate(langCode);
+  };
+
   return (
-    <div className="bg-white/5 rounded-2xl p-4 sm:p-6 border border-white/10 backdrop-blur-sm">
-      <div className="flex flex-wrap justify-between items-center gap-2 mb-4">
-        <h2 className="text-base sm:text-lg font-semibold text-white flex items-center gap-2 whitespace-nowrap">
+    <div className="relative z-10 bg-white/5 rounded-2xl p-4 sm:p-6 border border-white/10 backdrop-blur-sm">
+      <div className="mb-4 space-y-3">
+        <h2 className="text-base sm:text-lg font-semibold text-white flex items-center gap-2">
           ✍️ 文案提取与编辑
         </h2>
-        <div className="flex gap-2 flex-shrink-0">
+        <div className="flex gap-2 flex-wrap justify-end">
           <button
             onClick={onOpenExtractModal}
             className="px-2 py-1 text-xs rounded transition-all whitespace-nowrap bg-purple-600 hover:bg-purple-700 text-white flex items-center gap-1"
@@ -29,6 +69,54 @@ export function ScriptEditor({
             <FileText className="h-3.5 w-3.5" />
             文案提取助手
           </button>
+          <div className="relative" ref={langMenuRef}>
+            <button
+              onClick={() => setShowLangMenu((prev) => !prev)}
+              disabled={isTranslating || !text.trim()}
+              className={`px-2 py-1 text-xs rounded transition-all whitespace-nowrap ${
+                isTranslating || !text.trim()
+                  ? "bg-gray-600 cursor-not-allowed text-gray-400"
+                  : "bg-gradient-to-r from-emerald-600 to-teal-600 hover:from-emerald-700 hover:to-teal-700 text-white"
+              }`}
+            >
+              {isTranslating ? (
+                <span className="flex items-center gap-1">
+                  <Loader2 className="h-3.5 w-3.5 animate-spin" />
+                  翻译中...
+                </span>
+              ) : (
+                <span className="flex items-center gap-1">
+                  <Languages className="h-3.5 w-3.5" />
+                  AI多语言
+                </span>
+              )}
+            </button>
+            {showLangMenu && (
+              <div className="absolute right-0 top-full mt-1 z-50 bg-gray-800 border border-white/10 rounded-lg shadow-xl py-1 min-w-[160px]">
+                {hasOriginalText && (
+                  <>
+                    <button
+                      onClick={() => { setShowLangMenu(false); onRestoreOriginal(); }}
+                      className="w-full text-left px-3 py-1.5 text-xs text-amber-400 hover:bg-white/10 transition-colors flex items-center gap-1"
+                    >
+                      <RotateCcw className="h-3 w-3" />
+                      还原原文
+                    </button>
+                    <div className="border-t border-white/10 my-1" />
+                  </>
+                )}
+                {LANGUAGES.map((lang) => (
+                  <button
+                    key={lang.code}
+                    onClick={() => handleSelectLang(lang.code)}
+                    className="w-full text-left px-3 py-1.5 text-xs text-gray-200 hover:bg-white/10 transition-colors"
+                  >
+                    {lang.label}
+                  </button>
+                ))}
+              </div>
+            )}
+          </div>
           <button
             onClick={onGenerateMeta}
             disabled={isGeneratingMeta || !text.trim()}
diff --git a/frontend/src/features/home/ui/TitleSubtitlePanel.tsx b/frontend/src/features/home/ui/TitleSubtitlePanel.tsx
index 618e014..324eb28 100644
--- a/frontend/src/features/home/ui/TitleSubtitlePanel.tsx
+++ b/frontend/src/features/home/ui/TitleSubtitlePanel.tsx
@@ -52,8 +52,6 @@ interface TitleSubtitlePanelProps {
   onTitleTopMarginChange: (value: number) => void;
   subtitleBottomMargin: number;
   onSubtitleBottomMarginChange: (value: number) => void;
-  enableSubtitles: boolean;
-  onToggleSubtitles: (value: boolean) => void;
   resolveAssetUrl: (path?: string | null) => string | null;
   getFontFormat: (fontFile?: string) => string;
   buildTextShadow: (color: string, size: number) => string;
@@ -82,8 +80,6 @@ export function TitleSubtitlePanel({
   onTitleTopMarginChange,
   subtitleBottomMargin,
   onSubtitleBottomMarginChange,
-  enableSubtitles,
-  onToggleSubtitles,
   resolveAssetUrl,
   getFontFormat,
   buildTextShadow,
@@ -117,7 +113,7 @@ export function TitleSubtitlePanel({
           subtitleFontSize={subtitleFontSize}
           titleTopMargin={titleTopMargin}
           subtitleBottomMargin={subtitleBottomMargin}
-          enableSubtitles={enableSubtitles}
+          enableSubtitles={true}
           resolveAssetUrl={resolveAssetUrl}
           getFontFormat={getFontFormat}
           buildTextShadow={buildTextShadow}
@@ -186,7 +182,7 @@ export function TitleSubtitlePanel({
         </div>
       )}
 
-      {enableSubtitles && subtitleStyles.length > 0 && (
+      {subtitleStyles.length > 0 && (
         <div className="mt-4">
           <label className="text-sm text-gray-300 mb-2 block">字幕样式</label>
           <div className="grid grid-cols-2 gap-2">
@@ -232,22 +228,6 @@ export function TitleSubtitlePanel({
           </div>
         </div>
       )}
-
-      <div className="mt-4 pt-4 border-t border-white/10 flex items-center justify-between">
-        <div>
-          <span className="text-sm text-gray-300">逐字高亮字幕</span>
-          <p className="text-xs text-gray-500 mt-1">自动生成卡拉OK效果字幕</p>
-        </div>
-        <label className="relative inline-flex items-center cursor-pointer">
-          <input
-            type="checkbox"
-            checked={enableSubtitles}
-            onChange={(e) => onToggleSubtitles(e.target.checked)}
-            className="sr-only peer"
-          />
-          <div className="w-11 h-6 bg-gray-600 peer-focus:outline-none rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-purple-600"></div>
-        </label>
-      </div>
     </div>
   );
 }
diff --git a/frontend/src/shared/types/material.ts b/frontend/src/shared/types/material.ts
new file mode 100644
index 0000000..accd834
--- /dev/null
+++ b/frontend/src/shared/types/material.ts
@@ -0,0 +1,7 @@
+export interface Material { // Describes one material entry; type-only declaration with no runtime code.
+  id: string; // Identifier of the material (required string).
+  name: string; // Presumably the display or file name; confirm against the API response.
+  path: string; // Presumably the location of the material file; confirm whether it is a URL or a storage path.
+  size_mb: number; // Presumably the file size in megabytes, judging by the name; confirm the unit.
+  scene?: string; // Optional, may be absent; its meaning is not shown here, so verify with the producer of this data.
+}
diff --git a/models/Qwen3-TTS/qwen_tts_server.py b/models/Qwen3-TTS/qwen_tts_server.py
index c53982c..a0d835c 100644
--- a/models/Qwen3-TTS/qwen_tts_server.py
+++ b/models/Qwen3-TTS/qwen_tts_server.py
@@ -134,10 +134,14 @@ async def generate(
     try:
         print(f"🎤 Generating: {text[:30]}...")
         print(f"📝 Ref text: {ref_text[:50]}...")
+        print(f"🌐 Language: {language}")
 
         start = time.time()
 
-        wavs, sr = _model.generate_voice_clone(
+        # 在线程池中运行，避免阻塞事件循环导致健康检查超时
+        import asyncio
+        wavs, sr = await asyncio.to_thread(
+            _model.generate_voice_clone,
             text=text,
             language=language,
             ref_audio=ref_audio_path,