Compare commits

...

2 Commits

Author SHA1 Message Date
Kevin Wong
3129d45b25 更新 2026-02-09 14:47:19 +08:00
Kevin Wong
e226224119 更新 2026-02-08 19:54:11 +08:00
50 changed files with 2708 additions and 1500 deletions

17
.gitignore vendored
View File

@@ -20,11 +20,14 @@ node_modules/
out/
.turbo/
# ============ IDE ============
# ============ IDE / AI 工具 ============
.vscode/
.idea/
*.swp
*.swo
.agents/
.opencode/
.claude/
# ============ 系统文件 ============
.DS_Store
@@ -35,11 +38,21 @@ desktop.ini
backend/outputs/
backend/uploads/
backend/cookies/
backend/user_data/
backend/debug_screenshots/
*_cookies.json
# ============ MuseTalk ============
# ============ 模型权重 ============
models/*/checkpoints/
models/MuseTalk/models/
models/MuseTalk/results/
models/LatentSync/temp/
# ============ Remotion 构建 ============
remotion/dist/
# ============ 临时文件 ============
Temp/
# ============ 日志 ============
*.log

View File

@@ -29,15 +29,15 @@ backend/
├── app/
│ ├── core/ # config、deps、security、response
│ ├── modules/ # 业务模块(路由 + 逻辑)
│ │ ├── videos/ # 视频生成任务
│ │ ├── materials/ # 素材管理
│ │ ├── videos/ # 视频生成任务(router/schemas/service/workflow)
│ │ ├── materials/ # 素材管理(router/schemas/service)
│ │ ├── publish/ # 多平台发布
│ │ ├── auth/ # 认证与会话
│ │ ├── ai/ # AI 功能(标题标签生成等)
│ │ ├── assets/ # 静态资源(字体/样式/BGM)
│ │ ├── ref_audios/ # 声音克隆参考音频
│ │ ├── ref_audios/ # 声音克隆参考音频(router/schemas/service)
│ │ ├── login_helper/ # 扫码登录辅助
│ │ ├── tools/ # 工具接口
│ │ ├── tools/ # 工具接口(router/schemas/service)
│ │ └── admin/ # 管理员功能
│ ├── repositories/ # Supabase 数据访问
│ ├── services/ # 外部服务集成
@@ -124,10 +124,13 @@ backend/user_data/{user_uuid}/cookies/
## 8. 开发流程建议
- **新增功能**:先建模块,再写 router/service/workflow
- **修复 Bug**:顺手把涉及的逻辑抽到对应 service/workflow。
- **新增功能**:先建模块,**必须**包含 `router.py + schemas.py + service.py`,不允许 router-only
- **修复 Bug**:顺手把涉及的逻辑抽到对应 service/workflow(渐进式改造)
- **改旧模块**:改动哪部分就拆哪部分,不要求一次重构整个文件。
- **核心流程变更**:必跑冒烟(登录/生成/发布)。
> **渐进原则**:新代码高标准,旧代码逐步改。不做大规模一次性重构,避免引入回归风险。
---
## 9. 常用环境变量

View File

@@ -15,15 +15,15 @@ backend/
├── app/
│ ├── core/ # 核心配置 (config.py, security.py, response.py)
│ ├── modules/ # 业务模块 (router/service/workflow/schemas)
│ │ ├── videos/ # 视频生成任务
│ │ ├── materials/ # 素材管理
│ │ ├── videos/ # 视频生成任务(router/schemas/service/workflow)
│ │ ├── materials/ # 素材管理(router/schemas/service)
│ │ ├── publish/ # 多平台发布
│ │ ├── auth/ # 认证与会话
│ │ ├── ai/ # AI 功能(标题标签生成)
│ │ ├── assets/ # 静态资源(字体/样式/BGM)
│ │ ├── ref_audios/ # 声音克隆参考音频
│ │ ├── ref_audios/ # 声音克隆参考音频(router/schemas/service)
│ │ ├── login_helper/ # 扫码登录辅助
│ │ ├── tools/ # 工具接口(文案提取等)
│ │ ├── tools/ # 工具接口(router/schemas/service)
│ │ └── admin/ # 管理员功能
│ ├── repositories/ # Supabase 数据访问
│ ├── services/ # 外部服务集成 (TTS/Remotion/Storage/Uploader 等)
@@ -108,6 +108,8 @@ backend/
`POST /api/videos/generate` 支持以下可选字段:
- `material_path`: 视频素材路径(单素材模式)
- `material_paths`: 多素材路径数组(多机位模式,≥2 个素材时按句子自动切换)
- `tts_mode`: TTS 模式 (`edgetts` / `voiceclone`)
- `voice`: EdgeTTS 音色 ID(edgetts 模式)
- `ref_audio_id` / `ref_text`: 参考音频 ID 与文本(voiceclone 模式)

View File

@@ -342,6 +342,6 @@ models/Qwen3-TTS/
## 🔗 相关文档
- [task_complete.md](../task_complete.md) - 任务总览
- [TASK_COMPLETE.md](../TASK_COMPLETE.md) - 任务总览
- [Day11.md](./Day11.md) - 上传架构重构
- [QWEN3_TTS_DEPLOY.md](../QWEN3_TTS_DEPLOY.md) - Qwen3-TTS 部署指南

View File

@@ -273,7 +273,7 @@ pm2 logs vigent2-qwen-tts --lines 50
## 🔗 相关文档
- [task_complete.md](../task_complete.md) - 任务总览
- [TASK_COMPLETE.md](../TASK_COMPLETE.md) - 任务总览
- [Day12.md](./Day12.md) - iOS 兼容与 Qwen3-TTS 部署
- [QWEN3_TTS_DEPLOY.md](../QWEN3_TTS_DEPLOY.md) - Qwen3-TTS 部署指南
- [SUBTITLE_DEPLOY.md](../SUBTITLE_DEPLOY.md) - 字幕功能部署指南

View File

@@ -397,6 +397,6 @@ if ((status === 401 || status === 403) && !isRedirecting && !isPublicPath) {
## 🔗 相关文档
- [task_complete.md](../task_complete.md) - 任务总览
- [TASK_COMPLETE.md](../TASK_COMPLETE.md) - 任务总览
- [Day13.md](./Day13.md) - 声音克隆功能集成 + 字幕功能
- [QWEN3_TTS_DEPLOY.md](../QWEN3_TTS_DEPLOY.md) - Qwen3-TTS 1.7B 部署指南

View File

@@ -342,7 +342,7 @@ pm2 restart vigent2-backend vigent2-frontend
## 🔗 相关文档
- [task_complete.md](../task_complete.md) - 任务总览
- [TASK_COMPLETE.md](../TASK_COMPLETE.md) - 任务总览
- [Day14.md](./Day14.md) - 模型升级 + AI 标题标签
- [AUTH_DEPLOY.md](../AUTH_DEPLOY.md) - 认证系统部署指南

View File

@@ -136,4 +136,4 @@ if service["failures"] >= service['threshold']:
- [x] `Docs/QWEN3_TTS_DEPLOY.md`: 添加 Flash Attention 安装指南
- [x] `Docs/DEPLOY_MANUAL.md`: 添加 Watchdog 部署说明
- [x] `Docs/task_complete.md`: 更新进度至 100% (Day 16)
- [x] `Docs/TASK_COMPLETE.md`: 更新进度至 100% (Day 16)

View File

@@ -246,3 +246,204 @@ PLATFORM_CONFIGS = {
pm2 restart vigent2-backend # 发布服务 + QR登录
npm run build && pm2 restart vigent2-frontend # 刷脸验证UI
```
---
## 🏗️ 架构优化:前端结构微调 + 后端模块分层 (Day 21)
### 概述
根据架构审计结果,完成前端目录规范化和后端核心模块的分层补全。
### 一、前端结构微调
#### 1. ScriptExtractionModal 迁移
- `components/ScriptExtractionModal.tsx` → `features/home/ui/ScriptExtractionModal.tsx`
- 连带 `components/script-extraction/` 目录一并迁移到 `features/home/ui/script-extraction/`
- 更新 `HomePage.tsx` 的 import 路径
#### 2. contexts/ 目录归并
- `src/contexts/AuthContext.tsx` → `src/shared/contexts/AuthContext.tsx`
- `src/contexts/TaskContext.tsx` → `src/shared/contexts/TaskContext.tsx`
- 更新 6 处 import(layout.tsx, useHomeController.ts, usePublishController.ts, AccountSettingsDropdown.tsx, GlobalTaskIndicator.tsx)
- 删除空的 `src/contexts/` 目录
#### 3. 清理重构遗留空目录
- 删除 `src/lib/`、`src/components/home/`、`src/hooks/`
### 二、后端模块分层补全
将 3 个 400+ 行的 router-only 模块拆分为 `router.py + schemas.py + service.py`
| 模块 | 改造前 | 改造后 router |
|------|--------|--------------|
| `materials/` | 416 行 | 63 行 |
| `tools/` | 417 行 | 33 行 |
| `ref_audios/` | 421 行 | 71 行 |
业务逻辑全部提取到 `service.py`,数据模型定义在 `schemas.py`,router 只做参数校验 + 调用 service + 返回响应。
### 三、开发规范更新
`BACKEND_DEV.md` 第 8 节新增渐进原则:
- 新模块**必须**包含 `router.py + schemas.py + service.py`
- 改旧模块时顺手拆涉及的部分
- 新代码高标准,旧代码逐步改
### 涉及文件汇总
| 文件 | 变更 |
|------|------|
| `frontend/src/features/home/ui/ScriptExtractionModal.tsx` | 从 components/ 迁入 |
| `frontend/src/features/home/ui/script-extraction/` | 从 components/ 迁入 |
| `frontend/src/shared/contexts/AuthContext.tsx` | 从 contexts/ 迁入 |
| `frontend/src/shared/contexts/TaskContext.tsx` | 从 contexts/ 迁入 |
| `backend/app/modules/materials/schemas.py` | **新建** |
| `backend/app/modules/materials/service.py` | **新建** |
| `backend/app/modules/materials/router.py` | 精简为薄路由 |
| `backend/app/modules/tools/schemas.py` | **新建** |
| `backend/app/modules/tools/service.py` | **新建** |
| `backend/app/modules/tools/router.py` | 精简为薄路由 |
| `backend/app/modules/ref_audios/schemas.py` | **新建** |
| `backend/app/modules/ref_audios/service.py` | **新建** |
| `backend/app/modules/ref_audios/router.py` | 精简为薄路由 |
| `Docs/BACKEND_DEV.md` | 目录结构标注分层、新增渐进原则 |
| `Docs/BACKEND_README.md` | 目录结构标注分层 |
| `Docs/FRONTEND_DEV.md` | 更新目录结构(contexts 迁移、ScriptExtractionModal 迁移) |
### 重启要求
```bash
pm2 restart vigent2-backend
npm run build && pm2 restart vigent2-frontend
```
---
## 🎬 多素材视频生成(多机位效果)
### 概述
支持用户上传多个不同角度的自拍视频,生成视频时按句子自动切换素材,最终效果类似多机位拍摄。单素材时走原有流程,无额外开销。
### 核心架构
#### 流水线变更
```
【单素材(不变)】
text → TTS → audio → LatentSync(1个素材+完整audio) → Whisper字幕 → Remotion → 成片
【多素材(新增)】
text → TTS → audio → Whisper字幕(提前) → 按素材数量均分时长(对齐字边界)
→ 对每段: 切分audio + LatentSync(素材[i]+音频片段[i])
→ FFmpeg拼接所有片段 → Remotion(完整字幕时间戳) → 成片
```
#### 素材切换逻辑(均分方案)
1. Whisper 对完整音频转录,得到字级别时间戳
2. 按素材数量**均分音频总时长**(`total_duration / N`)
3. 每个分割点对齐到最近的 Whisper 字边界,避免在字中间切分
4. 首段 start 扩展为 0.0,末段 end 扩展为音频结尾,确保完整覆盖
> **设计决策**:最初方案基于原始文案标点分句,但用户文案往往不含句号(只有逗号),导致只产生 1 段。改为均分方案后不依赖文案标点,对任何输入都能正确切分。
---
### 一、后端改动
#### 1. `backend/app/modules/videos/schemas.py`
- 新增 `material_paths: Optional[List[str]]` 字段
- 保留 `material_path: str` 向后兼容
#### 2. `backend/app/modules/videos/workflow.py`(核心改动)
**新增函数**
- `_split_equal(segments, material_paths)`: 按素材数量均分音频时长,对齐到最近的 Whisper 字边界
**修改 `process_video_generation()`**
- `is_multi = len(material_paths) > 1` 判断走多素材/单素材分支
- 多素材分支Whisper 提前 → 均分切分 → 音频切分 → 逐段 LatentSync → FFmpeg 拼接
#### 3. `backend/app/services/video_service.py`
- 新增 `concat_videos()`: FFmpeg concat demuxer (`-c copy`) 拼接视频片段
- 新增 `split_audio()`: FFmpeg 按时间范围切分音频 (`-ss` + `-t` + `-c copy`)
#### 4. `backend/scripts/watchdog.py`
- 健康检查阈值从 3 次提高到 5 次(容忍期 2.5 分钟)
- 新增重启后 120 秒冷却期,避免模型加载期间被误判为故障
- 启动时给所有服务 60 秒初始冷却期
---
### 二、前端改动
#### 1. 新增依赖
```bash
npm install @dnd-kit/core @dnd-kit/sortable @dnd-kit/utilities
```
#### 2. `frontend/src/features/home/model/useMaterials.ts`
- `selectedMaterial: string` → `selectedMaterials: string[]`(多选)
- 新增 `toggleMaterial(id)`: 切换选中/取消(至少保留1个)
- 新增 `reorderMaterials(activeId, overId)`: 拖拽排序
- 上传格式扩展:新增 `.mkv/.webm/.flv/.wmv/.m4v/.ts/.mts`
#### 3. `frontend/src/features/home/ui/MaterialSelector.tsx`(重写)
- 素材列表每行增加复选框 + 序号徽标(①②③)
- 选中 ≥2 个时显示拖拽排序区(@dnd-kit `SortableContext`
- 每个排序项:拖拽把手 + 序号 + 素材名 + 移除按钮
- HTML input accept 改为 `video/*`
#### 4. `frontend/src/features/home/model/useHomeController.ts`
- 多素材 payload`material_paths` 数组 + `material_path` 向后兼容
- `enable_subtitles` 硬编码为 `true`(移除开关)
- 验证:至少选中 1 个素材
#### 5. `frontend/src/features/home/model/useHomePersistence.ts`
- 素材持久化改为 JSON 数组,向后兼容旧格式(单字符串)
- 移除 `enableSubtitles` 持久化
#### 6. `frontend/src/features/home/ui/TitleSubtitlePanel.tsx`
- 移除"逐字高亮字幕"开关,字幕样式区始终显示
#### 7. `frontend/src/features/home/ui/HomePage.tsx`
- 更新 props 传递(`selectedMaterials`, `toggleMaterial`, `reorderMaterials`
---
### 三、Bug 修复记录
#### BUG-1: 多素材只使用第一个视频(基于标点的分句方案失败)
- **现象**: 选了 2 个素材但生成的视频只使用第 1 个,日志显示 `Multi-material: 1 segments, 2 materials`
- **根因 v1**: 最初通过正则 `[。!?!?]` 在 Whisper 输出中分句,但 Whisper 不输出标点。
- **修复 v1**: 改为用原始文案标点分句——但用户文案往往只含逗号(,),无句末标点(。!?),仍退化为 1 段。
- **最终修复**: 彻底放弃基于标点的分句方案,改为 `_split_equal()` **按素材数量均分音频时长**,对齐到最近的 Whisper 字边界。不依赖任何标点符号,对所有文案均有效。
#### BUG-2: 口型对不上(音频时间偏移)
- **根因**: `split_audio` 用 Whisper 的 start/end 时间(如 0.11~7.21)切分音频,但 `compose()` 用完整原始音频(0.0~结尾)合成,导致时间偏移。
- **修复**: 强制首段 start=0.0,末段 end=音频实际时长,确保切分音频完整覆盖。
#### BUG-3: min_segment_sec 过度合并导致退化(已随方案切换移除)
- **根因**: 旧方案中 2 个句子第 2 句不足 3 秒时,最短时长检查合并为 1 段,多素材退化为单素材。
- **状态**: 均分方案不存在此问题,相关代码已移除。
---
### 涉及文件汇总
| 文件 | 变更类型 | 说明 |
|------|----------|------|
| `backend/app/modules/videos/schemas.py` | 修改 | 新增 material_paths 字段 |
| `backend/app/modules/videos/workflow.py` | 修改 | 多素材流水线核心逻辑 + 3个 Bug 修复 |
| `backend/app/services/video_service.py` | 修改 | 新增 concat_videos / split_audio |
| `backend/scripts/watchdog.py` | 修改 | 阈值优化 + 冷却期机制 |
| `frontend/package.json` | 修改 | 新增 @dnd-kit 依赖 |
| `frontend/src/features/home/model/useMaterials.ts` | 修改 | 多选 + 排序状态管理 |
| `frontend/src/features/home/ui/MaterialSelector.tsx` | 重写 | 多选复选框 + 拖拽排序 UI |
| `frontend/src/features/home/model/useHomeController.ts` | 修改 | 多素材 payload + 移除字幕开关 |
| `frontend/src/features/home/model/useHomePersistence.ts` | 修改 | JSON 数组持久化 |
| `frontend/src/features/home/ui/TitleSubtitlePanel.tsx` | 修改 | 移除字幕开关 |
| `frontend/src/features/home/ui/HomePage.tsx` | 修改 | 更新 props 传递 |
### 重启要求
```bash
pm2 restart vigent2-backend
npm run build && pm2 restart vigent2-frontend
```

221
Docs/DevLogs/Day22.md Normal file
View File

@@ -0,0 +1,221 @@
## 🔧 多素材生成优化与健壮性加固 (Day 22)
### 概述
对 Day 21 实现的多素材视频生成(多机位)功能进行全面审查,修复 6 个高优先级 Bug、完成 8 项体验优化,并将多素材流水线从"逐段 LatentSync"重构为"先拼接再推理"架构,推理次数从 N 次降为 1 次。
---
### 一、后端高优 Bug 修复
#### 1. `_split_equal()` 素材数 > 字符数边界溢出
- **问题**: 5 个素材但只有 2 个 Whisper 字符时,边界索引重复,部分素材被跳过
- **修复**: 加入 `n = min(n, len(all_chars))` 上限保护
- **文件**: `backend/app/modules/videos/workflow.py`
#### 2. 多素材 LatentSync 单段失败无 fallback
- **问题**: 单素材模式下 LatentSync 失败会 fallback 到原始素材,但多素材模式直接抛异常,整个任务失败
- **修复**: 多素材循环中加 try-except,失败时 fallback 到原始素材片段
- **文件**: `backend/app/modules/videos/workflow.py`
#### 3. `num_segments == 0` 时 ZeroDivisionError
- **问题**: 所有 assignments 被跳过后 `i / num_segments` 触发除零
- **修复**: 循环前加 `if num_segments == 0` 检查并抛出明确错误
- **文件**: `backend/app/modules/videos/workflow.py`
#### 4. `split_audio` 未校验 duration > 0
- **问题**: `end <= start` 时 FFmpeg 行为异常
- **修复**: 加入 `if duration <= 0: raise ValueError(...)`
- **文件**: `backend/app/services/video_service.py`
#### 5. Whisper 失败时按时长均分兜底
- **问题**: Whisper 失败后直接退化为单素材,其他素材被浪费
- **修复**: 按 `audio_duration / len(material_paths)` 均分,不依赖字符对齐
- **文件**: `backend/app/modules/videos/workflow.py`
#### 6. `concat_videos` 空列表未检查
- **问题**: 传入空 `video_paths` 时 FFmpeg 报错
- **修复**: 加入 `if not video_paths: raise ValueError(...)`
- **文件**: `backend/app/services/video_service.py`
---
### 二、前端优化
#### 1. payload 构建非空断言修复
- `m!.path` → `m?.path` + `.filter(Boolean)`,防止素材被删后 crash
- **文件**: `frontend/src/features/home/model/useHomeController.ts`
#### 2. 生成按钮展示后端进度消息
- 新增 `message` prop生成中显示如"(正在处理片段 2/3...)"
- **文件**: `frontend/src/features/home/ui/GenerateActionBar.tsx`, `HomePage.tsx`
#### 3. 新上传素材自动选中
- 上传成功后对比前后素材列表,新增的 ID 自动追加到 `selectedMaterials`
- **文件**: `frontend/src/features/home/model/useMaterials.ts`
#### 4. Material 接口统一
- 三处 `interface Material` 重复定义提取到 `shared/types/material.ts`
- **文件**: `frontend/src/shared/types/material.ts` (新建), `useMaterials.ts`, `useHomeController.ts`, `MaterialSelector.tsx`
#### 5. 拖拽排序修复
- 移除 `DragOverlay`(`backdrop-blur` 创建新 containing block 导致定位错乱)
- 改为 `useSortable` 原生拖拽 + `CSS.Translate`,拖拽中元素高亮加阴影
- **文件**: `frontend/src/features/home/ui/MaterialSelector.tsx`
#### 6. 素材选择上限 4 个
- `toggleMaterial` 新增 `MAX_MATERIALS = 4` 限制
- UI 选满后未选中项变半透明禁用,提示文字改为"可多选最多4个"
- **文件**: `useMaterials.ts`, `MaterialSelector.tsx`
#### 7. 移动端排序区域响应式
- 素材列表 `max-h-64` → `max-h-48 sm:max-h-64`
- **文件**: `MaterialSelector.tsx`
#### 8. 多素材耗时提示
- 选中 ≥2 素材时生成按钮下方显示"多素材模式 (N 个机位),生成耗时较长"
- **文件**: `GenerateActionBar.tsx`, `HomePage.tsx`
---
### 三、核心架构重构:先拼接再推理
#### V1 (Day 21): 逐段 LatentSync
```
素材A → LatentSync(素材A, 音频片段1) → lipsync_A
素材B → LatentSync(素材B, 音频片段2) → lipsync_B
FFmpeg concat(lipsync_A, lipsync_B) → 最终视频
```
- 缺点N 个素材 = N 次 LatentSync 推理(每次 ~30s
#### V2 (Day 22): 先拼接再推理
```
素材A → prepare_segment(裁剪到3.67s) → prepared_A
素材B → prepare_segment(裁剪到4.00s) → prepared_B
FFmpeg concat(prepared_A, prepared_B) → concat_video (7.67s)
LatentSync(concat_video, 完整音频) → 最终视频
```
- 优点:只需 **1 次** LatentSync 推理,时间从 N×30s 降为 1×30s
#### 新增 `prepare_segment()` 方法
```python
def prepare_segment(self, video_path, target_duration, output_path, target_resolution=None):
# 素材时长 > 目标: 裁剪 (-t)
# 素材时长 < 目标: 循环 (-stream_loop) + 裁剪
# 分辨率一致: -c copy 无损 (不重编码)
# 分辨率不一致: scale + pad 统一到第一个素材分辨率
```
#### 分辨率处理策略
- 新增 `get_resolution()` 方法检测各素材分辨率
- 所有素材分辨率相同时:`-c copy` 无损裁剪(保持原画质)
- 分辨率不一致时:统一到第一个素材的分辨率,`force_original_aspect_ratio=decrease` + `pad` 居中
- LatentSync 只处理嘴部 512×512 区域,输出保持原分辨率
#### 时间对齐验证
| 环节 | 时间基准 | 对齐关系 |
|------|---------|---------|
| TTS 音频 | 原始时长 (7.67s) | 基准 |
| Whisper 字幕 | 基于 TTS 音频 | 时间戳对齐音频 |
| 均分切分 | assignments 总时长 = 音频时长 | 首段 start=0, 末段 end=audio_duration |
| prepare 各段 | `-t seg_dur` 精确截断 | 总和 ≈ 音频时长 |
| LatentSync | concat_video + 完整音频 | 内部 0.5s 容差 |
| compose | lipsync_video + 音频/BGM | `-shortest` 保证同步 |
| Remotion | 基于 captions_path 渲染字幕 | 时间戳对齐音频 |
---
### 涉及文件汇总
| 文件 | 变更类型 | 说明 |
|------|----------|------|
| `backend/app/modules/videos/workflow.py` | 修改 | 6 个 Bug 修复 + 流水线重构(先拼接再推理)|
| `backend/app/services/video_service.py` | 修改 | 新增 `prepare_segment()`、`get_resolution()`,`split_audio` 校验,`concat_videos` 空列表检查 |
| `frontend/src/shared/types/material.ts` | 新建 | 统一 Material 接口 |
| `frontend/src/features/home/model/useMaterials.ts` | 修改 | 上传自动选中、素材上限 4 个 |
| `frontend/src/features/home/model/useHomeController.ts` | 修改 | payload 非空断言修复、Material 接口引用 |
| `frontend/src/features/home/ui/MaterialSelector.tsx` | 修改 | 拖拽修复、上限 4 个 UI、移动端响应式 |
| `frontend/src/features/home/ui/GenerateActionBar.tsx` | 修改 | 进度消息展示、多素材耗时提示 |
| `frontend/src/features/home/ui/HomePage.tsx` | 修改 | 传递 message、materialCount prop |
---
### 四、AI 多语言翻译
#### 功能
在文案编辑区新增「AI多语言」按钮支持将中文口播文案一键翻译为 9 种语言,并可随时还原原文。
#### 支持语言
英语 English、日语 日本語、韩语 한국어、法语 Français、德语 Deutsch、西班牙语 Español、俄语 Русский、意大利语 Italiano、葡萄牙语 Português
#### 实现
##### 后端
- **`backend/app/services/glm_service.py`** — 新增 `translate_text()` 方法,调用智谱 GLM APItemperature=0.3prompt 要求只返回译文、保持语气风格
- **`backend/app/modules/ai/router.py`** — 新增 `POST /api/ai/translate` 接口,接收 `{text, target_lang}`,返回 `{translated_text}`
##### 前端
- **`frontend/src/features/home/ui/ScriptEditor.tsx`** — 新增 `LANGUAGES` 列表9 种语言)、语言下拉菜单(点击外部自动关闭)、翻译中 loading 状态、「还原原文」按钮(翻译过后出现在菜单顶部)
- **`frontend/src/features/home/model/useHomeController.ts`** — 新增 `handleTranslate`(调用翻译 API、首次翻译保存原文`originalText` 状态、`handleRestoreOriginal`(恢复原文)
#### 涉及文件
| 文件 | 变更 | 说明 |
|------|------|------|
| `backend/app/services/glm_service.py` | 修改 | 新增 `translate_text()` 方法 |
| `backend/app/modules/ai/router.py` | 修改 | 新增 `/api/ai/translate` 接口 |
| `frontend/src/features/home/ui/ScriptEditor.tsx` | 修改 | 语言菜单 UI、翻译 loading、还原原文按钮 |
| `frontend/src/features/home/model/useHomeController.ts` | 修改 | `handleTranslate``originalText``handleRestoreOriginal` |
---
### 五、TTS 多语言支持
#### 背景
翻译功能实现后,用户可将中文文案翻译为其他语言。但翻译后生成视频时 TTS 仍只支持中文:
- **EdgeTTS**:声音列表只有 5 个 `zh-CN-*` 中文声音
- **声音克隆 (Qwen3-TTS)**`language` 参数硬编码为 `"Chinese"`
#### 实现方案
##### 1. 前端:语言感知的声音列表
- `VOICES` 从扁平数组扩展为 `Record<string, VoiceOption[]>`,覆盖 10 种语言zh-CN / en-US / ja-JP / ko-KR / fr-FR / de-DE / es-ES / ru-RU / it-IT / pt-BR每种语言 2 个声音(男/女)
- 新增 `LANG_TO_LOCALE` 映射:翻译目标语言名 → EdgeTTS locale`"English" → "en-US"`
- 新增 `textLang` 状态,跟踪当前文案语言,默认 `"zh-CN"`
##### 2. 翻译时自动切换声音
- `handleTranslate` 成功后:根据目标语言设置 `textLang`EdgeTTS 模式下自动切换 `voice` 为目标语言的默认声音
- `handleRestoreOriginal` 还原时:重置 `textLang``"zh-CN"`,恢复中文默认声音
- `VoiceSelector` 根据 `textLang` 动态显示对应语言的声音列表
##### 3. 声音克隆语言透传
- 前端:新增 `LOCALE_TO_QWEN_LANG` 映射(`zh-CN→"Chinese"`, `en-US→"English"`, 其他→`"Auto"`
- 生成请求 payload 加入 `language` 字段(仅声音克隆模式)
- 后端 `GenerateRequest` schema 新增 `language: str = "Chinese"` 字段
- `workflow.py``language="Chinese"` 硬编码改为 `language=req.language`
##### 4. Bug 修复textLang 持久化
- **问题**: `voice` 已持久化但 `textLang` 未持久化,刷新页面后 `voice` 恢复为英文声音但 `textLang` 默认回中文,导致 VoiceSelector 显示中文声音列表却选中英文声音,无高亮按钮
- **修复**: 在 `useHomePersistence` 中加入 `textLang` 的 localStorage 读写
#### 数据流
```
用户翻译 "English"
→ ScriptEditor.onTranslate("English")
→ LANG_TO_LOCALE["English"] = "en-US"
→ setTextLang("en-US"), setVoice("en-US-GuyNeural")
→ VoiceSelector 显示 VOICES["en-US"] = [Guy, Jenny]
→ 生成时:
EdgeTTS: payload.voice = "en-US-GuyNeural"
声音克隆: payload.language = "English" (via getQwenLanguage)
```
#### 涉及文件
| 文件 | 变更 | 说明 |
|------|------|------|
| `frontend/src/features/home/model/useHomeController.ts` | 修改 | VOICES 多语言 Record、textLang 状态、LANG_TO_LOCALE / LOCALE_TO_QWEN_LANG 映射、翻译自动切换 voice |
| `frontend/src/features/home/model/useHomePersistence.ts` | 修改 | textLang 持久化读写 |
| `backend/app/modules/videos/schemas.py` | 修改 | GenerateRequest 加 `language` 字段 |
| `backend/app/modules/videos/workflow.py` | 修改 | 声音克隆调用处用 `req.language` 替代硬编码 |

View File

@@ -389,7 +389,7 @@ if not qr_element:
## 📋 文档规则优化 (16:42 - 17:10)
**问题**Doc_Rules需要优化,避免误删历史内容、规范工具使用、防止任务清单遗漏
**问题**DOC_RULES需要优化,避免误删历史内容、规范工具使用、防止任务清单遗漏
**优化内容(最终版)**
@@ -411,7 +411,7 @@ if not qr_element:
- 移除无关项目组件
**修改文件**
- `Docs/Doc_Rules.md` - 包含检查清单的最终完善版
- `Docs/DOC_RULES.md` - 包含检查清单的最终完善版
---

View File

@@ -8,8 +8,8 @@
| 规则 | 说明 |
|------|------|
| **默认更新** | 更新 `DayN.md` |
| **按需更新** | `task_complete.md` 仅在用户**明确要求**时更新 |
| **默认更新** | 更新 `DayN.md``TASK_COMPLETE.md` |
| **按需更新** | 其他文档仅在内容变化涉及时更新 |
| **智能修改** | 错误→替换,改进→追加(见下方详细规则) |
| **先读后写** | 更新前先查看文件当前内容 |
| **日内合并** | 同一天的多次小修改合并为最终版本 |
@@ -23,7 +23,7 @@
| 优先级 | 文件路径 | 检查重点 |
| :---: | :--- | :--- |
| 🔥 **High** | `Docs/DevLogs/DayN.md` | **(最新日志)** 详细记录变更、修复、代码片段 |
| 🔥 **High** | `Docs/task_complete.md` | **(任务总览)** 更新 `[x]`、进度条、时间线 |
| 🔥 **High** | `Docs/TASK_COMPLETE.md` | **(任务总览)** 更新 `[x]`、进度条、时间线 |
| ⚡ **Med** | `README.md` | **(项目主页)** 功能特性、技术栈、最新截图 |
| ⚡ **Med** | `Docs/DEPLOY_MANUAL.md` | **(部署手册)** 环境变量、依赖包、启动命令变更 |
| ⚡ **Med** | `Docs/BACKEND_DEV.md` | **(后端规范)** 接口契约、模块划分、环境变量 |
@@ -186,15 +186,15 @@ new_string: "**状态**:✅ 已修复"
```
ViGent2/Docs/
├── task_complete.md # 任务总览(仅按需更新)
├── Doc_Rules.md # 本文件
├── TASK_COMPLETE.md # 任务总览(仅按需更新)
├── DOC_RULES.md # 本文件
├── BACKEND_DEV.md # 后端开发规范
├── BACKEND_README.md # 后端功能文档
├── FRONTEND_DEV.md # 前端开发规范
├── FRONTEND_README.md # 前端功能文档
├── DEPLOY_MANUAL.md # 部署手册
├── SUPABASE_DEPLOY.md # Supabase 部署文档
├── LatentSync_DEPLOY.md # LatentSync 部署文档
├── LATENTSYNC_DEPLOY.md # LatentSync 部署文档
├── QWEN3_TTS_DEPLOY.md # 声音克隆部署文档
├── SUBTITLE_DEPLOY.md # 字幕系统部署文档
└── DevLogs/
@@ -206,8 +206,16 @@ ViGent2/Docs/
## 📅 DayN.md 更新规则(日常更新)
### 更新时机
> **边开发边记录,不要等到最后才写。**
- 每完成一个功能/修复后,**立即**追加到 DayN.md
- 避免积攒到对话末尾一次性补写,容易遗漏变更
- `TASK_COMPLETE.md` 同理,重要变更完成后及时同步
### 新建判断 (对话开始前)
1. **回顾进度**:查看 `task_complete.md` 了解当前状态
1. **回顾进度**:查看 `TASK_COMPLETE.md` 了解当前状态
2. **检查日期**:查看最新 `DayN.md`
- **今天 (与当前日期相同)** → 🚨 **绝对禁止创建新文件**,必须**追加**到现有 `DayN.md` 末尾!即使是完全不同的功能模块。
- **之前 (昨天或更早)** → 创建 `Day{N+1}.md`
@@ -263,17 +271,17 @@ ViGent2/Docs/
---
## 📝 task_complete.md 更新规则(仅按需)
## 📝 TASK_COMPLETE.md 更新规则
> ⚠️ **仅当用户明确要求更新 `task_complete.md` 时才更新**
> 与 DayN.md 同步更新,记录重要变更时更新任务总览。
### 更新原则
- **格式一致性**:直接参考 `task_complete.md` 现有格式追加内容。
- **格式一致性**:直接参考 `TASK_COMPLETE.md` 现有格式追加内容。
- **进度更新**:仅在阶段性里程碑时更新进度百分比。
### 🔍 完整性检查清单 (必做)
每次更新 `task_complete.md` 时,必须**逐一检查**以下所有板块:
每次更新 `TASK_COMPLETE.md` 时,必须**逐一检查**以下所有板块:
1. **文件头部 & 导航**
- [ ] `更新时间`:必须是当天日期

View File

@@ -28,6 +28,9 @@ frontend/src/
│ │ ├── HomeHeader.tsx
│ │ ├── MaterialSelector.tsx
│ │ ├── ScriptEditor.tsx
│ │ ├── ScriptExtractionModal.tsx
│ │ ├── script-extraction/
│ │ │ └── useScriptExtraction.ts
│ │ ├── TitleSubtitlePanel.tsx
│ │ ├── FloatingStylePreview.tsx
│ │ ├── VoiceSelector.tsx
@@ -55,11 +58,11 @@ frontend/src/
│ ├── types/
│ │ ├── user.ts # User 类型定义
│ │ └── publish.ts # 发布相关类型
│ └── contexts/ # 已迁移的 Context
├── contexts/ # 全局 ContextAuth、Task
│ └── contexts/ # 全局 ContextAuth、Task
│ ├── AuthContext.tsx
│ └── TaskContext.tsx
├── components/ # 遗留通用组件
── VideoPreviewModal.tsx
│ └── ScriptExtractionModal.tsx
── VideoPreviewModal.tsx
└── proxy.ts # Next.js middleware路由保护
```
@@ -278,8 +281,8 @@ import { formatDate } from '@/shared/lib/media';
- `shared/lib`通用工具函数media.ts / auth.ts / title.ts
- `shared/hooks`:跨功能通用 hooks
- `shared/types`跨功能实体类型User / PublishVideo 等)
- `contexts/`:全局 ContextAuthContext / TaskContext
- `components/`遗留通用组件VideoPreviewModal
- `shared/contexts`:全局 Context(AuthContext / TaskContext)
- `components/`:遗留通用组件(VideoPreviewModal)
## 类型定义规范

View File

@@ -10,18 +10,30 @@
> 这里记录了每一天的核心开发内容与 milestone。
### Day 21: 缺陷修复与持久化回归治理 (Current)
### Day 21: 缺陷修复 + 浮动预览 + 发布重构 + 架构优化 + 多素材生成 (Current)
- [x] **Remotion 崩溃容错**: 渲染进程 SIGABRT 退出时检查输出文件,避免误判失败导致标题/字幕丢失。
- [x] **首页作品选择持久化**: 修复 `fetchGeneratedVideos` 无条件覆盖恢复值的问题,新增 `preferVideoId` 参数控制选中逻辑。
- [x] **发布页作品选择持久化**: 根因为签名 URL 不稳定,全面改用 `video.id` 替代 `path` 进行选择/持久化/比较。
- [x] **预取缓存补全**: 首页预取发布页数据时加入 `id` 字段,确保缓存数据可用于持久化匹配。
- [x] **浮动样式预览窗口**: 标题字幕预览改为 `position: fixed` 浮动窗口,固定左上角,滚动时始终可见。
- [x] **移动端适配**: ScriptEditor 按钮换行、预览默认比例改为 9:16 竖屏。
- [x] **多平台发布重构**: 平台配置独立化DOUYIN_*/WEIXIN_*)、用户隔离 Cookie 管理、抖音刷脸验证二维码、微信发布流程优化。
- [x] **前端结构微调**: ScriptExtractionModal 迁移到 features/、contexts 迁移到 shared/contexts/、清理空目录。
- [x] **后端模块分层**: materials/tools/ref_audios 三个模块补全 router+schemas+service 分层。
- [x] **开发规范更新**: BACKEND_DEV.md 新增渐进原则、DOC_RULES.md 取消 TASK_COMPLETE.md 手动触发约束。
- [x] **文档全面更新**: BACKEND_DEV/README、FRONTEND_DEV、DEPLOY_MANUAL、README.md 同步更新。
- [x] **多素材视频生成(多机位效果)**: 支持多选素材 + 拖拽排序,按素材数量均分音频时长(对齐 Whisper 字边界)自动切换机位。逐段 LatentSync + FFmpeg 拼接。前端 @dnd-kit 拖拽排序 UI。
- [x] **字幕开关移除**: 默认启用逐字高亮字幕,移除开关及相关死代码。
- [x] **视频格式扩展**: 上传支持 mkv/webm/flv/wmv/m4v/ts/mts 等常见格式。
- [x] **Watchdog 优化**: 健康检查阈值提高到 5 次,新增重启冷却期 120 秒,避免误重启。
- [x] **多素材 Bug 修复**: 修复标点分句方案对无句末标点文案无效(改为均分方案)、音频时间偏移导致口型不对齐等缺陷。
### Day 20: 代码质量与安全优化
- [x] **功能性修复**: LatentSync 回退逻辑、任务状态接口认证、User 类型统一。
- [x] **性能优化**: N+1 查询修复、视频上传流式处理、httpx 异步替换、GLM 异步包装。
- [x] **安全修复**: 硬编码 Cookie 配置化、日志敏感信息脱敏、ffprobe 安全调用、CORS 配置化。
- [x] **配置优化**: 存储路径环境变量化、Remotion 预编译加速、LatentSync 绝对路径。
- [x] **文档更新**: 更新 Doc_Rules.md 清单,补齐后端与部署文档;更新 SUBTITLE_DEPLOY.md, FRONTEND_DEV.md, implementation_plan.md。
- [x] **文档更新**: 更新 DOC_RULES.md 清单,补齐后端与部署文档;更新 SUBTITLE_DEPLOY.md, FRONTEND_DEV.md, implementation_plan.md。
- [x] **缺陷修复**: 修复 Remotion 路径解析、发布页持久化竞态、首页选中回归、素材闭包陷阱。
### Day 19: 自动发布稳定性与发布体验优化 🚀

View File

@@ -24,6 +24,33 @@ class GenerateMetaResponse(BaseModel):
tags: list[str]
class TranslateRequest(BaseModel):
    """Translation request."""
    # Copy to translate; the /translate handler rejects empty or whitespace-only values.
    text: str
    # Target language; the handler strips it and forwards it to glm_service.translate_text.
    target_lang: str
@router.post("/translate")
async def translate_text(req: TranslateRequest):
    """
    Translate copy with the AI service.

    Translates the submitted text into the requested target language.

    Args:
        req: Request body carrying ``text`` and ``target_lang``.

    Returns:
        ``success_response`` wrapping ``{"translated_text": ...}``.

    Raises:
        HTTPException: 400 when ``text`` or ``target_lang`` is blank,
            500 when the translation call fails.
    """
    # Normalise once so validation, logging and the service call all see the same values.
    text = (req.text or "").strip()
    target_lang = (req.target_lang or "").strip()

    if not text:
        raise HTTPException(status_code=400, detail="文案不能为空")
    if not target_lang:
        raise HTTPException(status_code=400, detail="目标语言不能为空")

    try:
        logger.info(f"Translating text to {target_lang}: {text[:50]}...")
        translated = await glm_service.translate_text(text, target_lang)
        return success_response({"translated_text": translated})
    except Exception as e:
        logger.error(f"Translate failed: {e}")
        # Chain the original exception so its traceback is not lost.
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/generate-meta")
async def generate_meta(req: GenerateMetaRequest):
"""

View File

@@ -1,416 +1,62 @@
from fastapi import APIRouter, UploadFile, File, HTTPException, Request, BackgroundTasks, Depends
from app.core.config import settings
from app.core.deps import get_current_user
from app.core.response import success_response
from app.services.storage import storage_service
import re
import time
import traceback
import os
import aiofiles
from pathlib import Path
from loguru import logger
import asyncio
from pydantic import BaseModel
from typing import Optional
import httpx
from fastapi import APIRouter, HTTPException, Request, Depends
from loguru import logger
from app.core.deps import get_current_user
from app.core.response import success_response
from app.modules.materials.schemas import RenameMaterialRequest
from app.modules.materials import service
router = APIRouter()
class RenameMaterialRequest(BaseModel):
    # Request body for a material rename; carries only the new name.
    new_name: str
def sanitize_filename(filename: str) -> str:
    """Return ``filename`` with unsafe characters replaced and length capped.

    Each of the characters ``< > : " / \\ | ? *`` is replaced with ``_``.
    Names longer than 100 characters are shortened to exactly 100, keeping
    the extension when it fits.

    Args:
        filename: Name as supplied by the client.

    Returns:
        A name of at most 100 characters containing none of the characters above.
    """
    max_len = 100
    safe_name = re.sub(r'[<>:"/\\|?*]', '_', filename)
    if len(safe_name) > max_len:
        ext = Path(safe_name).suffix
        if len(ext) >= max_len:
            # The suffix alone fills the budget: slicing with ``max_len - len(ext)``
            # would be zero or negative and break the cap, so hard-truncate instead.
            return safe_name[:max_len]
        safe_name = safe_name[:max_len - len(ext)] + ext
    return safe_name
async def process_and_upload(temp_file_path: str, original_filename: str, content_type: str, user_id: str):
    """Strip the multipart envelope from a raw upload dump and upload the payload.

    ``temp_file_path`` is expected to hold the raw body of a SINGLE-file
    multipart request, laid out as:

        --boundary
        Content-Disposition: form-data; name="file"; filename="..."
        Content-Type: video/mp4
        <CRLF><CRLF>
        [DATA]
        <CRLF>--boundary--

    The payload between the part headers and the closing boundary is copied
    to a sibling ``*_video.mp4`` file and uploaded to the materials bucket
    under ``{user_id}/{timestamp}_{safe_name}``.

    Args:
        temp_file_path: Path of the raw request body on disk. Removed on success.
        original_filename: Client-supplied file name; reduced to ``[a-zA-Z0-9._-]``.
        content_type: Content type passed through to the storage service.
        user_id: Used as the directory prefix of the storage path.

    Returns:
        The storage path of the uploaded object.

    Raises:
        Exception: If the boundary or the end of the part headers cannot be
            found in the first 4 KB, if the body contains no payload region,
            or if the upload itself fails. Failures are logged and re-raised.
    """
    try:
        logger.info(f"Processing raw upload: {temp_file_path} for user {user_id}")

        # 1. Locate the payload inside the multipart body.
        file_size = os.path.getsize(temp_file_path)

        with open(temp_file_path, 'rb') as f:
            # The first line is the boundary; the part headers end at the first blank line.
            head = f.read(4096)

            first_line_end = head.find(b'\r\n')
            if first_line_end == -1:
                raise Exception("Could not find boundary in multipart body")

            boundary = head[:first_line_end]  # e.g. --boundary123
            logger.info(f"Detected boundary: {boundary}")

            header_end = head.find(b'\r\n\r\n')
            if header_end == -1:
                raise Exception("Could not find end of multipart headers")

            start_offset = header_end + 4
            logger.info(f"Video data starts at offset: {start_offset}")

            # The closing boundary is searched for in the last 200 bytes only.
            tail_start = max(0, file_size - 200)
            f.seek(tail_start)
            tail = f.read()

            last_boundary_pos = tail.rfind(boundary)
            if last_boundary_pos != -1:
                end_offset = tail_start + last_boundary_pos
                # Drop the CRLF that precedes the boundary only if it is really
                # there. Checking the file (not the tail buffer) also covers a
                # CRLF that sits just before the 200-byte window.
                if end_offset >= 2:
                    f.seek(end_offset - 2)
                    if f.read(2) == b'\r\n':
                        end_offset -= 2
            else:
                logger.warning("Could not find closing boundary, assuming EOF")
                end_offset = file_size

        if end_offset < start_offset:
            # Only the opening boundary was found; there is no payload region.
            raise Exception("Multipart body contains no file data")

        logger.info(f"Video data ends at offset: {end_offset}. Total video size: {end_offset - start_offset}")

        # 2. Copy the payload slice to its own temp file (chunked, to bound memory
        #    during extraction), then upload it.
        video_path = temp_file_path + "_video.mp4"
        try:
            with open(temp_file_path, 'rb') as src, open(video_path, 'wb') as dst:
                src.seek(start_offset)
                bytes_to_copy = end_offset - start_offset
                copied = 0
                while copied < bytes_to_copy:
                    chunk_size = min(1024*1024*10, bytes_to_copy - copied)  # 10MB chunks
                    chunk = src.read(chunk_size)
                    if not chunk:
                        break
                    dst.write(chunk)
                    copied += len(chunk)

            logger.info(f"Extracted video content to {video_path}")

            # 3. Upload to Supabase with user isolation.
            timestamp = int(time.time())
            safe_name = re.sub(r'[^a-zA-Z0-9._-]', '', original_filename)
            # user_id is the directory prefix, which isolates each user's objects.
            storage_path = f"{user_id}/{timestamp}_{safe_name}"

            # upload_file is called with the whole payload as bytes, so the
            # extracted file is read into memory here.
            with open(video_path, 'rb') as f:
                file_content = f.read()

            await storage_service.upload_file(
                bucket=storage_service.BUCKET_MATERIALS,
                path=storage_path,
                file_data=file_content,
                content_type=content_type
            )
        finally:
            # Always remove the extracted copy so a failed upload does not leak it.
            if os.path.exists(video_path):
                os.remove(video_path)

        logger.info(f"Upload to Supabase complete: {storage_path}")

        # The raw dump is only removed once the upload has succeeded.
        os.remove(temp_file_path)

        return storage_path

    except Exception as e:
        logger.error(f"Background upload processing failed: {e}\n{traceback.format_exc()}")
        raise
router = APIRouter()
@router.post("")
async def upload_material(
request: Request,
background_tasks: BackgroundTasks,
current_user: dict = Depends(get_current_user)
):
user_id = current_user["id"]
logger.info(f"ENTERED upload_material (Streaming Mode) for user {user_id}. Headers: {request.headers}")
filename = "unknown_video.mp4" # Fallback
content_type = "video/mp4"
# Try to parse filename from header if possible (unreliable in raw stream)
# We will rely on post-processing or client hint
# Frontend sends standard multipart.
# Create temp file
timestamp = int(time.time())
temp_filename = f"upload_{timestamp}.raw"
temp_path = os.path.join("/tmp", temp_filename) # Use /tmp on Linux
# Ensure /tmp exists (it does) but verify paths
if os.name == 'nt': # Local dev
temp_path = f"d:/tmp/{temp_filename}"
os.makedirs("d:/tmp", exist_ok=True)
logger.info(f"Upload material request from user {user_id}")
try:
total_size = 0
last_log = 0
async with aiofiles.open(temp_path, 'wb') as f:
async for chunk in request.stream():
await f.write(chunk)
total_size += len(chunk)
# Log progress every 20MB
if total_size - last_log > 20 * 1024 * 1024:
logger.info(f"Receiving stream... Processed {total_size / (1024*1024):.2f} MB")
last_log = total_size
logger.info(f"Stream reception complete. Total size: {total_size} bytes. Saved to {temp_path}")
if total_size == 0:
raise HTTPException(400, "Received empty body")
# Attempt to extract filename from the saved file's first bytes?
# Or just accept it as "uploaded_video.mp4" for now to prove it works.
# We can try to regex the header in the file content we just wrote.
# Implemented in background task to return success immediately.
# Wait, if we return immediately, the user's UI might not show the file yet?
# The prompt says "Wait for upload".
# But to avoid User Waiting Timeout, maybe returning early is better?
# NO, user expects the file to be in the list.
# So we Must await the processing.
# But "Processing" (Strip + Upload to Supabase) takes time.
# Receiving took time.
# If we await Supabase upload, does it timeout?
# Supabase upload is outgoing. Usually faster/stable.
# Let's await the processing to ensure "List Materials" shows it.
# We need to extract the filename for the list.
# Quick extract filename from first 4kb
with open(temp_path, 'rb') as f:
head = f.read(4096).decode('utf-8', errors='ignore')
match = re.search(r'filename="([^"]+)"', head)
if match:
filename = match.group(1)
logger.info(f"Extracted filename from body: {filename}")
# Run processing sync (in await)
storage_path = await process_and_upload(temp_path, filename, content_type, user_id)
# Get signed URL (it exists now)
signed_url = await storage_service.get_signed_url(
bucket=storage_service.BUCKET_MATERIALS,
path=storage_path
)
size_mb = total_size / (1024 * 1024) # Approximate (includes headers)
# 从 storage_path 提取显示名
display_name = storage_path.split('/')[-1] # 去掉 user_id 前缀
if '_' in display_name:
parts = display_name.split('_', 1)
if parts[0].isdigit():
display_name = parts[1]
return success_response({
"id": storage_path,
"name": display_name,
"path": signed_url,
"size_mb": size_mb,
"type": "video"
})
result = await service.upload_material(request, user_id)
return success_response(result)
except ValueError as e:
raise HTTPException(400, str(e))
except Exception as e:
error_msg = f"Streaming upload failed: {str(e)}"
detail_msg = f"Exception: {repr(e)}\nArgs: {e.args}\n{traceback.format_exc()}"
logger.error(error_msg + "\n" + detail_msg)
# Write to debug file
try:
with open("debug_upload.log", "a") as logf:
logf.write(f"\n--- Error at {time.ctime()} ---\n")
logf.write(detail_msg)
logf.write("\n-----------------------------\n")
except:
pass
if os.path.exists(temp_path):
try:
os.remove(temp_path)
except:
pass
raise HTTPException(500, f"Upload failed. Check server logs. Error: {str(e)}")
raise HTTPException(500, f"Upload failed. Error: {str(e)}")
@router.get("")
async def list_materials(current_user: dict = Depends(get_current_user)):
user_id = current_user["id"]
try:
# 只列出当前用户目录下的文件
files_obj = await storage_service.list_files(
bucket=storage_service.BUCKET_MATERIALS,
path=user_id
)
semaphore = asyncio.Semaphore(8)
async def build_item(f):
name = f.get('name')
if not name or name == '.emptyFolderPlaceholder':
return None
display_name = name
if '_' in name:
parts = name.split('_', 1)
if parts[0].isdigit():
display_name = parts[1]
full_path = f"{user_id}/{name}"
async with semaphore:
signed_url = await storage_service.get_signed_url(
bucket=storage_service.BUCKET_MATERIALS,
path=full_path
)
metadata = f.get('metadata', {})
size = metadata.get('size', 0)
created_at_str = f.get('created_at', '')
created_at = 0
if created_at_str:
from datetime import datetime
try:
dt = datetime.fromisoformat(created_at_str.replace('Z', '+00:00'))
created_at = int(dt.timestamp())
except Exception:
pass
return {
"id": full_path,
"name": display_name,
"path": signed_url,
"size_mb": size / (1024 * 1024),
"type": "video",
"created_at": created_at
}
tasks = [build_item(f) for f in files_obj]
results = await asyncio.gather(*tasks, return_exceptions=True)
materials = []
for item in results:
if not item:
continue
if isinstance(item, Exception):
logger.warning(f"Material signed url build failed: {item}")
continue
materials.append(item)
materials.sort(key=lambda x: x['id'], reverse=True)
return success_response({"materials": materials})
except Exception as e:
logger.error(f"List materials failed: {e}")
return success_response({"materials": []}, message="获取素材失败")
materials = await service.list_materials(user_id)
return success_response({"materials": materials})
@router.delete("/{material_id:path}")
async def delete_material(material_id: str, current_user: dict = Depends(get_current_user)):
@router.delete("/{material_id:path}")
async def delete_material(material_id: str, current_user: dict = Depends(get_current_user)):
user_id = current_user["id"]
# 验证 material_id 属于当前用户
if not material_id.startswith(f"{user_id}/"):
raise HTTPException(403, "无权删除此素材")
try:
await storage_service.delete_file(
bucket=storage_service.BUCKET_MATERIALS,
path=material_id
)
return success_response(message="素材已删除")
except Exception as e:
raise HTTPException(500, f"删除失败: {str(e)}")
@router.put("/{material_id:path}")
async def rename_material(
material_id: str,
payload: RenameMaterialRequest,
current_user: dict = Depends(get_current_user)
):
user_id = current_user["id"]
if not material_id.startswith(f"{user_id}/"):
raise HTTPException(403, "无权重命名此素材")
new_name_raw = payload.new_name.strip() if payload.new_name else ""
if not new_name_raw:
raise HTTPException(400, "新名称不能为空")
old_name = material_id.split("/", 1)[1]
old_ext = Path(old_name).suffix
base_name = Path(new_name_raw).stem if Path(new_name_raw).suffix else new_name_raw
safe_base = sanitize_filename(base_name).strip()
if not safe_base:
raise HTTPException(400, "新名称无效")
new_filename = f"{safe_base}{old_ext}"
prefix = None
if "_" in old_name:
maybe_prefix, _ = old_name.split("_", 1)
if maybe_prefix.isdigit():
prefix = maybe_prefix
if prefix:
new_filename = f"{prefix}_{new_filename}"
new_path = f"{user_id}/{new_filename}"
try:
if new_path != material_id:
await storage_service.move_file(
bucket=storage_service.BUCKET_MATERIALS,
from_path=material_id,
to_path=new_path
)
signed_url = await storage_service.get_signed_url(
bucket=storage_service.BUCKET_MATERIALS,
path=new_path
)
display_name = new_filename
if "_" in new_filename:
parts = new_filename.split("_", 1)
if parts[0].isdigit():
display_name = parts[1]
return success_response({
"id": new_path,
"name": display_name,
"path": signed_url,
}, message="重命名成功")
except Exception as e:
raise HTTPException(500, f"重命名失败: {str(e)}")
await service.delete_material(material_id, user_id)
return success_response(message="素材已删除")
except PermissionError as e:
raise HTTPException(403, str(e))
except Exception as e:
raise HTTPException(500, f"删除失败: {str(e)}")
@router.put("/{material_id:path}")
async def rename_material(
    material_id: str,
    payload: RenameMaterialRequest,
    current_user: dict = Depends(get_current_user)
):
    """Rename a material on behalf of the authenticated user.

    The work is delegated to ``service.rename_material``; this handler only
    translates service errors into HTTP responses:
    ``PermissionError`` -> 403, ``ValueError`` -> 400, anything else -> 500.
    """
    owner_id = current_user["id"]
    try:
        renamed = await service.rename_material(material_id, payload.new_name, owner_id)
        return success_response(renamed, message="重命名成功")
    except PermissionError as denied:
        raise HTTPException(403, str(denied))
    except ValueError as invalid:
        raise HTTPException(400, str(invalid))
    except Exception as failure:
        raise HTTPException(500, f"重命名失败: {str(failure)}")

View File

@@ -0,0 +1,14 @@
from pydantic import BaseModel
class RenameMaterialRequest(BaseModel):
    """Request body for the material rename endpoint."""
    # New display name requested by the client; the service layer trims,
    # validates and sanitizes it before it is used in a storage path.
    new_name: str
class MaterialItem(BaseModel):
    """One material entry, mirroring the dict shape built by the materials service."""
    # Storage path of the object ("{user_id}/{stored file name}"); doubles as the id.
    id: str
    # Display name: the stored file name without its numeric timestamp prefix.
    name: str
    # Signed URL for accessing the object.
    path: str
    # Object size in mebibytes (bytes / 1024 / 1024).
    size_mb: float
    # Material kind; the service currently always emits "video".
    type: str = "video"
    # Creation time as a Unix timestamp in seconds; 0 when unknown.
    created_at: int = 0

View File

@@ -0,0 +1,296 @@
import re
import os
import time
import asyncio
import traceback
import aiofiles
from pathlib import Path
from loguru import logger
from app.services.storage import storage_service
def sanitize_filename(filename: str) -> str:
    """Return a storage-safe version of ``filename``.

    Characters that are illegal in file names on common platforms are each
    replaced by an underscore. Results longer than 100 characters are cut
    down to exactly 100 while keeping the extension intact.
    """
    max_length = 100
    cleaned = re.sub(r'[<>:"/\\|?*]', '_', filename)
    if len(cleaned) <= max_length:
        return cleaned
    suffix = Path(cleaned).suffix
    return cleaned[:max_length - len(suffix)] + suffix
def _extract_display_name(storage_name: str) -> str:
"""从存储文件名中提取显示名(去掉时间戳前缀)"""
if '_' in storage_name:
parts = storage_name.split('_', 1)
if parts[0].isdigit():
return parts[1]
return storage_name
async def _process_and_upload(temp_file_path: str, original_filename: str, content_type: str, user_id: str) -> str:
    """Strip the multipart envelope from a raw upload and store the payload in Supabase.

    ``temp_file_path`` holds the raw request body. The first line of that body
    is taken as the multipart boundary, the payload starts after the first
    blank line (``\\r\\n\\r\\n``) and ends two bytes (the CRLF) before the last
    occurrence of the boundary found in the final 200 bytes of the file.

    Args:
        temp_file_path: Path of the raw body on disk. Removed on success; on
            failure it is left for the caller to clean up.
        original_filename: Client-supplied file name, reduced to
            ``[a-zA-Z0-9._-]`` before being used in the storage path.
        content_type: Content type passed through to the storage upload.
        user_id: Owner id, used as the directory prefix of the storage path.

    Returns:
        The storage path ``"{user_id}/{timestamp}_{safe_name}"``.

    Raises:
        Exception: If the boundary line or the end of the part headers cannot
            be found in the first 4096 bytes, or if the upload fails. Every
            failure is logged with a traceback and re-raised.
    """
    # Intermediate file holding only the payload; always removed in ``finally``.
    video_path = temp_file_path + "_video.mp4"
    try:
        logger.info(f"Processing raw upload: {temp_file_path} for user {user_id}")
        file_size = os.path.getsize(temp_file_path)

        with open(temp_file_path, 'rb') as f:
            head = f.read(4096)
            first_line_end = head.find(b'\r\n')
            if first_line_end == -1:
                raise Exception("Could not find boundary in multipart body")
            boundary = head[:first_line_end]
            logger.info(f"Detected boundary: {boundary}")

            header_end = head.find(b'\r\n\r\n')
            if header_end == -1:
                raise Exception("Could not find end of multipart headers")
            start_offset = header_end + 4
            logger.info(f"Video data starts at offset: {start_offset}")

            tail_start = max(0, file_size - 200)
            f.seek(tail_start)
            tail = f.read()
            last_boundary_pos = tail.rfind(boundary)
            # Payload ends 2 bytes (CRLF) before the closing boundary.
            candidate_end = tail_start + last_boundary_pos - 2
            # For very small bodies the tail window also contains the opening
            # boundary; a match located before the payload start is not a
            # closing boundary and must not yield a negative payload length.
            if last_boundary_pos != -1 and candidate_end >= start_offset:
                end_offset = candidate_end
            else:
                logger.warning("Could not find closing boundary, assuming EOF")
                end_offset = file_size

        logger.info(f"Video data ends at offset: {end_offset}. Total video size: {end_offset - start_offset}")

        # Copy the payload range in chunks of at most 10 MiB.
        with open(temp_file_path, 'rb') as src, open(video_path, 'wb') as dst:
            src.seek(start_offset)
            bytes_to_copy = end_offset - start_offset
            copied = 0
            while copied < bytes_to_copy:
                chunk_size = min(1024 * 1024 * 10, bytes_to_copy - copied)
                chunk = src.read(chunk_size)
                if not chunk:
                    break
                dst.write(chunk)
                copied += len(chunk)
        logger.info(f"Extracted video content to {video_path}")

        timestamp = int(time.time())
        safe_name = re.sub(r'[^a-zA-Z0-9._-]', '', original_filename)
        storage_path = f"{user_id}/{timestamp}_{safe_name}"

        # The storage call takes the whole payload as bytes.
        with open(video_path, 'rb') as f:
            file_content = f.read()
        await storage_service.upload_file(
            bucket=storage_service.BUCKET_MATERIALS,
            path=storage_path,
            file_data=file_content,
            content_type=content_type
        )
        logger.info(f"Upload to Supabase complete: {storage_path}")

        os.remove(temp_file_path)
        return storage_path
    except Exception as e:
        logger.error(f"Background upload processing failed: {e}\n{traceback.format_exc()}")
        raise
    finally:
        # Remove the extracted payload on success and on failure alike, so a
        # failed upload does not leave a large file behind.
        if os.path.exists(video_path):
            try:
                os.remove(video_path)
            except OSError as cleanup_err:
                logger.warning(f"Could not remove temp file {video_path}: {cleanup_err}")
async def upload_material(request, user_id: str) -> dict:
    """Receive a streamed multipart upload, store it in Supabase and describe the result.

    The request body is streamed to a temporary ``.raw`` file, the client file
    name is pulled from the first 4 KiB of that file, and the payload is then
    extracted and uploaded by ``_process_and_upload``.

    Args:
        request: Incoming request object; only ``request.stream()`` is used.
        user_id: Owner id used as the storage directory prefix.

    Returns:
        A dict with ``id`` (storage path), ``name`` (display name), ``path``
        (signed URL), ``size_mb`` (size of the raw body, multipart overhead
        included) and ``type`` (``"video"``).

    Raises:
        ValueError: If the request body is empty.
        Exception: Any other failure is logged, appended to
            ``debug_upload.log`` and re-raised.
    """
    # Local import: keeps this function self-contained without touching the
    # module's import block.
    import uuid

    filename = "unknown_video.mp4"
    content_type = "video/mp4"

    # A per-second timestamp alone is not unique: two uploads starting within
    # the same second would write to (and later read from) the same temp file.
    temp_filename = f"upload_{int(time.time())}_{uuid.uuid4().hex}.raw"
    temp_path = os.path.join("/tmp", temp_filename)
    if os.name == 'nt':
        temp_path = f"d:/tmp/{temp_filename}"
        os.makedirs("d:/tmp", exist_ok=True)

    try:
        total_size = 0
        last_log = 0

        async with aiofiles.open(temp_path, 'wb') as f:
            async for chunk in request.stream():
                await f.write(chunk)
                total_size += len(chunk)
                # Progress log roughly every 20 MiB received.
                if total_size - last_log > 20 * 1024 * 1024:
                    logger.info(f"Receiving stream... Processed {total_size / (1024*1024):.2f} MB")
                    last_log = total_size

        logger.info(f"Stream reception complete. Total size: {total_size} bytes. Saved to {temp_path}")

        if total_size == 0:
            raise ValueError("Received empty body")

        # The part headers sit at the start of the body; look there for the
        # client-supplied file name.
        with open(temp_path, 'rb') as f:
            head = f.read(4096).decode('utf-8', errors='ignore')
        match = re.search(r'filename="([^"]+)"', head)
        if match:
            filename = match.group(1)
            logger.info(f"Extracted filename from body: {filename}")

        storage_path = await _process_and_upload(temp_path, filename, content_type, user_id)

        signed_url = await storage_service.get_signed_url(
            bucket=storage_service.BUCKET_MATERIALS,
            path=storage_path
        )

        size_mb = total_size / (1024 * 1024)
        display_name = _extract_display_name(storage_path.split('/')[-1])

        return {
            "id": storage_path,
            "name": display_name,
            "path": signed_url,
            "size_mb": size_mb,
            "type": "video"
        }
    except Exception as e:
        error_msg = f"Streaming upload failed: {str(e)}"
        detail_msg = f"Exception: {repr(e)}\nArgs: {e.args}\n{traceback.format_exc()}"
        logger.error(error_msg + "\n" + detail_msg)

        # Best-effort debug log; a failure here must not mask the real error.
        try:
            with open("debug_upload.log", "a") as logf:
                logf.write(f"\n--- Error at {time.ctime()} ---\n")
                logf.write(detail_msg)
                logf.write("\n-----------------------------\n")
        except Exception as log_err:
            logger.warning(f"Could not write debug_upload.log: {log_err}")

        # Best-effort cleanup of the raw body.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_err:
                logger.warning(f"Could not remove temp file {temp_path}: {cleanup_err}")
        raise
async def list_materials(user_id: str) -> list[dict]:
    """List every material stored under the user's directory.

    Signed URLs are requested concurrently, at most 8 at a time. Entries whose
    URL lookup fails are logged and skipped.

    Args:
        user_id: Owner id; also the storage directory that is listed.

    Returns:
        A list of dicts with ``id``, ``name``, ``path``, ``size_mb``, ``type``
        and ``created_at`` (Unix seconds, 0 when unknown), sorted by ``id`` in
        descending order. An empty list is returned if listing fails.
    """
    from datetime import datetime

    try:
        files_obj = await storage_service.list_files(
            bucket=storage_service.BUCKET_MATERIALS,
            path=user_id
        )
        semaphore = asyncio.Semaphore(8)

        async def build_item(f):
            name = f.get('name')
            if not name or name == '.emptyFolderPlaceholder':
                return None

            display_name = _extract_display_name(name)
            full_path = f"{user_id}/{name}"

            async with semaphore:
                signed_url = await storage_service.get_signed_url(
                    bucket=storage_service.BUCKET_MATERIALS,
                    path=full_path
                )

            # The key may be present with a null value; ``.get(key, {})``
            # would then hand back None and break the size lookup.
            metadata = f.get('metadata') or {}
            size = metadata.get('size') or 0

            created_at_str = f.get('created_at') or ''
            created_at = 0
            if created_at_str:
                try:
                    # ``fromisoformat`` does not accept a trailing "Z" on
                    # older Python versions, so spell out the UTC offset.
                    dt = datetime.fromisoformat(created_at_str.replace('Z', '+00:00'))
                    created_at = int(dt.timestamp())
                except Exception:
                    # Unparseable timestamp: keep the 0 default.
                    pass

            return {
                "id": full_path,
                "name": display_name,
                "path": signed_url,
                "size_mb": size / (1024 * 1024),
                "type": "video",
                "created_at": created_at
            }

        tasks = [build_item(f) for f in files_obj]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        materials = []
        for item in results:
            if not item:
                continue
            if isinstance(item, Exception):
                logger.warning(f"Material signed url build failed: {item}")
                continue
            materials.append(item)

        materials.sort(key=lambda x: x['id'], reverse=True)
        return materials
    except Exception as e:
        logger.error(f"List materials failed: {e}")
        return []
async def delete_material(material_id: str, user_id: str) -> None:
    """Delete one of the user's materials from storage.

    Args:
        material_id: Storage path of the object; must live under ``"{user_id}/"``.
        user_id: Id of the requesting user.

    Raises:
        PermissionError: If ``material_id`` is outside the user's directory or
            contains a ``..`` path segment.
    """
    # A bare prefix check would accept "{user_id}/../someone_else/file";
    # reject parent-directory segments as well.
    if not material_id.startswith(f"{user_id}/") or ".." in material_id.split("/"):
        raise PermissionError("无权删除此素材")
    await storage_service.delete_file(
        bucket=storage_service.BUCKET_MATERIALS,
        path=material_id
    )
async def rename_material(material_id: str, new_name_raw: str, user_id: str) -> dict:
    """Rename a material by moving its storage object, keeping extension and timestamp prefix.

    Args:
        material_id: Current storage path; must live under ``"{user_id}/"``.
        new_name_raw: Requested name. Any extension on it is discarded and the
            original file's extension is kept.
        user_id: Id of the requesting user.

    Returns:
        A dict with the new ``id`` (storage path), ``name`` (display name) and
        ``path`` (signed URL).

    Raises:
        PermissionError: If ``material_id`` is outside the user's directory or
            contains a ``..`` path segment.
        ValueError: If the new name is empty before or after sanitizing.
    """
    # A bare prefix check would accept "{user_id}/../someone_else/file";
    # reject parent-directory segments as well.
    if not material_id.startswith(f"{user_id}/") or ".." in material_id.split("/"):
        raise PermissionError("无权重命名此素材")

    new_name_raw = new_name_raw.strip() if new_name_raw else ""
    if not new_name_raw:
        raise ValueError("新名称不能为空")

    old_name = material_id.split("/", 1)[1]
    old_ext = Path(old_name).suffix
    base_name = Path(new_name_raw).stem if Path(new_name_raw).suffix else new_name_raw
    safe_base = sanitize_filename(base_name).strip()
    if not safe_base:
        raise ValueError("新名称无效")

    new_filename = f"{safe_base}{old_ext}"

    # Carry the numeric timestamp prefix over to the new name.
    prefix = None
    if "_" in old_name:
        maybe_prefix, _ = old_name.split("_", 1)
        if maybe_prefix.isdigit():
            prefix = maybe_prefix
    if prefix:
        new_filename = f"{prefix}_{new_filename}"

    new_path = f"{user_id}/{new_filename}"

    # Nothing to move when the name is unchanged.
    if new_path != material_id:
        await storage_service.move_file(
            bucket=storage_service.BUCKET_MATERIALS,
            from_path=material_id,
            to_path=new_path
        )

    signed_url = await storage_service.get_signed_url(
        bucket=storage_service.BUCKET_MATERIALS,
        path=new_path
    )

    display_name = _extract_display_name(new_filename)
    return {
        "id": new_path,
        "name": display_name,
        "path": signed_url,
    }

View File

@@ -1,83 +1,14 @@
"""
参考音频管理 API
支持上传/列表/删除参考音频,用于 Qwen3-TTS 声音克隆
"""
"""参考音频管理 API"""
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends
from pydantic import BaseModel
from typing import List, Optional
from pathlib import Path
from loguru import logger
import time
import json
import subprocess
import tempfile
import os
import re
from app.core.deps import get_current_user
from app.services.storage import storage_service
from app.core.response import success_response
from app.modules.ref_audios.schemas import RenameRequest
from app.modules.ref_audios import service
router = APIRouter()
# 支持的音频格式
ALLOWED_AUDIO_EXTENSIONS = {'.wav', '.mp3', '.m4a', '.webm', '.ogg', '.flac', '.aac'}
# 参考音频 bucket
BUCKET_REF_AUDIOS = "ref-audios"
class RefAudioResponse(BaseModel):
id: str
name: str
path: str # signed URL for playback
ref_text: str
duration_sec: float
created_at: int
class RefAudioListResponse(BaseModel):
items: List[RefAudioResponse]
def sanitize_filename(filename: str) -> str:
"""清理文件名,移除特殊字符"""
safe_name = re.sub(r'[<>:"/\\|?*\s]', '_', filename)
if len(safe_name) > 50:
ext = Path(safe_name).suffix
safe_name = safe_name[:50 - len(ext)] + ext
return safe_name
def get_audio_duration(file_path: str) -> float:
"""获取音频时长 (秒)"""
try:
result = subprocess.run(
['ffprobe', '-v', 'quiet', '-show_entries', 'format=duration',
'-of', 'csv=p=0', file_path],
capture_output=True, text=True, timeout=10
)
return float(result.stdout.strip())
except Exception as e:
logger.warning(f"获取音频时长失败: {e}")
return 0.0
def convert_to_wav(input_path: str, output_path: str) -> bool:
"""将音频转换为 WAV 格式 (16kHz, mono)"""
try:
subprocess.run([
'ffmpeg', '-y', '-i', input_path,
'-ar', '16000', # 16kHz 采样率
'-ac', '1', # 单声道
'-acodec', 'pcm_s16le', # 16-bit PCM
output_path
], capture_output=True, timeout=60, check=True)
return True
except Exception as e:
logger.error(f"音频转换失败: {e}")
return False
@router.post("")
async def upload_ref_audio(
@@ -85,156 +16,12 @@ async def upload_ref_audio(
ref_text: str = Form(...),
user: dict = Depends(get_current_user)
):
"""
上传参考音频
- file: 音频文件 (支持 wav, mp3, m4a, webm 等)
- ref_text: 参考音频的转写文字 (必填)
"""
user_id = user["id"]
if not file.filename:
raise HTTPException(status_code=400, detail="文件名无效")
filename = file.filename
# 验证文件扩展名
ext = Path(filename).suffix.lower()
if ext not in ALLOWED_AUDIO_EXTENSIONS:
raise HTTPException(
status_code=400,
detail=f"不支持的音频格式: {ext}。支持的格式: {', '.join(ALLOWED_AUDIO_EXTENSIONS)}"
)
# 验证 ref_text
if not ref_text or len(ref_text.strip()) < 2:
raise HTTPException(status_code=400, detail="参考文字不能为空")
"""上传参考音频"""
try:
# 创建临时文件
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp_input:
content = await file.read()
tmp_input.write(content)
tmp_input_path = tmp_input.name
# 转换为 WAV 格式
tmp_wav_path = tmp_input_path + ".wav"
if ext != '.wav':
if not convert_to_wav(tmp_input_path, tmp_wav_path):
raise HTTPException(status_code=500, detail="音频格式转换失败")
else:
# 即使是 wav 也要标准化格式
convert_to_wav(tmp_input_path, tmp_wav_path)
# 获取音频时长
duration = get_audio_duration(tmp_wav_path)
if duration < 1.0:
raise HTTPException(status_code=400, detail="音频时长过短,至少需要 1 秒")
if duration > 60.0:
raise HTTPException(status_code=400, detail="音频时长过长,最多 60 秒")
# 3. 处理重名逻辑 (Friendly Display Name)
original_name = filename
# 获取用户现有的所有参考音频列表 (为了检查文件名冲突)
# 注意: 这种列表方式在文件极多时性能一般,但考虑到单用户参考音频数量有限,目前可行
existing_files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
existing_names = set()
# 预加载所有现有的 display name
# 这里需要并发请求 metadata 可能会慢,优化: 仅检查 metadata 文件并解析
# 简易方案: 仅在 metadata 中读取 original_filename
# 但 list_files 返回的是 name我们需要 metadata
# 考虑到性能,这里使用一种妥协方案:
# 我们不做全量检查,而是简单的检查:如果用户上传 myvoice.wav
# 我们看看有没有 (timestamp)_myvoice.wav 这种其实并不能准确判断 display name 是否冲突
#
# 正确做法: 应该有个数据库表存 metadata。但目前是无数据库设计。
#
# 改用简单方案:
# 既然我们无法快速获取所有 display name
# 我们暂时只处理 "在新上传时original_filename 保持原样"
# 但用户希望 "如果在列表中看到重复的,自动加(1)"
#
# 鉴于无数据库架构的限制,要在上传时知道"已有的 display name" 成本太高(需遍历下载所有json)。
#
# 💡 替代方案:
# 我们不检查旧的。我们只保证**存储**唯一。
# 对于用户提到的 "新上传的文件名后加个数字" -> 这通常是指 "另存为" 的逻辑。
# 既然用户现在的痛点是 "显示了时间戳太丑",而我已经去掉了时间戳显示。
# 那么如果用户上传两个 "TEST.wav",列表里就会有两个 "TEST.wav" (但时间不同)。
# 这其实是可以接受的。
#
# 但如果用户强求 "自动重命名":
# 我们可以在这里做一个轻量级的 "同名检测"
# 检查有没有 *_{original_name} 的文件存在。
# 如果 storage 里已经有 123_abc.wav, 456_abc.wav
# 我们可以认为 abc.wav 已经存在。
dup_count = 0
search_suffix = f"_{original_name}" # 比如 _test.wav
for f in existing_files:
fname = f.get('name', '')
if fname.endswith(search_suffix):
dup_count += 1
final_display_name = original_name
if dup_count > 0:
name_stem = Path(original_name).stem
name_ext = Path(original_name).suffix
final_display_name = f"{name_stem}({dup_count}){name_ext}"
# 生成存储路径 (唯一ID)
timestamp = int(time.time())
safe_name = sanitize_filename(Path(filename).stem)
storage_path = f"{user_id}/{timestamp}_{safe_name}.wav"
# 上传 WAV 文件到 Supabase
with open(tmp_wav_path, 'rb') as f:
wav_data = f.read()
await storage_service.upload_file(
bucket=BUCKET_REF_AUDIOS,
path=storage_path,
file_data=wav_data,
content_type="audio/wav"
)
# 上传元数据 JSON
metadata = {
"ref_text": ref_text.strip(),
"original_filename": final_display_name, # 这里的名字如果有重复会自动加(1)
"duration_sec": duration,
"created_at": timestamp
}
metadata_path = f"{user_id}/{timestamp}_{safe_name}.json"
await storage_service.upload_file(
bucket=BUCKET_REF_AUDIOS,
path=metadata_path,
file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
content_type="application/json"
)
# 获取签名 URL
signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)
# 清理临时文件
os.unlink(tmp_input_path)
if os.path.exists(tmp_wav_path):
os.unlink(tmp_wav_path)
return success_response(RefAudioResponse(
id=storage_path,
name=filename,
path=signed_url,
ref_text=ref_text.strip(),
duration_sec=duration,
created_at=timestamp
).model_dump())
except HTTPException:
raise
result = await service.upload_ref_audio(file, ref_text, user["id"])
return success_response(result)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error(f"上传参考音频失败: {e}")
raise HTTPException(status_code=500, detail=f"上传失败: {str(e)}")
@@ -243,81 +30,9 @@ async def upload_ref_audio(
@router.get("")
async def list_ref_audios(user: dict = Depends(get_current_user)):
"""列出当前用户的所有参考音频"""
user_id = user["id"]
try:
# 列出用户目录下的文件
files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
# 过滤出 .wav 文件
wav_files = [f for f in files if f.get("name", "").endswith(".wav")]
if not wav_files:
return success_response(RefAudioListResponse(items=[]).model_dump())
# 并发获取所有 metadata 和签名 URL
async def fetch_audio_info(f):
"""获取单个音频的信息metadata + signed URL"""
name = f.get("name", "")
storage_path = f"{user_id}/{name}"
metadata_name = name.replace(".wav", ".json")
metadata_path = f"{user_id}/{metadata_name}"
ref_text = ""
duration_sec = 0.0
created_at = 0
original_filename = ""
try:
# 获取 metadata 内容
metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
import httpx
async with httpx.AsyncClient(timeout=5.0) as client:
resp = await client.get(metadata_url)
if resp.status_code == 200:
metadata = resp.json()
ref_text = metadata.get("ref_text", "")
duration_sec = metadata.get("duration_sec", 0.0)
created_at = metadata.get("created_at", 0)
original_filename = metadata.get("original_filename", "")
except Exception as e:
logger.debug(f"读取 metadata 失败: {e}")
# 从文件名提取时间戳
try:
created_at = int(name.split("_")[0])
except:
pass
# 获取音频签名 URL
signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)
# 优先显示原始文件名 (去掉时间戳前缀)
display_name = original_filename if original_filename else name
# 如果原始文件名丢失,尝试从现有文件名中通过正则去掉时间戳
if not display_name or display_name == name:
# 匹配 "1234567890_filename.wav"
match = re.match(r'^\d+_(.+)$', name)
if match:
display_name = match.group(1)
return RefAudioResponse(
id=storage_path,
name=display_name,
path=signed_url,
ref_text=ref_text,
duration_sec=duration_sec,
created_at=created_at
)
# 使用 asyncio.gather 并发获取所有音频信息
import asyncio
items = await asyncio.gather(*[fetch_audio_info(f) for f in wav_files])
# 按创建时间倒序排列
items = sorted(items, key=lambda x: x.created_at, reverse=True)
return success_response(RefAudioListResponse(items=items).model_dump())
result = await service.list_ref_audios(user["id"])
return success_response(result)
except Exception as e:
logger.error(f"列出参考音频失败: {e}")
raise HTTPException(status_code=500, detail=f"获取列表失败: {str(e)}")
@@ -326,96 +41,30 @@ async def list_ref_audios(user: dict = Depends(get_current_user)):
@router.delete("/{audio_id:path}")
async def delete_ref_audio(audio_id: str, user: dict = Depends(get_current_user)):
"""删除参考音频"""
user_id = user["id"]
# 安全检查:确保只能删除自己的文件
if not audio_id.startswith(f"{user_id}/"):
raise HTTPException(status_code=403, detail="无权删除此文件")
try:
# 删除 WAV 文件
await storage_service.delete_file(BUCKET_REF_AUDIOS, audio_id)
# 删除 metadata JSON
metadata_path = audio_id.replace(".wav", ".json")
try:
await storage_service.delete_file(BUCKET_REF_AUDIOS, metadata_path)
except:
pass # metadata 可能不存在
await service.delete_ref_audio(audio_id, user["id"])
return success_response(message="删除成功")
except PermissionError as e:
raise HTTPException(status_code=403, detail=str(e))
except Exception as e:
logger.error(f"删除参考音频失败: {e}")
raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}")
class RenameRequest(BaseModel):
new_name: str
@router.put("/{audio_id:path}")
async def rename_ref_audio(
audio_id: str,
request: RenameRequest,
user: dict = Depends(get_current_user)
):
"""重命名参考音频 (修改 metadata 中的 display name)"""
user_id = user["id"]
# 安全检查
if not audio_id.startswith(f"{user_id}/"):
raise HTTPException(status_code=403, detail="无权修改此文件")
new_name = request.new_name.strip()
if not new_name:
raise HTTPException(status_code=400, detail="新名称不能为空")
# 确保新名称有后缀 (保留原后缀或添加 .wav)
if not Path(new_name).suffix:
new_name += ".wav"
"""重命名参考音频"""
try:
# 1. 下载现有的 metadata
metadata_path = audio_id.replace(".wav", ".json")
try:
# 获取已有的 JSON
import httpx
metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
if not metadata_url:
# 如果 json 不存在,则需要新建一个基础的
raise Exception("Metadata not found")
async with httpx.AsyncClient() as client:
resp = await client.get(metadata_url)
if resp.status_code == 200:
metadata = resp.json()
else:
raise Exception(f"Failed to fetch metadata: {resp.status_code}")
except Exception as e:
logger.warning(f"无法读取元数据: {e}, 将创建新的元数据")
# 兜底:如果读取失败,构建最小元数据
metadata = {
"ref_text": "", # 可能丢失
"duration_sec": 0.0,
"created_at": int(time.time()),
"original_filename": new_name
}
# 2. 更新 original_filename
metadata["original_filename"] = new_name
# 3. 覆盖上传 metadata
await storage_service.upload_file(
bucket=BUCKET_REF_AUDIOS,
path=metadata_path,
file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
content_type="application/json"
)
return success_response({"name": new_name}, message="重命名成功")
result = await service.rename_ref_audio(audio_id, request.new_name, user["id"])
return success_response(result, message="重命名成功")
except PermissionError as e:
raise HTTPException(status_code=403, detail=str(e))
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error(f"重命名失败: {e}")
raise HTTPException(status_code=500, detail=f"重命名失败: {str(e)}")

View File

@@ -0,0 +1,19 @@
from pydantic import BaseModel
from typing import List
class RefAudioResponse(BaseModel):
    """Description of one stored reference audio, as returned by the ref-audio service."""
    # Storage path of the WAV object ("{user_id}/{timestamp}_{name}.wav"); doubles as the id.
    id: str
    # Display name shown to the user.
    name: str
    # Signed URL of the WAV file.
    path: str
    # Transcript of the reference audio.
    ref_text: str
    # Duration of the converted WAV in seconds.
    duration_sec: float
    # Creation time as a Unix timestamp in seconds.
    created_at: int
class RefAudioListResponse(BaseModel):
    """Wrapper for a list of reference audios."""
    # The user's reference audios; the service sorts them newest first.
    items: List[RefAudioResponse]
class RenameRequest(BaseModel):
    """Request body for the reference-audio rename endpoint."""
    # New display name; the service appends ".wav" when it has no extension.
    new_name: str

View File

@@ -0,0 +1,269 @@
import re
import os
import time
import json
import asyncio
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
import httpx
from loguru import logger
from app.services.storage import storage_service
from app.modules.ref_audios.schemas import RefAudioResponse, RefAudioListResponse
ALLOWED_AUDIO_EXTENSIONS = {'.wav', '.mp3', '.m4a', '.webm', '.ogg', '.flac', '.aac'}
BUCKET_REF_AUDIOS = "ref-audios"
def sanitize_filename(filename: str) -> str:
    """Return a storage-safe version of ``filename``.

    Whitespace and characters that are illegal in file names on common
    platforms are each replaced by an underscore. Results longer than 50
    characters are cut down to exactly 50 while keeping the extension.
    """
    max_length = 50
    cleaned = re.sub(r'[<>:"/\\|?*\s]', '_', filename)
    if len(cleaned) <= max_length:
        return cleaned
    suffix = Path(cleaned).suffix
    return cleaned[:max_length - len(suffix)] + suffix
def _get_audio_duration(file_path: str) -> float:
    """Return the duration of an audio file in seconds, as reported by ffprobe.

    Any failure (ffprobe missing, timeout after 10 s, unparseable output) is
    logged as a warning and reported as ``0.0``.
    """
    probe_cmd = [
        'ffprobe', '-v', 'quiet',
        '-show_entries', 'format=duration',
        '-of', 'csv=p=0',
        file_path,
    ]
    try:
        completed = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=10)
        seconds_text = completed.stdout.strip()
        return float(seconds_text)
    except Exception as e:
        logger.warning(f"获取音频时长失败: {e}")
        return 0.0
def _convert_to_wav(input_path: str, output_path: str) -> bool:
    """Transcode an audio file to 16 kHz mono 16-bit PCM WAV using ffmpeg.

    Returns ``True`` when ffmpeg exits successfully within 60 s. Any failure
    is logged as an error and reported as ``False``.
    """
    ffmpeg_cmd = [
        'ffmpeg', '-y', '-i', input_path,
        '-ar', '16000',          # sample rate
        '-ac', '1',              # mono
        '-acodec', 'pcm_s16le',  # 16-bit PCM
        output_path,
    ]
    try:
        subprocess.run(ffmpeg_cmd, capture_output=True, timeout=60, check=True)
    except Exception as e:
        logger.error(f"音频转换失败: {e}")
        return False
    return True
async def upload_ref_audio(file, ref_text: str, user_id: str) -> dict:
    """Validate, transcode and store a reference audio plus its metadata.

    The upload is converted to 16 kHz mono WAV, its duration is checked
    (1 to 60 seconds), and both the WAV and a JSON metadata object are
    written to the reference-audio bucket under ``"{user_id}/"``.

    Args:
        file: Uploaded file object exposing ``filename`` and an async ``read()``.
        ref_text: Transcript of the audio; at least 2 non-blank characters.
        user_id: Owner id used as the storage directory prefix.

    Returns:
        ``RefAudioResponse(...).model_dump()`` for the stored audio.

    Raises:
        ValueError: Missing file name, unsupported extension, blank transcript,
            or duration outside 1 to 60 seconds.
        RuntimeError: If the conversion to WAV fails.
    """
    if not file.filename:
        raise ValueError("文件名无效")

    filename = file.filename
    ext = Path(filename).suffix.lower()
    if ext not in ALLOWED_AUDIO_EXTENSIONS:
        # Sorted so the message is stable (set iteration order is not).
        raise ValueError(f"不支持的音频格式: {ext}。支持的格式: {', '.join(sorted(ALLOWED_AUDIO_EXTENSIONS))}")

    if not ref_text or len(ref_text.strip()) < 2:
        raise ValueError("参考文字不能为空")

    # Reserve the temp path first so that every later step, including reading
    # the upload, is covered by the cleanup in ``finally``.
    with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp_input:
        tmp_input_path = tmp_input.name
    tmp_wav_path = tmp_input_path + ".wav"

    try:
        content = await file.read()
        with open(tmp_input_path, 'wb') as raw_out:
            raw_out.write(content)

        # ffmpeg/ffprobe are blocking subprocess calls (up to 60 s); run them
        # in a worker thread so the event loop keeps serving other requests.
        if not await asyncio.to_thread(_convert_to_wav, tmp_input_path, tmp_wav_path):
            raise RuntimeError("音频格式转换失败")

        duration = await asyncio.to_thread(_get_audio_duration, tmp_wav_path)
        if duration < 1.0:
            raise ValueError("音频时长过短,至少需要 1 秒")
        if duration > 60.0:
            raise ValueError("音频时长过长,最多 60 秒")

        timestamp = int(time.time())
        safe_name = sanitize_filename(Path(filename).stem)
        storage_path = f"{user_id}/{timestamp}_{safe_name}.wav"

        # Duplicate check: stored objects are named "<timestamp>_<safe_name>.wav",
        # so compare against that form rather than the raw upload name (which
        # may have another extension or unsanitized characters).
        existing_files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
        stored_pattern = re.compile(rf"\d+_{re.escape(safe_name)}\.wav")
        dup_count = sum(
            1 for f in existing_files
            if stored_pattern.fullmatch(f.get('name') or '')
        )

        final_display_name = filename
        if dup_count > 0:
            name_stem = Path(filename).stem
            name_ext = Path(filename).suffix
            final_display_name = f"{name_stem}({dup_count}){name_ext}"

        with open(tmp_wav_path, 'rb') as f:
            wav_data = f.read()

        await storage_service.upload_file(
            bucket=BUCKET_REF_AUDIOS,
            path=storage_path,
            file_data=wav_data,
            content_type="audio/wav"
        )

        # Metadata lives next to the WAV under the same base name.
        metadata = {
            "ref_text": ref_text.strip(),
            "original_filename": final_display_name,
            "duration_sec": duration,
            "created_at": timestamp
        }
        metadata_path = f"{user_id}/{timestamp}_{safe_name}.json"
        await storage_service.upload_file(
            bucket=BUCKET_REF_AUDIOS,
            path=metadata_path,
            file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
            content_type="application/json"
        )

        signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)

        # Report the same display name that was persisted in the metadata, so
        # the upload response agrees with later list results.
        return RefAudioResponse(
            id=storage_path,
            name=final_display_name,
            path=signed_url,
            ref_text=ref_text.strip(),
            duration_sec=duration,
            created_at=timestamp
        ).model_dump()
    finally:
        for leftover in (tmp_input_path, tmp_wav_path):
            if os.path.exists(leftover):
                os.unlink(leftover)
async def list_ref_audios(user_id: str) -> dict:
    """List the user's reference audios with their metadata and signed URLs.

    Every ``.wav`` object under ``"{user_id}/"`` is paired with the ``.json``
    metadata object of the same base name. When the metadata cannot be read,
    the creation time falls back to the numeric prefix of the file name.

    Args:
        user_id: Owner id; also the storage directory that is listed.

    Returns:
        ``RefAudioListResponse(...).model_dump()`` with items sorted by
        ``created_at``, newest first.
    """
    files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
    wav_files = [f for f in files if (f.get("name") or "").endswith(".wav")]

    if not wav_files:
        return RefAudioListResponse(items=[]).model_dump()

    async def fetch_audio_info(f, client):
        name = f.get("name", "")
        storage_path = f"{user_id}/{name}"
        # Swap only the trailing ".wav"; str.replace would also rewrite a
        # ".wav" inside the name (e.g. "1_a.wav.wav").
        metadata_name = name[:-len(".wav")] + ".json"
        metadata_path = f"{user_id}/{metadata_name}"

        ref_text = ""
        duration_sec = 0.0
        created_at = 0
        original_filename = ""

        try:
            metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
            resp = await client.get(metadata_url)
            if resp.status_code == 200:
                metadata = resp.json()
                ref_text = metadata.get("ref_text", "")
                duration_sec = metadata.get("duration_sec", 0.0)
                created_at = metadata.get("created_at", 0)
                original_filename = metadata.get("original_filename", "")
        except Exception as e:
            logger.debug(f"读取 metadata 失败: {e}")
            # Fall back to the timestamp prefix of the stored file name.
            try:
                created_at = int(name.split("_")[0])
            except ValueError:
                pass

        signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)

        # Prefer the display name from metadata; otherwise strip the numeric
        # timestamp prefix from the stored name.
        display_name = original_filename if original_filename else name
        if not display_name or display_name == name:
            match = re.match(r'^\d+_(.+)$', name)
            if match:
                display_name = match.group(1)

        return RefAudioResponse(
            id=storage_path,
            name=display_name,
            path=signed_url,
            ref_text=ref_text,
            duration_sec=duration_sec,
            created_at=created_at
        )

    # One HTTP client shared by all metadata fetches instead of one per file.
    async with httpx.AsyncClient(timeout=5.0) as client:
        items = await asyncio.gather(*[fetch_audio_info(f, client) for f in wav_files])
    items = sorted(items, key=lambda x: x.created_at, reverse=True)

    return RefAudioListResponse(items=items).model_dump()
async def delete_ref_audio(audio_id: str, user_id: str) -> None:
    """Delete a reference audio and, best-effort, its metadata file.

    Args:
        audio_id: Storage path of the audio, expected as ``{user_id}/<file>.wav``.
        user_id: The caller; only objects inside this user's folder may be deleted.

    Raises:
        PermissionError: If ``audio_id`` is outside the caller's folder or
            contains a ``..`` path segment.
    """
    # The prefix check alone would accept "<user>/../<other>/file.wav".
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权删除此文件")

    await storage_service.delete_file(BUCKET_REF_AUDIOS, audio_id)

    # Only a ".wav" object has a sibling ".json"; for anything else the derived
    # path would be the object that was just deleted.
    if not audio_id.endswith(".wav"):
        return
    metadata_path = audio_id[: -len(".wav")] + ".json"
    try:
        await storage_service.delete_file(BUCKET_REF_AUDIOS, metadata_path)
    except Exception as e:
        # Metadata may never have existed; the audio itself is already gone.
        logger.debug(f"删除 metadata 失败(已忽略): {e}")
async def rename_ref_audio(audio_id: str, new_name: str, user_id: str) -> dict:
    """Rename a reference audio by rewriting the display name in its metadata.

    The audio object itself is not moved; only ``original_filename`` inside the
    sibling ``.json`` metadata object is updated (the file is created when it
    cannot be read).

    Args:
        audio_id: Storage path of the audio, ``{user_id}/<file>.wav``.
        new_name: New display name; ``.wav`` is appended when it has no suffix.
        user_id: The caller; only objects inside this user's folder may be changed.

    Returns:
        ``{"name": <final display name>}``.

    Raises:
        PermissionError: If ``audio_id`` is outside the caller's folder or
            contains a ``..`` path segment.
        ValueError: If ``audio_id`` is not a ``.wav`` path or the new name is blank.
    """
    # The prefix check alone would accept "<user>/../<other>/file.wav".
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权修改此文件")
    # Without a ".wav" suffix the derived metadata path would equal audio_id
    # and the upload below would overwrite the target object with JSON.
    if not audio_id.endswith(".wav"):
        raise ValueError("无效的音频 ID")

    new_name = new_name.strip()
    if not new_name:
        raise ValueError("新名称不能为空")
    if not Path(new_name).suffix:
        new_name += ".wav"

    # Load the existing metadata.
    metadata_path = audio_id[: -len(".wav")] + ".json"
    metadata = None
    try:
        metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
        async with httpx.AsyncClient() as client:
            resp = await client.get(metadata_url)
        if resp.status_code != 200:
            raise Exception(f"Failed to fetch metadata: {resp.status_code}")
        metadata = resp.json()
        if not isinstance(metadata, dict):
            raise Exception("metadata is not a JSON object")
    except Exception as e:
        logger.warning(f"无法读取元数据: {e}, 将创建新的元数据")
        metadata = {
            "ref_text": "",
            "duration_sec": 0.0,
            "created_at": int(time.time()),
            "original_filename": new_name
        }

    # Update the name and upload over the old metadata object.
    metadata["original_filename"] = new_name
    await storage_service.upload_file(
        bucket=BUCKET_REF_AUDIOS,
        path=metadata_path,
        file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
        content_type="application/json"
    )
    return {"name": new_name}

View File

@@ -1,417 +1,32 @@
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
from typing import Optional, Any, cast
import asyncio
import shutil
import os
import time
from pathlib import Path
from loguru import logger
from typing import Optional
import traceback
import re
import json
import requests
from urllib.parse import unquote
from loguru import logger
from app.services.whisper_service import whisper_service
from app.services.glm_service import glm_service
from app.core.response import success_response
from app.modules.tools import service
router = APIRouter()
@router.post("/extract-script")
async def extract_script_tool(
file: Optional[UploadFile] = File(None),
url: Optional[str] = Form(None),
rewrite: bool = Form(True)
):
"""
独立文案提取工具
支持上传视频/音频 OR 输入视频链接 -> 提取文字 -> (可选) AI洗稿
"""
if not file and not url:
raise HTTPException(400, "必须提供文件或视频链接")
temp_path = None
"""独立文案提取工具"""
try:
timestamp = int(time.time())
temp_dir = Path("/tmp")
if os.name == 'nt':
temp_dir = Path("d:/tmp")
temp_dir.mkdir(parents=True, exist_ok=True)
# 1. 获取/保存文件
loop = asyncio.get_event_loop()
if file:
filename = file.filename
if not filename:
raise HTTPException(400, "文件名无效")
safe_filename = Path(filename).name.replace(" ", "_")
temp_path = temp_dir / f"tool_extract_{timestamp}_{safe_filename}"
# 文件 I/O 放入线程池
await loop.run_in_executor(None, lambda: shutil.copyfileobj(file.file, open(temp_path, "wb")))
logger.info(f"Tool processing upload file: {temp_path}")
else:
if not url:
raise HTTPException(400, "必须提供视频链接")
url_value: str = url
# URL 下载逻辑
# 自动提取文案中的链接 (支持 Douyin/Bilibili 等分享文案)
url_match = re.search(r'https?://[^\s]+', url_value)
if url_match:
extracted_url = url_match.group(0)
logger.info(f"Extracted URL from text: {extracted_url}")
url_value = extracted_url
logger.info(f"Tool downloading URL: {url_value}")
# 封装 yt-dlp 下载函数 (Blocking)
def _download_yt_dlp():
import yt_dlp
logger.info("Attempting download with yt-dlp...")
ydl_opts = {
'format': 'bestaudio/best',
'outtmpl': str(temp_dir / f"tool_download_{timestamp}_%(id)s.%(ext)s"),
'quiet': True,
'no_warnings': True,
'http_headers': {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Referer': 'https://www.douyin.com/',
}
}
with yt_dlp.YoutubeDL() as ydl_raw:
ydl: Any = ydl_raw
ydl.params.update(ydl_opts)
info = ydl.extract_info(url_value, download=True)
if 'requested_downloads' in info:
downloaded_file = info['requested_downloads'][0]['filepath']
else:
ext = info.get('ext', 'mp4')
id = info.get('id')
downloaded_file = str(temp_dir / f"tool_download_{timestamp}_{id}.{ext}")
return Path(downloaded_file)
# 先尝试 yt-dlp (Run in Executor)
try:
temp_path = await loop.run_in_executor(None, _download_yt_dlp)
logger.info(f"yt-dlp downloaded to: {temp_path}")
except Exception as e:
logger.warning(f"yt-dlp download failed: {e}. Trying manual Douyin fallback...")
# 失败则尝试手动解析 (Douyin Fallback)
if "douyin" in url_value:
manual_path = await download_douyin_manual(url_value, temp_dir, timestamp)
if manual_path:
temp_path = manual_path
logger.info(f"Manual Douyin fallback successful: {temp_path}")
else:
raise HTTPException(400, f"视频下载失败。yt-dlp 报错: {str(e)}")
elif "bilibili" in url_value:
manual_path = await download_bilibili_manual(url_value, temp_dir, timestamp)
if manual_path:
temp_path = manual_path
logger.info(f"Manual Bilibili fallback successful: {temp_path}")
else:
raise HTTPException(400, f"视频下载失败。yt-dlp 报错: {str(e)}")
else:
raise HTTPException(400, f"视频下载失败: {str(e)}")
if not temp_path or not temp_path.exists():
raise HTTPException(400, "文件获取失败")
# 1.5 安全转换: 强制转为 WAV (16k)
import subprocess
audio_path = temp_dir / f"extract_audio_{timestamp}.wav"
def _convert_audio():
try:
convert_cmd = [
'ffmpeg',
'-i', str(temp_path),
'-vn', # 忽略视频
'-acodec', 'pcm_s16le',
'-ar', '16000', # Whisper 推荐采样率
'-ac', '1', # 单声道
'-y', # 覆盖
str(audio_path)
]
# 捕获 stderr
subprocess.run(convert_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return True
except subprocess.CalledProcessError as e:
error_log = e.stderr.decode('utf-8', errors='ignore') if e.stderr else str(e)
logger.error(f"FFmpeg check/convert failed: {error_log}")
# 检查是否为 HTML
head = b""
try:
with open(temp_path, 'rb') as f:
head = f.read(100)
except: pass
if b'<!DOCTYPE html' in head or b'<html' in head:
raise ValueError("HTML_DETECTED")
raise ValueError("CONVERT_FAILED")
# 执行转换 (Run in Executor)
try:
await loop.run_in_executor(None, _convert_audio)
logger.info(f"Converted to WAV: {audio_path}")
target_path = audio_path
except ValueError as ve:
if str(ve) == "HTML_DETECTED":
raise HTTPException(400, "下载的文件是网页而非视频,请重试或手动上传。")
else:
raise HTTPException(400, "下载的文件已损坏或格式无法识别。")
# 2. 提取文案 (Whisper)
script = await whisper_service.transcribe(str(target_path))
# 3. AI 洗稿 (GLM)
rewritten = None
if rewrite:
if script and len(script.strip()) > 0:
logger.info("Rewriting script...")
rewritten = await glm_service.rewrite_script(script)
else:
logger.warning("No script extracted, skipping rewrite")
return success_response({
"original_script": script,
"rewritten_script": rewritten
})
except HTTPException as he:
raise he
result = await service.extract_script(file=file, url=url, rewrite=rewrite)
return success_response(result)
except ValueError as e:
raise HTTPException(400, str(e))
except HTTPException:
raise
except Exception as e:
logger.error(f"Tool extract failed: {e}")
logger.error(traceback.format_exc())
# Friendly error message
msg = str(e)
if "Fresh cookies" in msg:
msg = "下载失败:目标平台开启了反爬验证,请过段时间重试或直接上传视频文件。"
raise HTTPException(500, f"提取失败: {msg}")
finally:
# 清理临时文件
if temp_path and temp_path.exists():
try:
os.remove(temp_path)
logger.info(f"Cleaned up temp file: {temp_path}")
except Exception as e:
logger.warning(f"Failed to cleanup temp file {temp_path}: {e}")
async def download_douyin_manual(url: str, temp_dir: Path, timestamp: int) -> Optional[Path]:
"""
手动下载抖音视频 (Fallback logic - Ported from SuperIPAgent/douyinDownloader)
使用特定的 User Profile URL 和硬编码 Cookie 绕过反爬
"""
import httpx
logger.info(f"[SuperIPAgent] Starting download for: {url}")
try:
# 1. 提取 Modal ID (支持短链跳转)
headers = {
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
}
# 如果是短链或重定向 - 使用异步 httpx
async with httpx.AsyncClient(follow_redirects=True, timeout=10.0) as client:
resp = await client.get(url, headers=headers)
final_url = str(resp.url)
logger.info(f"[SuperIPAgent] Final URL: {final_url}")
modal_id = None
match = re.search(r'/video/(\d+)', final_url)
if match:
modal_id = match.group(1)
if not modal_id:
logger.error("[SuperIPAgent] Could not extract modal_id")
return None
logger.info(f"[SuperIPAgent] Extracted modal_id: {modal_id}")
# 2. 构造特定请求 URL (Copy from SuperIPAgent)
# 使用特定用户的 Profile 页 + modal_id 参数,配合特定 Cookie
target_url = f"https://www.douyin.com/user/MS4wLjABAAAAN_s_hups7LD0N4qnrM3o2gI0vuG3pozNaEolz2_py3cHTTrpVr1Z4dukFD9SOlwY?from_tab_name=main&modal_id={modal_id}"
# 3. 使用配置的 Cookie (从环境变量 DOUYIN_COOKIE 读取)
from app.core.config import settings
if not settings.DOUYIN_COOKIE:
logger.warning("[SuperIPAgent] DOUYIN_COOKIE 未配置,视频下载可能失败")
headers_with_cookie = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"cookie": settings.DOUYIN_COOKIE,
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
}
logger.info(f"[SuperIPAgent] Requesting page with Cookie...")
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(target_url, headers=headers_with_cookie)
# 4. 解析 RENDER_DATA
content_match = re.findall(r'<script id="RENDER_DATA" type="application/json">(.*?)</script>', response.text)
if not content_match:
# 尝试解码后再查找?或者结构变了
# 再尝试找 SSR_HYDRATED_DATA
if "SSR_HYDRATED_DATA" in response.text:
content_match = re.findall(r'<script id="SSR_HYDRATED_DATA" type="application/json">(.*?)</script>', response.text)
if not content_match:
logger.error(f"[SuperIPAgent] Could not find RENDER_DATA in page (len={len(response.text)})")
return None
content = unquote(content_match[0])
try:
data = json.loads(content)
except:
logger.error("[SuperIPAgent] JSON decode failed")
return None
# 5. 提取视频流
video_url = None
try:
# 路径通常是: app -> videoDetail -> video -> bitRateList -> playAddr -> src
if "app" in data and "videoDetail" in data["app"]:
info = data["app"]["videoDetail"]["video"]
if "bitRateList" in info and info["bitRateList"]:
video_url = info["bitRateList"][0]["playAddr"][0]["src"]
elif "playAddr" in info and info["playAddr"]:
video_url = info["playAddr"][0]["src"]
except Exception as e:
logger.error(f"[SuperIPAgent] Path extraction failed: {e}")
if not video_url:
logger.error("[SuperIPAgent] No video_url found")
return None
if video_url.startswith("//"):
video_url = "https:" + video_url
logger.info(f"[SuperIPAgent] Found video URL: {video_url[:50]}...")
# 6. 下载 (带 Header) - 使用异步 httpx
temp_path = temp_dir / f"douyin_manual_{timestamp}.mp4"
download_headers = {
'Referer': 'https://www.douyin.com/',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
}
async with httpx.AsyncClient(timeout=60.0) as client:
async with client.stream("GET", video_url, headers=download_headers) as dl_resp:
if dl_resp.status_code == 200:
with open(temp_path, 'wb') as f:
async for chunk in dl_resp.aiter_bytes(chunk_size=8192):
f.write(chunk)
logger.info(f"[SuperIPAgent] Downloaded successfully: {temp_path}")
return temp_path
else:
logger.error(f"[SuperIPAgent] Download failed: {dl_resp.status_code}")
return None
except Exception as e:
logger.error(f"[SuperIPAgent] Logic failed: {e}")
return None
async def download_bilibili_manual(url: str, temp_dir: Path, timestamp: int) -> Optional[Path]:
"""
手动下载 Bilibili 视频 (Fallback logic - Playwright Version)
B站通常音视频分离这里只提取音频即可因为只需要文案
"""
from playwright.async_api import async_playwright
logger.info(f"[Playwright] Starting Bilibili download for: {url}")
playwright = None
browser = None
try:
playwright = await async_playwright().start()
# Launch browser (ensure chromium is installed: playwright install chromium)
browser = await playwright.chromium.launch(headless=True, args=['--no-sandbox', '--disable-setuid-sandbox'])
# Mobile User Agent often gives single stream?
# But Bilibili mobile web is tricky. Desktop is fine.
context = await browser.new_context(
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
page = await context.new_page()
# Intercept audio responses?
# Bilibili streams are usually .m4s
# But finding the initial state is easier.
logger.info("[Playwright] Navigating to Bilibili...")
await page.goto(url, timeout=45000)
# Wait for video element (triggers loading)
try:
await page.wait_for_selector('video', timeout=15000)
except:
logger.warning("[Playwright] Video selector timeout")
# 1. Try extracting from __playinfo__
# window.__playinfo__ contains dash streams
playinfo = await page.evaluate("window.__playinfo__")
audio_url = None
if playinfo and "data" in playinfo and "dash" in playinfo["data"]:
dash = playinfo["data"]["dash"]
if "audio" in dash and dash["audio"]:
audio_url = dash["audio"][0]["baseUrl"]
logger.info(f"[Playwright] Found audio stream in __playinfo__: {audio_url[:50]}...")
# 2. If playinfo fails, try extracting video src (sometimes it's a blob, which we can't fetch easily without interception)
# But interception is complex. Let's try requests with Referer if we have URL.
if not audio_url:
logger.warning("[Playwright] Could not find audio in __playinfo__")
return None
# Download the audio stream
temp_path = temp_dir / f"bilibili_audio_{timestamp}.m4s" # usually m4s
try:
api_request = context.request
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Referer": "https://www.bilibili.com/"
}
logger.info(f"[Playwright] Downloading audio stream...")
response = await api_request.get(audio_url, headers=headers)
if response.status == 200:
body = await response.body()
with open(temp_path, 'wb') as f:
f.write(body)
logger.info(f"[Playwright] Downloaded successfully: {temp_path}")
return temp_path
else:
logger.error(f"[Playwright] API Request failed: {response.status}")
return None
except Exception as e:
logger.error(f"[Playwright] Download logic error: {e}")
return None
except Exception as e:
logger.error(f"[Playwright] Bilibili download failed: {e}")
return None
finally:
if browser:
await browser.close()
if playwright:
await playwright.stop()

View File

@@ -0,0 +1,7 @@
from pydantic import BaseModel
from typing import Optional
class ExtractScriptResponse(BaseModel):
    """Result of the script-extraction tool.

    The field names match the keys of the dict built by the tools service's
    ``extract_script``.
    """
    # Transcribed text of the media; presumably None when nothing was transcribed.
    original_script: Optional[str] = None
    # Rewritten version of the script; stays None when rewriting was not
    # requested or there was no text to rewrite.
    rewritten_script: Optional[str] = None

View File

@@ -0,0 +1,355 @@
import asyncio
import os
import re
import json
import time
import shutil
import subprocess
import traceback
from pathlib import Path
from typing import Optional, Any
from urllib.parse import unquote
import httpx
from loguru import logger
from app.services.whisper_service import whisper_service
from app.services.glm_service import glm_service
async def extract_script(file=None, url: Optional[str] = None, rewrite: bool = True) -> dict:
    """Extract the spoken script from an uploaded file or a video link.

    Pipeline: obtain the media (upload or download) -> convert to 16 kHz mono
    WAV -> Whisper transcription -> optional GLM rewrite.

    Args:
        file: Uploaded file object exposing ``filename`` and a binary ``file``
            stream; takes precedence over ``url``.
        url: Video link, or share text containing one; used when ``file`` is
            not given.
        rewrite: Whether to rewrite the transcript when it is not empty.

    Returns:
        ``{"original_script": <transcript>, "rewritten_script": <text or None>}``.

    Raises:
        ValueError: If neither input is given, the upload has no file name, the
            media cannot be obtained, or it cannot be converted to audio.
    """
    if not file and not url:
        raise ValueError("必须提供文件或视频链接")

    temp_path: Optional[Path] = None
    audio_path: Optional[Path] = None
    try:
        # Nanosecond resolution keeps temp names of concurrent requests apart;
        # with whole seconds two requests could share the same WAV path.
        timestamp = time.time_ns()
        temp_dir = Path("/tmp")
        if os.name == 'nt':
            temp_dir = Path("d:/tmp")
            temp_dir.mkdir(parents=True, exist_ok=True)

        loop = asyncio.get_running_loop()

        # 1. Obtain / save the media file
        if file:
            filename = file.filename
            if not filename:
                raise ValueError("文件名无效")
            safe_filename = Path(filename).name.replace(" ", "_")
            temp_path = temp_dir / f"tool_extract_{timestamp}_{safe_filename}"

            def _save_upload() -> None:
                # Close (and thereby flush) the target before ffmpeg reads it.
                with open(temp_path, "wb") as out:
                    shutil.copyfileobj(file.file, out)

            await loop.run_in_executor(None, _save_upload)
            logger.info(f"Tool processing upload file: {temp_path}")
        else:
            temp_path = await _download_video(url, temp_dir, timestamp)

        if not temp_path or not temp_path.exists():
            raise ValueError("文件获取失败")

        # 1.5 Normalise: always convert to 16 kHz WAV
        audio_path = temp_dir / f"extract_audio_{timestamp}.wav"
        try:
            await loop.run_in_executor(None, _convert_to_wav, temp_path, audio_path)
            logger.info(f"Converted to WAV: {audio_path}")
        except ValueError as ve:
            if str(ve) == "HTML_DETECTED":
                raise ValueError("下载的文件是网页而非视频,请重试或手动上传。") from ve
            raise ValueError("下载的文件已损坏或格式无法识别。") from ve

        # 2. Transcribe (Whisper)
        script = await whisper_service.transcribe(str(audio_path))

        # 3. Rewrite (GLM)
        rewritten = None
        if rewrite and script and script.strip():
            logger.info("Rewriting script...")
            rewritten = await glm_service.rewrite_script(script)

        return {
            "original_script": script,
            "rewritten_script": rewritten
        }
    finally:
        # Remove both the source media and the intermediate WAV.
        for leftover in (temp_path, audio_path):
            if leftover and leftover.exists():
                try:
                    os.remove(leftover)
                    logger.info(f"Cleaned up temp file: {leftover}")
                except OSError as e:
                    logger.warning(f"Failed to cleanup temp file {leftover}: {e}")
def _convert_to_wav(input_path: Path, output_path: Path) -> None:
    """Convert any media file to 16 kHz mono PCM WAV with ffmpeg (blocking).

    Args:
        input_path: Source media file.
        output_path: Destination WAV file; overwritten if it exists.

    Raises:
        ValueError: ``"HTML_DETECTED"`` when ffmpeg fails and the input starts
            like an HTML page, ``"CONVERT_FAILED"`` for any other ffmpeg failure.
    """
    convert_cmd = [
        'ffmpeg',
        '-i', str(input_path),
        '-vn',                    # drop any video stream
        '-acodec', 'pcm_s16le',
        '-ar', '16000',           # 16 kHz sample rate
        '-ac', '1',               # mono
        '-y',                     # overwrite the output
        str(output_path)
    ]
    try:
        subprocess.run(convert_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        error_log = e.stderr.decode('utf-8', errors='ignore') if e.stderr else str(e)
        logger.error(f"FFmpeg check/convert failed: {error_log}")

        # Peek at the first bytes to tell "an HTML page was downloaded" apart
        # from a genuinely broken media file.
        head = b""
        try:
            with open(input_path, 'rb') as f:
                head = f.read(100)
        except OSError:
            pass  # unreadable input: report it as a plain conversion failure

        # HTML tags are case-insensitive; "<!doctype html>" is just as common.
        lowered = head.lower()
        if b'<!doctype html' in lowered or b'<html' in lowered:
            raise ValueError("HTML_DETECTED") from e
        raise ValueError("CONVERT_FAILED") from e
async def _download_video(url: str, temp_dir: Path, timestamp: int) -> Path:
    """Fetch the media behind ``url``: yt-dlp first, site-specific fallback second.

    ``url`` may be share text; the first ``http(s)://`` link found in it is used.
    When yt-dlp fails, links containing "douyin" or "bilibili" are retried with
    the matching manual downloader.

    Raises:
        ValueError: If yt-dlp fails and no fallback applies or the fallback
            returns nothing.
    """
    found = re.search(r'https?://[^\s]+', url)
    if found is None:
        url_value = url
    else:
        url_value = found.group(0)
        logger.info(f"Extracted URL from text: {url_value}")
    logger.info(f"Tool downloading URL: {url_value}")

    loop = asyncio.get_event_loop()

    # yt-dlp is blocking, so it runs in the default executor.
    try:
        downloaded = await loop.run_in_executor(None, _download_yt_dlp, url_value, temp_dir, timestamp)
        logger.info(f"yt-dlp downloaded to: {downloaded}")
        return downloaded
    except Exception as e:
        logger.warning(f"yt-dlp download failed: {e}. Trying manual fallback...")

        if "douyin" in url_value:
            fallback = _download_douyin_manual
        elif "bilibili" in url_value:
            fallback = _download_bilibili_manual
        else:
            raise ValueError(f"视频下载失败: {str(e)}")

        manual_path = await fallback(url_value, temp_dir, timestamp)
        if manual_path:
            return manual_path
        raise ValueError(f"视频下载失败。yt-dlp 报错: {str(e)}")
def _download_yt_dlp(url_value: str, temp_dir: Path, timestamp: int) -> Path:
    """Download the best audio track with yt-dlp (blocking; run in an executor).

    Args:
        url_value: Media page URL.
        temp_dir: Directory the download is written to.
        timestamp: Value embedded in the output file name.

    Returns:
        Path of the downloaded file.

    Raises:
        RuntimeError: If yt-dlp returns no media information.
        Exception: Whatever yt-dlp raises for a failed download.
    """
    import yt_dlp

    logger.info("Attempting download with yt-dlp...")

    # NOTE(review): the Douyin Referer is sent for every site — confirm it is
    # harmless for other platforms.
    ydl_opts: Any = {
        'format': 'bestaudio/best',
        'outtmpl': str(temp_dir / f"tool_download_{timestamp}_%(id)s.%(ext)s"),
        'quiet': True,
        'no_warnings': True,
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Referer': 'https://www.douyin.com/',
        }
    }

    # Pass the options to the constructor so yt-dlp processes them during
    # initialisation, rather than patching ``params`` afterwards.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url_value, download=True)

    if not info:
        raise RuntimeError("yt-dlp returned no media info")

    requested = info.get('requested_downloads')
    if requested:
        return Path(requested[0]['filepath'])

    # No explicit path reported: rebuild it from the output template.
    ext = info.get('ext', 'mp4')
    video_id = info.get('id')
    return Path(str(temp_dir / f"tool_download_{timestamp}_{video_id}.{ext}"))
async def _download_douyin_manual(url: str, temp_dir: Path, timestamp: int) -> Optional[Path]:
    """Download a Douyin video without yt-dlp (fallback path).

    Steps: follow redirects to find the numeric video id, request a profile
    page with that id as ``modal_id`` (sending the configured cookie), read the
    embedded page-state JSON, pick the first play address and stream it to disk.

    Args:
        url: Douyin video or short link.
        temp_dir: Directory the video is written to.
        timestamp: Value embedded in the output file name.

    Returns:
        Path of the downloaded ``.mp4``, or ``None`` on any failure (failures
        are logged, never raised).
    """
    logger.info(f"[SuperIPAgent] Starting download for: {url}")
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"

    try:
        # 1. Resolve short links / redirects and extract the video id.
        async with httpx.AsyncClient(follow_redirects=True, timeout=10.0) as client:
            resp = await client.get(url, headers={"user-agent": user_agent})
            final_url = str(resp.url)

        logger.info(f"[SuperIPAgent] Final URL: {final_url}")

        id_match = re.search(r'/video/(\d+)', final_url)
        if not id_match:
            logger.error("[SuperIPAgent] Could not extract modal_id")
            return None
        modal_id = id_match.group(1)
        logger.info(f"[SuperIPAgent] Extracted modal_id: {modal_id}")

        # 2. Request the profile page with the video opened as a modal.
        target_url = f"https://www.douyin.com/user/MS4wLjABAAAAN_s_hups7LD0N4qnrM3o2gI0vuG3pozNaEolz2_py3cHTTrpVr1Z4dukFD9SOlwY?from_tab_name=main&modal_id={modal_id}"

        from app.core.config import settings

        headers_with_cookie = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "user-agent": user_agent,
        }
        if settings.DOUYIN_COOKIE:
            headers_with_cookie["cookie"] = settings.DOUYIN_COOKIE
        else:
            # Send no cookie header at all rather than an empty or None value,
            # so the request is still attempted as the warning promises.
            logger.warning("[SuperIPAgent] DOUYIN_COOKIE 未配置,视频下载可能失败")

        logger.info("[SuperIPAgent] Requesting page with Cookie...")

        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(target_url, headers=headers_with_cookie)

        # 3. Locate the embedded page state (RENDER_DATA, else SSR_HYDRATED_DATA).
        content_match = re.findall(r'<script id="RENDER_DATA" type="application/json">(.*?)</script>', response.text)
        if not content_match:
            content_match = re.findall(r'<script id="SSR_HYDRATED_DATA" type="application/json">(.*?)</script>', response.text)

        if not content_match:
            logger.error(f"[SuperIPAgent] Could not find RENDER_DATA in page (len={len(response.text)})")
            return None

        try:
            data = json.loads(unquote(content_match[0]))
        except ValueError:
            logger.error("[SuperIPAgent] JSON decode failed")
            return None

        # 4. Pick the play address: app -> videoDetail -> video -> bitRateList/playAddr -> src
        video_url = None
        try:
            if "app" in data and "videoDetail" in data["app"]:
                info = data["app"]["videoDetail"]["video"]
                if "bitRateList" in info and info["bitRateList"]:
                    video_url = info["bitRateList"][0]["playAddr"][0]["src"]
                elif "playAddr" in info and info["playAddr"]:
                    video_url = info["playAddr"][0]["src"]
        except (KeyError, IndexError, TypeError) as e:
            logger.error(f"[SuperIPAgent] Path extraction failed: {e}")

        if not video_url:
            logger.error("[SuperIPAgent] No video_url found")
            return None

        if video_url.startswith("//"):
            video_url = "https:" + video_url

        logger.info(f"[SuperIPAgent] Found video URL: {video_url[:50]}...")

        # 5. Stream the video to disk.
        temp_path = temp_dir / f"douyin_manual_{timestamp}.mp4"
        download_headers = {
            'Referer': 'https://www.douyin.com/',
            'User-Agent': user_agent,
        }

        completed = False
        try:
            async with httpx.AsyncClient(timeout=60.0) as client:
                async with client.stream("GET", video_url, headers=download_headers) as dl_resp:
                    if dl_resp.status_code != 200:
                        logger.error(f"[SuperIPAgent] Download failed: {dl_resp.status_code}")
                        return None
                    with open(temp_path, 'wb') as f:
                        async for chunk in dl_resp.aiter_bytes(chunk_size=8192):
                            f.write(chunk)
            completed = True
        finally:
            # The caller only cleans up a path it receives, so a truncated
            # file from an interrupted stream has to be removed here.
            if not completed and temp_path.exists():
                try:
                    temp_path.unlink()
                except OSError as cleanup_err:
                    logger.warning(f"[SuperIPAgent] Failed to remove partial file {temp_path}: {cleanup_err}")

        logger.info(f"[SuperIPAgent] Downloaded successfully: {temp_path}")
        return temp_path

    except Exception as e:
        logger.error(f"[SuperIPAgent] Logic failed: {e}")
        return None
async def _download_bilibili_manual(url: str, temp_dir: Path, timestamp: int) -> Optional[Path]:
    """Download the audio track of a Bilibili video with Playwright (fallback path).

    Opens the page in headless Chromium, reads ``window.__playinfo__`` and
    downloads the first DASH audio stream listed there.

    Args:
        url: Bilibili video page URL.
        temp_dir: Directory the audio is written to.
        timestamp: Value embedded in the output file name.

    Returns:
        Path of the downloaded ``.m4s`` audio file, or ``None`` on any failure
        (failures are logged, never raised).
    """
    from playwright.async_api import async_playwright

    logger.info(f"[Playwright] Starting Bilibili download for: {url}")
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

    try:
        # The context manager stops Playwright even when closing the browser fails.
        async with async_playwright() as playwright:
            browser = await playwright.chromium.launch(headless=True, args=['--no-sandbox', '--disable-setuid-sandbox'])
            try:
                context = await browser.new_context(user_agent=user_agent)
                page = await context.new_page()

                logger.info("[Playwright] Navigating to Bilibili...")
                await page.goto(url, timeout=45000)

                # Waiting for <video> is best effort. Catch Exception rather
                # than everything so task cancellation still propagates.
                try:
                    await page.wait_for_selector('video', timeout=15000)
                except Exception:
                    logger.warning("[Playwright] Video selector timeout")

                playinfo = await page.evaluate("window.__playinfo__")

                # Tolerate missing or null levels in the playinfo structure.
                dash = ((playinfo or {}).get("data") or {}).get("dash") or {}
                audio_streams = dash.get("audio") or []
                audio_url = audio_streams[0]["baseUrl"] if audio_streams else None
                if audio_url:
                    logger.info(f"[Playwright] Found audio stream in __playinfo__: {audio_url[:50]}...")
                else:
                    logger.warning("[Playwright] Could not find audio in __playinfo__")
                    return None

                temp_path = temp_dir / f"bilibili_audio_{timestamp}.m4s"

                try:
                    headers = {
                        "User-Agent": user_agent,
                        "Referer": "https://www.bilibili.com/"
                    }

                    logger.info("[Playwright] Downloading audio stream...")
                    # Reuse the browser context's request client for the download.
                    response = await context.request.get(audio_url, headers=headers)

                    if response.status != 200:
                        logger.error(f"[Playwright] API Request failed: {response.status}")
                        return None

                    body = await response.body()
                    with open(temp_path, 'wb') as f:
                        f.write(body)

                    logger.info(f"[Playwright] Downloaded successfully: {temp_path}")
                    return temp_path

                except Exception as e:
                    logger.error(f"[Playwright] Download logic error: {e}")
                    return None
            finally:
                # A failing close must not turn a finished download into an error.
                try:
                    await browser.close()
                except Exception as close_err:
                    logger.warning(f"[Playwright] Browser close failed: {close_err}")

    except Exception as e:
        logger.error(f"[Playwright] Bilibili download failed: {e}")
        return None

View File

@@ -1,14 +1,16 @@
from pydantic import BaseModel
from typing import Optional
from typing import Optional, List
class GenerateRequest(BaseModel):
    """Request body of a video-generation task."""
    # Script to be spoken; also handed to Whisper alignment as the reference text.
    text: str
    # Voice identifier — presumably an EdgeTTS voice name; confirm against the TTS call.
    voice: str = "zh-CN-YunxiNeural"
    # Single material path; used whenever material_paths has fewer than two entries.
    material_path: str
    # Optional material list; the multi-material pipeline runs only when it
    # holds more than one entry.
    material_paths: Optional[List[str]] = None
    # TTS backend selector; "edgetts" is the default.
    tts_mode: str = "edgetts"
    # Reference audio for voice cloning — presumably a storage id; TODO confirm.
    ref_audio_id: Optional[str] = None
    # Transcript of the reference audio, forwarded to the voice-clone call.
    ref_text: Optional[str] = None
    # Locale tag such as "zh-CN"; mapped to Whisper and Qwen language names
    # by the workflow helpers.
    language: str = "zh-CN"
    title: Optional[str] = None
    # When true the workflow generates subtitles with Whisper.
    enable_subtitles: bool = True
    subtitle_style_id: Optional[str] = None

View File

@@ -1,4 +1,4 @@
from typing import Optional, Any
from typing import Optional, Any, List
from pathlib import Path
import time
import traceback
@@ -24,6 +24,17 @@ from .schemas import GenerateRequest
from .task_store import task_store
def _locale_to_whisper_lang(locale: str) -> str:
"""'en-US''en', 'zh-CN''zh'"""
return locale.split("-")[0] if "-" in locale else locale
def _locale_to_qwen_lang(locale: str) -> str:
"""'zh-CN''Chinese', 'en-US''English', 其他 → 'Auto'"""
mapping = {"zh": "Chinese", "en": "English"}
return mapping.get(locale.split("-")[0], "Auto")
_lipsync_service: Optional[LipSyncService] = None
_lipsync_ready: Optional[bool] = None
_lipsync_last_check: float = 0
@@ -79,19 +90,107 @@ def _update_task(task_id: str, **updates: Any) -> None:
task_store.update(task_id, updates)
# ── 多素材辅助函数 ──
def _split_equal(segments: List[dict], material_paths: List[str]) -> List[dict]:
"""按素材数量均分音频时长,对齐到最近的 Whisper 字边界。
Args:
segments: Whisper 产出的 segment 列表, 每个包含 words (字级时间戳)
material_paths: 素材路径列表
Returns:
[{"material_path": "...", "start": 0.0, "end": 5.2, "index": 0}, ...]
"""
# 展平所有 Whisper 字符
all_chars: List[dict] = []
for seg in segments:
for w in seg.get("words", []):
all_chars.append(w)
n = len(material_paths)
if not all_chars or n == 0:
return [{"material_path": material_paths[0] if material_paths else "",
"start": 0.0, "end": 99999.0, "index": 0}]
# 素材数不能超过字符数,否则边界会重复
if n > len(all_chars):
logger.warning(f"[MultiMat] 素材数({n}) > 字符数({len(all_chars)}),裁剪为 {len(all_chars)}")
n = len(all_chars)
total_start = all_chars[0]["start"]
total_end = all_chars[-1]["end"]
seg_dur = (total_end - total_start) / n
# 计算 N-1 个分割点,对齐到最近的字边界
boundaries = [0] # 第一段从第 0 个字开始
for i in range(1, n):
target_time = total_start + i * seg_dur
# 找到 start 时间最接近 target_time 的字
best_idx = boundaries[-1] + 1 # 至少比上一个边界后移 1
best_diff = float("inf")
for j in range(boundaries[-1] + 1, len(all_chars)):
diff = abs(all_chars[j]["start"] - target_time)
if diff < best_diff:
best_diff = diff
best_idx = j
elif diff > best_diff:
break # 时间递增,差值开始变大后可以停了
boundaries.append(min(best_idx, len(all_chars) - 1))
boundaries.append(len(all_chars)) # 最后一段到末尾
# 按边界生成分配结果
assignments: List[dict] = []
for i in range(n):
s_idx = boundaries[i]
e_idx = boundaries[i + 1]
if s_idx >= len(all_chars) or s_idx >= e_idx:
continue
assignments.append({
"material_path": material_paths[i],
"start": all_chars[s_idx]["start"],
"end": all_chars[e_idx - 1]["end"],
"text": "".join(c["word"] for c in all_chars[s_idx:e_idx]),
"index": len(assignments),
})
if not assignments:
return [{"material_path": material_paths[0], "start": 0.0, "end": 99999.0, "index": 0}]
logger.info(f"[MultiMat] 均分 {len(all_chars)} 字为 {len(assignments)}")
for a in assignments:
dur = a["end"] - a["start"]
logger.info(f"{a['index']}: [{a['start']:.2f}-{a['end']:.2f}s] ({dur:.1f}s) {a['text'][:20]}")
return assignments
async def process_video_generation(task_id: str, req: GenerateRequest, user_id: str):
temp_files = []
try:
start_time = time.time()
# ── 确定素材列表 ──
material_paths: List[str] = []
if req.material_paths and len(req.material_paths) > 1:
material_paths = req.material_paths
else:
material_paths = [req.material_path]
is_multi = len(material_paths) > 1
_update_task(task_id, status="processing", progress=5, message="正在下载素材...")
temp_dir = settings.UPLOAD_DIR / "temp"
temp_dir.mkdir(parents=True, exist_ok=True)
input_material_path = temp_dir / f"{task_id}_input.mp4"
temp_files.append(input_material_path)
await _download_material(req.material_path, input_material_path)
# 单素材模式:下载主素材
if not is_multi:
input_material_path = temp_dir / f"{task_id}_input.mp4"
temp_files.append(input_material_path)
await _download_material(material_paths[0], input_material_path)
_update_task(task_id, message="正在生成语音...", progress=10)
@@ -119,7 +218,7 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
ref_audio_path=str(ref_audio_local),
ref_text=req.ref_text,
output_path=str(audio_path),
language="Chinese"
language=_locale_to_qwen_lang(req.language)
)
else:
_update_task(task_id, message="正在生成语音 (EdgeTTS)...")
@@ -128,52 +227,183 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
tts_time = time.time() - start_time
print(f"[Pipeline] TTS completed in {tts_time:.1f}s")
_update_task(task_id, progress=25)
_update_task(task_id, message="正在合成唇形 (LatentSync)...", progress=30)
lipsync = _get_lipsync_service()
lipsync_video_path = temp_dir / f"{task_id}_lipsync.mp4"
temp_files.append(lipsync_video_path)
lipsync_start = time.time()
is_ready = await _check_lipsync_ready()
if is_ready:
print(f"[LipSync] Starting LatentSync inference...")
_update_task(task_id, progress=35, message="正在运行 LatentSync 推理...")
await lipsync.generate(str(input_material_path), str(audio_path), str(lipsync_video_path))
else:
print(f"[LipSync] LatentSync not ready, copying original video")
_update_task(task_id, message="唇形同步不可用,使用原始视频...")
import shutil
shutil.copy(str(input_material_path), lipsync_video_path)
lipsync_time = time.time() - lipsync_start
print(f"[Pipeline] LipSync completed in {lipsync_time:.1f}s")
_update_task(task_id, progress=80)
video = VideoService()
captions_path = None
if req.enable_subtitles:
_update_task(task_id, message="正在生成字幕 (Whisper)...", progress=82)
if is_multi:
# ══════════════════════════════════════
# 多素材流水线
# ══════════════════════════════════════
_update_task(task_id, progress=12, message="正在生成字幕 (Whisper)...")
captions_path = temp_dir / f"{task_id}_captions.json"
temp_files.append(captions_path)
try:
await whisper_service.align(
captions_data = await whisper_service.align(
audio_path=str(audio_path),
text=req.text,
output_path=str(captions_path)
output_path=str(captions_path),
language=_locale_to_whisper_lang(req.language),
)
print(f"[Pipeline] Whisper alignment completed")
print(f"[Pipeline] Whisper alignment completed (multi-material)")
except Exception as e:
logger.warning(f"Whisper alignment failed, skipping subtitles: {e}")
logger.warning(f"Whisper alignment failed: {e}")
captions_data = None
captions_path = None
_update_task(task_id, progress=15, message="正在分配素材...")
if captions_data and captions_data.get("segments"):
assignments = _split_equal(captions_data["segments"], material_paths)
else:
# Whisper 失败 → 按时长均分(不依赖字符对齐)
logger.warning("[MultiMat] Whisper 无数据,按时长均分")
audio_dur = video._get_duration(str(audio_path))
if audio_dur <= 0:
audio_dur = 30.0 # 安全兜底
seg_dur = audio_dur / len(material_paths)
assignments = [
{"material_path": material_paths[i], "start": i * seg_dur,
"end": (i + 1) * seg_dur, "index": i}
for i in range(len(material_paths))
]
# 扩展段覆盖完整音频范围首段从0开始末段到音频结尾
audio_duration = video._get_duration(str(audio_path))
if assignments and audio_duration > 0:
assignments[0]["start"] = 0.0
assignments[-1]["end"] = audio_duration
num_segments = len(assignments)
print(f"[Pipeline] Multi-material: {num_segments} segments, {len(material_paths)} materials")
if num_segments == 0:
raise RuntimeError("Multi-material: no valid segments after splitting")
lipsync_start = time.time()
# ── 第一步:下载所有素材并检测分辨率 ──
material_locals: List[Path] = []
resolutions = []
for i, assignment in enumerate(assignments):
material_local = temp_dir / f"{task_id}_material_{i}.mp4"
temp_files.append(material_local)
await _download_material(assignment["material_path"], material_local)
material_locals.append(material_local)
resolutions.append(video.get_resolution(str(material_local)))
# 分辨率不一致时,统一到第一个素材的分辨率
base_res = resolutions[0] if resolutions else (0, 0)
need_scale = any(r != base_res for r in resolutions) and base_res[0] > 0
if need_scale:
logger.info(f"[MultiMat] 素材分辨率不一致,统一到 {base_res[0]}x{base_res[1]}")
# ── 第二步:裁剪每段素材到对应时长 ──
prepared_segments: List[Path] = []
for i, assignment in enumerate(assignments):
seg_progress = 15 + int((i / num_segments) * 30) # 15% → 45%
seg_dur = assignment["end"] - assignment["start"]
_update_task(
task_id,
progress=seg_progress,
message=f"正在准备素材 {i+1}/{num_segments}..."
)
prepared_path = temp_dir / f"{task_id}_prepared_{i}.mp4"
temp_files.append(prepared_path)
video.prepare_segment(
str(material_locals[i]), seg_dur, str(prepared_path),
target_resolution=base_res if need_scale else None
)
prepared_segments.append(prepared_path)
# ── 第二步:拼接所有素材片段 ──
_update_task(task_id, progress=50, message="正在拼接素材片段...")
concat_path = temp_dir / f"{task_id}_concat.mp4"
temp_files.append(concat_path)
video.concat_videos(
[str(p) for p in prepared_segments],
str(concat_path)
)
# ── 第三步:一次 LatentSync 推理 ──
is_ready = await _check_lipsync_ready()
if is_ready:
_update_task(task_id, progress=55, message="正在合成唇形 (LatentSync)...")
print(f"[LipSync] Multi-material: single LatentSync on concatenated video")
try:
await lipsync.generate(str(concat_path), str(audio_path), str(lipsync_video_path))
except Exception as e:
logger.warning(f"[LipSync] Failed, fallback to concat without lipsync: {e}")
import shutil
shutil.copy(str(concat_path), str(lipsync_video_path))
else:
print(f"[LipSync] Not ready, using concatenated video without lipsync")
import shutil
shutil.copy(str(concat_path), str(lipsync_video_path))
lipsync_time = time.time() - lipsync_start
print(f"[Pipeline] Multi-material prepare + concat + LipSync completed in {lipsync_time:.1f}s")
_update_task(task_id, progress=80)
# 如果用户关闭了字幕,清除 captions_pathWhisper 仅用于句子切分)
if not req.enable_subtitles:
captions_path = None
else:
# ══════════════════════════════════════
# 单素材流水线(原有逻辑)
# ══════════════════════════════════════
_update_task(task_id, progress=25)
_update_task(task_id, message="正在合成唇形 (LatentSync)...", progress=30)
lipsync_start = time.time()
is_ready = await _check_lipsync_ready()
if is_ready:
print(f"[LipSync] Starting LatentSync inference...")
_update_task(task_id, progress=35, message="正在运行 LatentSync 推理...")
await lipsync.generate(str(input_material_path), str(audio_path), str(lipsync_video_path))
else:
print(f"[LipSync] LatentSync not ready, copying original video")
_update_task(task_id, message="唇形同步不可用,使用原始视频...")
import shutil
shutil.copy(str(input_material_path), lipsync_video_path)
lipsync_time = time.time() - lipsync_start
print(f"[Pipeline] LipSync completed in {lipsync_time:.1f}s")
_update_task(task_id, progress=80)
# 单素材模式Whisper 在 LatentSync 之后
if req.enable_subtitles:
_update_task(task_id, message="正在生成字幕 (Whisper)...", progress=82)
captions_path = temp_dir / f"{task_id}_captions.json"
temp_files.append(captions_path)
try:
await whisper_service.align(
audio_path=str(audio_path),
text=req.text,
output_path=str(captions_path),
language=_locale_to_whisper_lang(req.language),
)
print(f"[Pipeline] Whisper alignment completed")
except Exception as e:
logger.warning(f"Whisper alignment failed, skipping subtitles: {e}")
captions_path = None
_update_task(task_id, progress=85)
video = VideoService()
final_audio_path = audio_path
if req.bgm_id:
_update_task(task_id, message="正在合成背景音乐...", progress=86)

View File

@@ -43,6 +43,7 @@ class GLMService:
要求:
1. 标题要简洁有力能吸引观众点击不超过10个字
2. 标签要与内容相关便于搜索和推荐只要3个
3. 标题和标签必须使用与口播文案相同的语言(如文案是英文就用英文,日文就用日文)
请严格按以下JSON格式返回不要包含其他内容
{{"title": "标题", "tags": ["标签1", "标签2", "标签3"]}}"""
@@ -120,6 +121,49 @@ class GLMService:
async def translate_text(self, text: str, target_lang: str) -> str:
    """
    Translate a script into the requested language using the GLM chat API.

    Args:
        text: Source script to translate.
        target_lang: Target language name as presented to the model
            (e.g. English, 日本語).

    Returns:
        The translated script with surrounding whitespace stripped.

    Raises:
        Exception: If the request fails or the model returns no usable text.
            The underlying error is chained as ``__cause__``.
    """
    prompt = f"""请将以下文案翻译为{target_lang}
原文:
{text}
要求:
1. 只返回翻译后的文案,不要添加任何解释或说明
2. 保持原文的语气和风格
3. 翻译要自然流畅,符合目标语言的表达习惯"""
    try:
        client = self._get_client()
        logger.info(f"Using GLM to translate text to {target_lang}")
        import asyncio
        # The SDK call is synchronous; run it in a worker thread so the
        # event loop is not blocked while waiting for the model.
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model=settings.GLM_MODEL,
            messages=[{"role": "user", "content": prompt}],
            thinking={"type": "disabled"},
            max_tokens=2000,
            temperature=0.3
        )
        content = response.choices[0].message.content
        # A missing or blank reply is a failure, not a valid translation:
        # returning it would replace the user's script with nothing.
        if content is None or not content.strip():
            raise ValueError("GLM returned an empty translation")
        logger.info("GLM translation completed")
        return content.strip()
    except Exception as e:
        logger.error(f"GLM translate error: {e}")
        # Chain the original exception so the root cause stays in tracebacks.
        raise Exception(f"AI 翻译失败: {str(e)}") from e
def _parse_json_response(self, content: str) -> dict:
"""解析 GLM 返回的 JSON 内容"""
# 尝试直接解析

View File

@@ -138,3 +138,109 @@ class VideoService:
return output_path
else:
raise RuntimeError("FFmpeg composition failed")
def concat_videos(self, video_paths: list, output_path: str) -> str:
    """Concatenate video segments with the FFmpeg concat demuxer (stream copy).

    Args:
        video_paths: Segment file paths, in playback order.
        output_path: Destination file; its parent directory is created if missing.

    Returns:
        ``output_path`` on success.

    Raises:
        ValueError: If ``video_paths`` is empty.
        RuntimeError: If FFmpeg reports failure.
    """
    if not video_paths:
        raise ValueError("No video segments to concat")
    out_file = Path(output_path)
    out_file.parent.mkdir(parents=True, exist_ok=True)
    # Write the concat list file next to the output.
    list_path = out_file.parent / f"{out_file.stem}_concat.txt"
    with open(list_path, "w", encoding="utf-8") as f:
        for vp in video_paths:
            # Inside a single-quoted concat entry a literal quote cannot be
            # escaped in place; it must be written as '\'' (close, escaped
            # quote, reopen). Otherwise a path containing ' breaks the script.
            escaped = str(vp).replace("'", "'\\''")
            f.write(f"file '{escaped}'\n")
    cmd = [
        "ffmpeg", "-y",
        "-f", "concat",
        "-safe", "0",
        "-i", str(list_path),
        "-c", "copy",
        output_path,
    ]
    try:
        if self._run_ffmpeg(cmd):
            return output_path
        raise RuntimeError("FFmpeg concat failed")
    finally:
        # Best-effort cleanup: a leftover list file must never mask the
        # result (or the error) of the concat itself.
        try:
            list_path.unlink(missing_ok=True)
        except Exception:
            pass
def split_audio(self, audio_path: str, start: float, end: float, output_path: str) -> str:
    """Cut the [start, end) time range out of an audio file with FFmpeg (stream copy).

    Returns ``output_path`` on success. Raises ``ValueError`` when the range is
    empty or reversed, and ``RuntimeError`` when FFmpeg reports failure.
    """
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    clip_length = end - start
    if clip_length <= 0:
        raise ValueError(
            f"Invalid audio split range: start={start}, end={end}, duration={clip_length}"
        )
    ffmpeg_args = ["ffmpeg", "-y"]
    # Seek and length are given before the input so they apply to it.
    ffmpeg_args += ["-ss", str(start), "-t", str(clip_length)]
    ffmpeg_args += ["-i", audio_path, "-c", "copy", output_path]
    succeeded = self._run_ffmpeg(ffmpeg_args)
    if not succeeded:
        raise RuntimeError(f"FFmpeg audio split failed: {start}-{end}")
    return output_path
def get_resolution(self, file_path: str) -> tuple:
    """Return the (width, height) of the first video stream, probed with ffprobe.

    Returns ``(0, 0)`` when the file cannot be probed or the output cannot
    be parsed, so callers can treat that value as "unknown".
    """
    cmd = [
        'ffprobe', '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries', 'stream=width,height',
        '-of', 'csv=p=0',
        file_path
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        # ffprobe may print the same stream more than once (one row per
        # listing). Parse only the first non-empty row; otherwise the rows
        # get joined through the newline and int() fails on a valid file.
        rows = [row for row in result.stdout.splitlines() if row.strip()]
        parts = rows[0].strip().split(',')
        return (int(parts[0]), int(parts[1]))
    except Exception:
        return (0, 0)
def prepare_segment(self, video_path: str, target_duration: float, output_path: str,
                    target_resolution: tuple = None) -> str:
    """Trim or loop a material video to an exact duration, dropping its audio.

    Args:
        video_path: Source material video.
        target_duration: Desired output length in seconds; must be positive.
        output_path: Destination file; its parent directory is created if missing.
        target_resolution: Optional (width, height). When given, the video is
            scaled to fit and padded to exactly that size; otherwise the
            original resolution is kept.

    Returns:
        ``output_path`` on success.

    Raises:
        ValueError: If ``target_duration`` is not positive.
        RuntimeError: If FFmpeg reports failure.
    """
    # Reject an empty or reversed slot before touching the filesystem,
    # mirroring the range check in split_audio.
    if target_duration <= 0:
        raise ValueError(f"Invalid segment duration: {target_duration}")
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    video_dur = self._get_duration(video_path)
    if video_dur <= 0:
        # Unknown source length: assume it already covers the slot (no loop).
        video_dur = target_duration
    needs_loop = target_duration > video_dur
    needs_scale = target_resolution is not None
    cmd = ["ffmpeg", "-y"]
    if needs_loop:
        # Request more repetitions than strictly needed; "-t" below cuts
        # the output to the exact length.
        loop_count = int(target_duration / video_dur) + 1
        cmd.extend(["-stream_loop", str(loop_count)])
    cmd.extend(["-i", video_path, "-t", str(target_duration), "-an"])
    if needs_scale:
        w, h = target_resolution
        cmd.extend(["-vf", f"scale={w}:{h}:force_original_aspect_ratio=decrease,pad={w}:{h}:(ow-iw)/2:(oh-ih)/2"])
    # Looping or scaling requires re-encoding; otherwise stream-copy to
    # keep the original quality.
    if needs_loop or needs_scale:
        cmd.extend(["-c:v", "libx264", "-preset", "fast", "-crf", "18"])
    else:
        cmd.extend(["-c:v", "copy"])
    cmd.append(output_path)
    if self._run_ffmpeg(cmd):
        return output_path
    raise RuntimeError(f"FFmpeg prepare_segment failed: {video_path}")

View File

@@ -48,7 +48,7 @@ class VoiceCloneService:
"""
# 使用锁确保串行执行,避免 GPU 显存溢出
async with self._lock:
logger.info(f"🎤 Voice Clone: {text[:30]}...")
logger.info(f"🎤 Voice Clone: {text[:30]}... (language={language})")
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
# 读取参考音频

View File

@@ -20,16 +20,23 @@ MAX_CHARS_PER_LINE = 12
def split_word_to_chars(word: str, start: float, end: float) -> list:
"""
将词拆分成单个字符,时间戳线性插值
将词拆分成单个字符,时间戳线性插值
保留英文词前的空格Whisper 输出如 " Hello"),用于正确重建英文字幕。
Args:
word: 词文本
word: 词文本(可能含前导空格)
start: 词开始时间
end: 词结束时间
Returns:
单字符列表,每个包含 word/start/end
"""
# 保留前导空格(英文 Whisper 输出常见 " Hello" 形式)
leading_space = ""
if word and not word[0].strip():
leading_space = " "
word = word.lstrip()
tokens = []
ascii_buffer = ""
@@ -54,7 +61,8 @@ def split_word_to_chars(word: str, start: float, end: float) -> list:
return []
if len(tokens) == 1:
return [{"word": tokens[0], "start": start, "end": end}]
w = leading_space + tokens[0] if leading_space else tokens[0]
return [{"word": w, "start": start, "end": end}]
# 线性插值时间戳
duration = end - start
@@ -64,8 +72,11 @@ def split_word_to_chars(word: str, start: float, end: float) -> list:
for i, token in enumerate(tokens):
token_start = start + i * token_duration
token_end = start + (i + 1) * token_duration
w = token
if i == 0 and leading_space:
w = leading_space + w
result.append({
"word": token,
"word": w,
"start": round(token_start, 3),
"end": round(token_end, 3)
})
@@ -108,7 +119,7 @@ def split_segment_to_lines(words: List[dict], max_chars: int = MAX_CHARS_PER_LIN
if should_break and current_words:
segments.append({
"text": current_text,
"text": current_text.strip(),
"start": current_words[0]["start"],
"end": current_words[-1]["end"],
"words": current_words.copy()
@@ -119,7 +130,7 @@ def split_segment_to_lines(words: List[dict], max_chars: int = MAX_CHARS_PER_LIN
# 处理剩余的字
if current_words:
segments.append({
"text": current_text,
"text": current_text.strip(),
"start": current_words[0]["start"],
"end": current_words[-1]["end"],
"words": current_words.copy()
@@ -162,7 +173,8 @@ class WhisperService:
self,
audio_path: str,
text: str,
output_path: Optional[str] = None
output_path: Optional[str] = None,
language: str = "zh",
) -> dict:
"""
对音频进行转录,生成字级别时间戳
@@ -171,12 +183,16 @@ class WhisperService:
audio_path: 音频文件路径
text: 原始文本(用于参考,但实际使用 whisper 转录结果)
output_path: 可选,输出 JSON 文件路径
language: 语言代码 (zh/en 等)
Returns:
包含字级别时间戳的字典
"""
import asyncio
# 英文等西文需要更大的每行字数
max_chars = 40 if language != "zh" else MAX_CHARS_PER_LINE
def _do_transcribe():
model = self._load_model()
@@ -185,7 +201,7 @@ class WhisperService:
# 转录并获取字级别时间戳
segments_iter, info = model.transcribe(
audio_path,
language="zh",
language=language,
word_timestamps=True, # 启用字级别时间戳
vad_filter=True, # 启用 VAD 过滤静音
)
@@ -198,9 +214,10 @@ class WhisperService:
all_words = []
if segment.words:
for word_info in segment.words:
word_text = word_info.word.strip()
if word_text:
word_text = word_info.word
if word_text.strip():
# 将词拆分成单字,时间戳线性插值
# 保留前导空格用于英文词间距
chars = split_word_to_chars(
word_text,
word_info.start,
@@ -210,7 +227,7 @@ class WhisperService:
# 将长段落按标点和字数拆分成多行
if all_words:
line_segments = split_segment_to_lines(all_words, MAX_CHARS_PER_LINE)
line_segments = split_segment_to_lines(all_words, max_chars)
all_segments.extend(line_segments)
logger.info(f"Generated {len(all_segments)} subtitle segments")

View File

@@ -8,6 +8,9 @@
"name": "frontend",
"version": "0.1.0",
"dependencies": {
"@dnd-kit/core": "^6.3.1",
"@dnd-kit/sortable": "^10.0.0",
"@dnd-kit/utilities": "^3.2.2",
"@supabase/supabase-js": "^2.93.1",
"axios": "^1.13.4",
"lucide-react": "^0.563.0",
@@ -281,6 +284,59 @@
"node": ">=6.9.0"
}
},
"node_modules/@dnd-kit/accessibility": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/@dnd-kit/accessibility/-/accessibility-3.1.1.tgz",
"integrity": "sha512-2P+YgaXF+gRsIihwwY1gCsQSYnu9Zyj2py8kY5fFvUM1qm2WA2u639R6YNVfU4GWr+ZM5mqEsfHZZLoRONbemw==",
"license": "MIT",
"dependencies": {
"tslib": "^2.0.0"
},
"peerDependencies": {
"react": ">=16.8.0"
}
},
"node_modules/@dnd-kit/core": {
"version": "6.3.1",
"resolved": "https://registry.npmjs.org/@dnd-kit/core/-/core-6.3.1.tgz",
"integrity": "sha512-xkGBRQQab4RLwgXxoqETICr6S5JlogafbhNsidmrkVv2YRs5MLwpjoF2qpiGjQt8S9AoxtIV603s0GIUpY5eYQ==",
"license": "MIT",
"dependencies": {
"@dnd-kit/accessibility": "^3.1.1",
"@dnd-kit/utilities": "^3.2.2",
"tslib": "^2.0.0"
},
"peerDependencies": {
"react": ">=16.8.0",
"react-dom": ">=16.8.0"
}
},
"node_modules/@dnd-kit/sortable": {
"version": "10.0.0",
"resolved": "https://registry.npmjs.org/@dnd-kit/sortable/-/sortable-10.0.0.tgz",
"integrity": "sha512-+xqhmIIzvAYMGfBYYnbKuNicfSsk4RksY2XdmJhT+HAC01nix6fHCztU68jooFiMUB01Ky3F0FyOvhG/BZrWkg==",
"license": "MIT",
"dependencies": {
"@dnd-kit/utilities": "^3.2.2",
"tslib": "^2.0.0"
},
"peerDependencies": {
"@dnd-kit/core": "^6.3.0",
"react": ">=16.8.0"
}
},
"node_modules/@dnd-kit/utilities": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/@dnd-kit/utilities/-/utilities-3.2.2.tgz",
"integrity": "sha512-+MKAJEOfaBe5SmV6t34p80MMKhjvUz0vRrvVJbPT0WElzaOJ/1xs+D+KDv+tD/NE5ujfrChEcshd4fLn0wpiqg==",
"license": "MIT",
"dependencies": {
"tslib": "^2.0.0"
},
"peerDependencies": {
"react": ">=16.8.0"
}
},
"node_modules/@emnapi/core": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.8.1.tgz",

View File

@@ -9,6 +9,9 @@
"lint": "eslint"
},
"dependencies": {
"@dnd-kit/core": "^6.3.1",
"@dnd-kit/sortable": "^10.0.0",
"@dnd-kit/utilities": "^3.2.2",
"@supabase/supabase-js": "^2.93.1",
"axios": "^1.13.4",
"lucide-react": "^0.563.0",

View File

@@ -1,8 +1,8 @@
import type { Metadata, Viewport } from "next";
import { Geist, Geist_Mono } from "next/font/google";
import "./globals.css";
import { AuthProvider } from "@/contexts/AuthContext";
import { TaskProvider } from "@/contexts/TaskContext";
import { AuthProvider } from "@/shared/contexts/AuthContext";
import { TaskProvider } from "@/shared/contexts/TaskContext";
import { Toaster } from "sonner";

View File

@@ -1,7 +1,7 @@
"use client";
import { useState, useEffect, useRef } from "react";
import { useAuth } from "@/contexts/AuthContext";
import { useAuth } from "@/shared/contexts/AuthContext";
import api from "@/shared/api/axios";
import { ApiResponse } from "@/shared/api/types";

View File

@@ -1,6 +1,6 @@
"use client";
import { useTask } from "@/contexts/TaskContext";
import { useTask } from "@/shared/contexts/TaskContext";
import Link from "next/link";
import { usePathname } from "next/navigation";

View File

@@ -11,8 +11,8 @@ import {
} from "@/shared/lib/media";
import { clampTitle } from "@/shared/lib/title";
import { useTitleInput } from "@/shared/hooks/useTitleInput";
import { useAuth } from "@/contexts/AuthContext";
import { useTask } from "@/contexts/TaskContext";
import { useAuth } from "@/shared/contexts/AuthContext";
import { useTask } from "@/shared/contexts/TaskContext";
import { toast } from "sonner";
import { usePublishPrefetch } from "@/shared/hooks/usePublishPrefetch";
import { PublishAccount } from "@/shared/types/publish";
@@ -25,13 +25,64 @@ import { useRefAudios } from "@/features/home/model/useRefAudios";
import { useTitleSubtitleStyles } from "@/features/home/model/useTitleSubtitleStyles";
import { ApiResponse, unwrap } from "@/shared/api/types";
// EdgeTTS voices offered in the voice picker (zh-CN only in this form).
// `id` is the voice identifier sent to the backend; `name` is the UI label.
const VOICES = [
  { id: "zh-CN-YunxiNeural", name: "云溪 (男声-年轻)" },
  { id: "zh-CN-YunjianNeural", name: "云健 (男声-新闻)" },
  { id: "zh-CN-YunyangNeural", name: "云扬 (男声-专业)" },
  { id: "zh-CN-XiaoxiaoNeural", name: "晓晓 (女声-活泼)" },
  { id: "zh-CN-XiaoyiNeural", name: "晓伊 (女声-温柔)" },
];
// EdgeTTS voices offered in the voice picker, grouped by locale code.
// `id` is the voice identifier sent to the backend; `name` is the UI label.
// The first entry of each locale is the one picked when the language changes.
const VOICES: Record<string, { id: string; name: string }[]> = {
  "zh-CN": [
    { id: "zh-CN-YunxiNeural", name: "云溪 (男声-年轻)" },
    { id: "zh-CN-YunjianNeural", name: "云健 (男声-新闻)" },
    { id: "zh-CN-YunyangNeural", name: "云扬 (男声-专业)" },
    { id: "zh-CN-XiaoxiaoNeural", name: "晓晓 (女声-活泼)" },
    { id: "zh-CN-XiaoyiNeural", name: "晓伊 (女声-温柔)" },
  ],
  "en-US": [
    { id: "en-US-GuyNeural", name: "Guy (Male)" },
    { id: "en-US-JennyNeural", name: "Jenny (Female)" },
  ],
  "ja-JP": [
    { id: "ja-JP-KeitaNeural", name: "圭太 (男声)" },
    { id: "ja-JP-NanamiNeural", name: "七海 (女声)" },
  ],
  "ko-KR": [
    { id: "ko-KR-InJoonNeural", name: "인준 (男声)" },
    { id: "ko-KR-SunHiNeural", name: "선히 (女声)" },
  ],
  "fr-FR": [
    { id: "fr-FR-HenriNeural", name: "Henri (Male)" },
    { id: "fr-FR-DeniseNeural", name: "Denise (Female)" },
  ],
  "de-DE": [
    { id: "de-DE-ConradNeural", name: "Conrad (Male)" },
    { id: "de-DE-KatjaNeural", name: "Katja (Female)" },
  ],
  "es-ES": [
    { id: "es-ES-AlvaroNeural", name: "Álvaro (Male)" },
    { id: "es-ES-ElviraNeural", name: "Elvira (Female)" },
  ],
  "ru-RU": [
    { id: "ru-RU-DmitryNeural", name: "Дмитрий (Male)" },
    { id: "ru-RU-SvetlanaNeural", name: "Светлана (Female)" },
  ],
  "it-IT": [
    { id: "it-IT-DiegoNeural", name: "Diego (Male)" },
    { id: "it-IT-ElsaNeural", name: "Elsa (Female)" },
  ],
  "pt-BR": [
    { id: "pt-BR-AntonioNeural", name: "Antonio (Male)" },
    { id: "pt-BR-FranciscaNeural", name: "Francisca (Female)" },
  ],
};
// Maps the language name used as a translation target to the locale code
// that keys the VOICES table.
const LANG_TO_LOCALE: Record<string, string> = Object.fromEntries([
  ["中文", "zh-CN"],
  ["English", "en-US"],
  ["日本語", "ja-JP"],
  ["한국어", "ko-KR"],
  ["Français", "fr-FR"],
  ["Deutsch", "de-DE"],
  ["Español", "es-ES"],
  ["Русский", "ru-RU"],
  ["Italiano", "it-IT"],
  ["Português", "pt-BR"],
]);
@@ -70,22 +121,17 @@ interface RefAudio {
created_at: number;
}
interface Material {
id: string;
name: string;
path: string;
size_mb: number;
scene?: string;
}
import type { Material } from "@/shared/types/material";
export const useHomeController = () => {
const apiBase = getApiBaseUrl();
const [selectedMaterial, setSelectedMaterial] = useState<string>("");
const [selectedMaterials, setSelectedMaterials] = useState<string[]>([]);
const [previewMaterial, setPreviewMaterial] = useState<string | null>(null);
const [text, setText] = useState<string>("");
const [voice, setVoice] = useState<string>("zh-CN-YunxiNeural");
const [textLang, setTextLang] = useState<string>("zh-CN");
// 使用全局任务状态
const { currentTask, isGenerating, startTask } = useTask();
@@ -96,7 +142,6 @@ export const useHomeController = () => {
// 字幕和标题相关状态
const [videoTitle, setVideoTitle] = useState<string>("");
const [enableSubtitles, setEnableSubtitles] = useState<boolean>(true);
const [selectedSubtitleStyleId, setSelectedSubtitleStyleId] = useState<string>("");
const [selectedTitleStyleId, setSelectedTitleStyleId] = useState<string>("");
const [subtitleFontSize, setSubtitleFontSize] = useState<number>(80);
@@ -181,8 +226,8 @@ export const useHomeController = () => {
{ new_name: editMaterialName.trim() }
);
const payload = unwrap(res);
if (selectedMaterial === materialId && payload?.id) {
setSelectedMaterial(payload.id);
if (selectedMaterials.includes(materialId) && payload?.id) {
setSelectedMaterials((prev) => prev.map((x) => (x === materialId ? payload.id : x)));
}
setEditingMaterialId(null);
setEditMaterialName("");
@@ -197,6 +242,10 @@ export const useHomeController = () => {
// AI 生成标题标签
const [isGeneratingMeta, setIsGeneratingMeta] = useState(false);
// AI 多语言翻译
const [isTranslating, setIsTranslating] = useState(false);
const [originalText, setOriginalText] = useState<string | null>(null);
// 在线录音相关
const [isRecording, setIsRecording] = useState(false);
const [recordedBlob, setRecordedBlob] = useState<Blob | null>(null);
@@ -226,11 +275,13 @@ export const useHomeController = () => {
uploadError,
setUploadError,
fetchMaterials,
toggleMaterial,
reorderMaterials,
deleteMaterial,
handleUpload,
} = useMaterials({
selectedMaterial,
setSelectedMaterial,
selectedMaterials,
setSelectedMaterials,
});
const {
@@ -338,14 +389,14 @@ export const useHomeController = () => {
setText,
videoTitle,
setVideoTitle,
enableSubtitles,
setEnableSubtitles,
ttsMode,
setTtsMode,
voice,
setVoice,
selectedMaterial,
setSelectedMaterial,
textLang,
setTextLang,
selectedMaterials,
setSelectedMaterials,
selectedSubtitleStyleId,
setSelectedSubtitleStyleId,
selectedTitleStyleId,
@@ -410,7 +461,8 @@ export const useHomeController = () => {
}, [isGenerating, currentTask, fetchGeneratedVideos]);
useEffect(() => {
const material = materials.find((item) => item.id === selectedMaterial);
const firstSelected = selectedMaterials[0];
const material = materials.find((item) => item.id === firstSelected);
if (!material?.path) {
setMaterialDimensions(null);
return;
@@ -450,7 +502,7 @@ export const useHomeController = () => {
video.removeEventListener("loadedmetadata", handleLoaded);
video.removeEventListener("error", handleError);
};
}, [materials, selectedMaterial]);
}, [materials, selectedMaterials]);
useEffect(() => {
@@ -486,12 +538,13 @@ export const useHomeController = () => {
}, [selectedBgmId, bgmList]);
useEffect(() => {
if (!selectedMaterial) return;
const target = materialItemRefs.current[selectedMaterial];
const firstSelected = selectedMaterials[0];
if (!firstSelected) return;
const target = materialItemRefs.current[firstSelected];
if (target) {
target.scrollIntoView({ block: "nearest", behavior: "smooth" });
}
}, [selectedMaterial, materials]);
}, [selectedMaterials, materials]);
// 【修复】历史视频默认选中逻辑
// 当持久化恢复完成,且列表加载完毕,如果没选中任何视频,默认选中第一个
@@ -639,9 +692,58 @@ export const useHomeController = () => {
}
};
// AI 多语言翻译
/**
 * Translate the current script into `targetLang` through the AI endpoint,
 * then switch the text language (and, in EdgeTTS mode, the voice) to match.
 *
 * The pre-translation text is remembered only after the first successful
 * translation, so a failed request never leaves a stale "original" behind.
 */
const handleTranslate = async (targetLang: string) => {
  const sourceText = text.trim();
  if (!sourceText) {
    toast.error("请先输入口播文案");
    return;
  }

  setIsTranslating(true);
  try {
    const { data: res } = await api.post<ApiResponse<{ translated_text: string }>>(
      "/api/ai/translate",
      { text: sourceText, target_lang: targetLang }
    );
    const payload = unwrap(res);
    // An empty result is a failure: applying it would wipe the user's script.
    if (!payload.translated_text) {
      throw new Error("翻译结果为空");
    }

    // Save the original only now that the translation is known to be good.
    if (originalText === null) {
      setOriginalText(text);
    }
    setText(payload.translated_text);

    // Follow the target language with the text locale and a matching voice.
    const locale = LANG_TO_LOCALE[targetLang] || "zh-CN";
    setTextLang(locale);
    if (ttsMode === "edgetts") {
      const langVoices = VOICES[locale] || VOICES["zh-CN"];
      setVoice(langVoices[0].id);
    }
  } catch (err: unknown) {
    console.error("AI translate failed:", err);
    const axiosErr = err as { response?: { data?: { message?: string } }; message?: string };
    const errorMsg = axiosErr.response?.data?.message || axiosErr.message || String(err);
    toast.error(`AI 翻译失败: ${errorMsg}`);
  } finally {
    setIsTranslating(false);
  }
};
/**
 * Put back the text saved before translation and reset the language to
 * zh-CN; in EdgeTTS mode also reselect the first zh-CN voice. Does nothing
 * when no original text is stored.
 */
const handleRestoreOriginal = () => {
  if (originalText === null) return;

  setText(originalText);
  setOriginalText(null);
  setTextLang("zh-CN");

  if (ttsMode !== "edgetts") return;
  setVoice(VOICES["zh-CN"][0].id);
};
// 生成视频
const handleGenerate = async () => {
if (!selectedMaterial || !text.trim()) {
if (selectedMaterials.length === 0 || !text.trim()) {
toast.error("请先选择素材并填写文案");
return;
}
@@ -663,26 +765,33 @@ export const useHomeController = () => {
try {
// 查找选中的素材对象以获取路径
const materialObj = materials.find((m) => m.id === selectedMaterial);
if (!materialObj) {
const firstMaterialObj = materials.find((m) => m.id === selectedMaterials[0]);
if (!firstMaterialObj) {
toast.error("素材数据异常");
return;
}
// 构建请求参数
const payload: Record<string, unknown> = {
material_path: materialObj.path,
material_path: firstMaterialObj.path,
text: text,
tts_mode: ttsMode,
title: videoTitle.trim() || undefined,
enable_subtitles: enableSubtitles,
enable_subtitles: true,
};
if (enableSubtitles && selectedSubtitleStyleId) {
// 多素材
if (selectedMaterials.length > 1) {
payload.material_paths = selectedMaterials
.map((id) => materials.find((x) => x.id === id)?.path)
.filter((path): path is string => !!path);
}
if (selectedSubtitleStyleId) {
payload.subtitle_style_id = selectedSubtitleStyleId;
}
if (enableSubtitles && subtitleFontSize) {
if (subtitleFontSize) {
payload.subtitle_font_size = Math.round(subtitleFontSize);
}
@@ -698,15 +807,15 @@ export const useHomeController = () => {
payload.title_top_margin = Math.round(titleTopMargin);
}
if (enableSubtitles) {
payload.subtitle_bottom_margin = Math.round(subtitleBottomMargin);
}
payload.subtitle_bottom_margin = Math.round(subtitleBottomMargin);
if (enableBgm && selectedBgmId) {
payload.bgm_id = selectedBgmId;
payload.bgm_volume = bgmVolume;
}
payload.language = textLang;
if (ttsMode === "edgetts") {
payload.voice = voice;
} else {
@@ -774,8 +883,9 @@ export const useHomeController = () => {
fetchMaterials,
deleteMaterial,
handleUpload,
selectedMaterial,
setSelectedMaterial,
selectedMaterials,
toggleMaterial,
reorderMaterials,
handlePreviewMaterial,
editingMaterialId,
editMaterialName,
@@ -789,6 +899,10 @@ export const useHomeController = () => {
setExtractModalOpen,
handleGenerateMeta,
isGeneratingMeta,
handleTranslate,
isTranslating,
originalText,
handleRestoreOriginal,
showStylePreview,
setShowStylePreview,
videoTitle,
@@ -809,17 +923,16 @@ export const useHomeController = () => {
setTitleTopMargin,
subtitleBottomMargin,
setSubtitleBottomMargin,
enableSubtitles,
setEnableSubtitles,
resolveAssetUrl,
getFontFormat,
buildTextShadow,
materialDimensions,
ttsMode,
setTtsMode,
voices: VOICES,
voices: VOICES[textLang] || VOICES["zh-CN"],
voice,
setVoice,
textLang,
refAudios,
selectedRefAudio,
handleSelectRefAudio,

View File

@@ -17,14 +17,14 @@ interface UseHomePersistenceOptions {
setText: React.Dispatch<React.SetStateAction<string>>;
videoTitle: string;
setVideoTitle: React.Dispatch<React.SetStateAction<string>>;
enableSubtitles: boolean;
setEnableSubtitles: React.Dispatch<React.SetStateAction<boolean>>;
ttsMode: 'edgetts' | 'voiceclone';
setTtsMode: React.Dispatch<React.SetStateAction<'edgetts' | 'voiceclone'>>;
voice: string;
setVoice: React.Dispatch<React.SetStateAction<string>>;
selectedMaterial: string;
setSelectedMaterial: React.Dispatch<React.SetStateAction<string>>;
textLang: string;
setTextLang: React.Dispatch<React.SetStateAction<string>>;
selectedMaterials: string[];
setSelectedMaterials: React.Dispatch<React.SetStateAction<string[]>>;
selectedSubtitleStyleId: string;
setSelectedSubtitleStyleId: React.Dispatch<React.SetStateAction<string>>;
selectedTitleStyleId: string;
@@ -57,14 +57,14 @@ export const useHomePersistence = ({
setText,
videoTitle,
setVideoTitle,
enableSubtitles,
setEnableSubtitles,
ttsMode,
setTtsMode,
voice,
setVoice,
selectedMaterial,
setSelectedMaterial,
textLang,
setTextLang,
selectedMaterials,
setSelectedMaterials,
selectedSubtitleStyleId,
setSelectedSubtitleStyleId,
selectedTitleStyleId,
@@ -96,9 +96,9 @@ export const useHomePersistence = ({
const savedText = localStorage.getItem(`vigent_${storageKey}_text`);
const savedTitle = localStorage.getItem(`vigent_${storageKey}_title`);
const savedSubtitles = localStorage.getItem(`vigent_${storageKey}_subtitles`);
const savedTtsMode = localStorage.getItem(`vigent_${storageKey}_ttsMode`);
const savedVoice = localStorage.getItem(`vigent_${storageKey}_voice`);
const savedTextLang = localStorage.getItem(`vigent_${storageKey}_textLang`);
const savedMaterial = localStorage.getItem(`vigent_${storageKey}_material`);
const savedSubtitleStyle = localStorage.getItem(`vigent_${storageKey}_subtitleStyle`);
const savedTitleStyle = localStorage.getItem(`vigent_${storageKey}_titleStyle`);
@@ -113,11 +113,23 @@ export const useHomePersistence = ({
setText(savedText || "大家好,欢迎来到我的频道,今天给大家分享一些有趣的内容。");
setVideoTitle(savedTitle ? clampTitle(savedTitle) : "");
setEnableSubtitles(savedSubtitles !== null ? savedSubtitles === 'true' : true);
setTtsMode((savedTtsMode as 'edgetts' | 'voiceclone') || 'edgetts');
setVoice(savedVoice || "zh-CN-YunxiNeural");
if (savedTextLang) setTextLang(savedTextLang);
if (savedMaterial) setSelectedMaterial(savedMaterial);
if (savedMaterial) {
try {
const parsed = JSON.parse(savedMaterial);
if (Array.isArray(parsed)) {
setSelectedMaterials(parsed);
} else {
setSelectedMaterials([savedMaterial]);
}
} catch {
// 旧格式: 单字符串
setSelectedMaterials([savedMaterial]);
}
}
if (savedSubtitleStyle) setSelectedSubtitleStyleId(savedSubtitleStyle);
if (savedTitleStyle) setSelectedTitleStyleId(savedTitleStyle);
@@ -157,15 +169,15 @@ export const useHomePersistence = ({
isAuthLoading,
setBgmVolume,
setEnableBgm,
setEnableSubtitles,
setSelectedBgmId,
setSelectedMaterial,
setSelectedMaterials,
setSelectedSubtitleStyleId,
setSelectedTitleStyleId,
setSelectedVideoId,
setSubtitleFontSize,
setSubtitleSizeLocked,
setText,
setTextLang,
setTitleFontSize,
setTitleSizeLocked,
setTitleTopMargin,
@@ -192,10 +204,6 @@ export const useHomePersistence = ({
return () => clearTimeout(timeout);
}, [videoTitle, storageKey, isRestored]);
useEffect(() => {
if (isRestored) localStorage.setItem(`vigent_${storageKey}_subtitles`, String(enableSubtitles));
}, [enableSubtitles, storageKey, isRestored]);
useEffect(() => {
if (isRestored) localStorage.setItem(`vigent_${storageKey}_ttsMode`, ttsMode);
}, [ttsMode, storageKey, isRestored]);
@@ -205,10 +213,14 @@ export const useHomePersistence = ({
}, [voice, storageKey, isRestored]);
useEffect(() => {
if (isRestored && selectedMaterial) {
localStorage.setItem(`vigent_${storageKey}_material`, selectedMaterial);
if (isRestored) localStorage.setItem(`vigent_${storageKey}_textLang`, textLang);
}, [textLang, storageKey, isRestored]);
useEffect(() => {
if (isRestored && selectedMaterials.length > 0) {
localStorage.setItem(`vigent_${storageKey}_material`, JSON.stringify(selectedMaterials));
}
}, [selectedMaterial, storageKey, isRestored]);
}, [selectedMaterials, storageKey, isRestored]);
useEffect(() => {
if (isRestored && selectedSubtitleStyleId) {

View File

@@ -2,23 +2,16 @@ import { useCallback, useState } from "react";
import api from "@/shared/api/axios";
import { ApiResponse, unwrap } from "@/shared/api/types";
import { toast } from "sonner";
interface Material {
id: string;
name: string;
scene: string;
size_mb: number;
path: string;
}
import type { Material } from "@/shared/types/material";
interface UseMaterialsOptions {
selectedMaterial: string;
setSelectedMaterial: React.Dispatch<React.SetStateAction<string>>;
selectedMaterials: string[];
setSelectedMaterials: React.Dispatch<React.SetStateAction<string[]>>;
}
export const useMaterials = ({
selectedMaterial,
setSelectedMaterial,
selectedMaterials,
setSelectedMaterials,
}: UseMaterialsOptions) => {
const [materials, setMaterials] = useState<Material[]>([]);
const [fetchError, setFetchError] = useState<string | null>(null);
@@ -41,12 +34,13 @@ export const useMaterials = ({
setMaterials(nextMaterials);
setLastMaterialCount(nextMaterials.length);
setSelectedMaterial((prev) => {
// 如果当前选中的素材在列表中依然存在,保持选中
const exists = nextMaterials.some((item) => item.id === prev);
if (exists) return prev;
setSelectedMaterials((prev) => {
// 保留已选中且仍存在的
const existingIds = new Set(nextMaterials.map((m) => m.id));
const kept = prev.filter((id) => existingIds.has(id));
if (kept.length > 0) return kept;
// 否则默认选中第一个
return nextMaterials[0]?.id || "";
return nextMaterials[0]?.id ? [nextMaterials[0].id] : [];
});
} catch (error) {
console.error("获取素材失败:", error);
@@ -54,29 +48,58 @@ export const useMaterials = ({
} finally {
setIsFetching(false);
}
}, [setSelectedMaterial]);
}, [setSelectedMaterials]);
const MAX_MATERIALS = 4;
/**
 * Adds `id` to, or removes it from, the ordered list of selected materials.
 *
 * - Removing is refused when it would leave the selection empty.
 * - Adding is refused once MAX_MATERIALS entries are already selected.
 * In both refusal cases the previous array instance is returned unchanged.
 */
const toggleMaterial = useCallback((id: string) => {
  setSelectedMaterials((current) => {
    const alreadySelected = current.includes(id);
    if (!alreadySelected) {
      // Adding: ignore the request when the cap has been reached.
      return current.length >= MAX_MATERIALS ? current : [...current, id];
    }
    // Removing: the last remaining selection cannot be deselected.
    if (current.length <= 1) return current;
    return current.filter((selectedId) => selectedId !== id);
  });
}, [setSelectedMaterials]);
/**
 * Moves `activeId` to the position currently occupied by `overId` within the
 * ordered selection. If either id is not in the selection the previous array
 * is returned unchanged; otherwise a new array is produced.
 */
const reorderMaterials = useCallback((activeId: string, overId: string) => {
  setSelectedMaterials((current) => {
    const fromIndex = current.indexOf(activeId);
    const toIndex = current.indexOf(overId);
    if (fromIndex < 0 || toIndex < 0) return current;
    // Drop the dragged entry first, then re-insert it at the target index
    // (the index is measured against the original array, as with splice).
    const withoutActive = current.filter((_, position) => position !== fromIndex);
    return [
      ...withoutActive.slice(0, toIndex),
      activeId,
      ...withoutActive.slice(toIndex),
    ];
  });
}, [setSelectedMaterials]);
const deleteMaterial = useCallback(async (materialId: string) => {
if (!confirm("确定要删除这个素材吗?")) return;
try {
await api.delete(`/api/materials/${materialId}`);
fetchMaterials();
if (selectedMaterial === materialId) {
setSelectedMaterial("");
if (selectedMaterials.includes(materialId)) {
setSelectedMaterials((prev) => {
const next = prev.filter((id) => id !== materialId);
return next.length > 0 ? next : [];
});
}
} catch (error) {
toast.error("删除失败: " + error);
}
}, [fetchMaterials, selectedMaterial, setSelectedMaterial]);
}, [fetchMaterials, selectedMaterials, setSelectedMaterials]);
const handleUpload = useCallback(async (e: React.ChangeEvent<HTMLInputElement>) => {
const file = e.target.files?.[0];
if (!file) return;
const validTypes = ['.mp4', '.mov', '.avi'];
const validTypes = ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.wmv', '.m4v', '.ts', '.mts'];
const ext = file.name.toLowerCase().slice(file.name.lastIndexOf('.'));
if (!validTypes.includes(ext)) {
setUploadError('支持 MP4、MOV、AVI 格式');
setUploadError('支持的视频格式');
return;
}
@@ -100,7 +123,22 @@ export const useMaterials = ({
setUploadProgress(100);
setIsUploading(false);
fetchMaterials();
// 上传后重新拉列表并自动选中新素材
const { data: res } = await api.get<ApiResponse<{ materials: Material[] }>>(
`/api/materials?t=${new Date().getTime()}`
);
const payload = unwrap(res);
const nextMaterials = payload.materials || [];
setMaterials(nextMaterials);
setLastMaterialCount(nextMaterials.length);
// 找出新增的素材 ID 并自动选中
const oldIds = new Set(materials.map((m) => m.id));
const newIds = nextMaterials.filter((m) => !oldIds.has(m.id)).map((m) => m.id);
if (newIds.length > 0) {
setSelectedMaterials((prev) => [...prev, ...newIds]);
}
} catch (err: unknown) {
console.error("Upload failed:", err);
setIsUploading(false);
@@ -122,6 +160,8 @@ export const useMaterials = ({
uploadError,
setUploadError,
fetchMaterials,
toggleMaterial,
reorderMaterials,
deleteMaterial,
handleUpload,
};

View File

@@ -4,6 +4,7 @@ interface GenerateActionBarProps {
isGenerating: boolean;
progress: number;
disabled: boolean;
materialCount?: number;
onGenerate: () => void;
}
@@ -11,43 +12,51 @@ export function GenerateActionBar({
isGenerating,
progress,
disabled,
materialCount = 1,
onGenerate,
}: GenerateActionBarProps) {
return (
<button
onClick={onGenerate}
disabled={disabled}
className={`w-full py-4 rounded-xl font-bold text-lg transition-all ${disabled
? "bg-gray-600 cursor-not-allowed text-gray-400"
: "bg-gradient-to-r from-purple-600 to-pink-600 hover:from-purple-700 hover:to-pink-700 text-white shadow-lg hover:shadow-purple-500/25"
}`}
>
{isGenerating ? (
<span className="flex items-center justify-center gap-3">
<svg className="animate-spin h-5 w-5" viewBox="0 0 24 24">
<circle
className="opacity-25"
cx="12"
cy="12"
r="10"
stroke="currentColor"
strokeWidth="4"
fill="none"
/>
<path
className="opacity-75"
fill="currentColor"
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
/>
</svg>
... {progress}%
</span>
) : (
<span className="flex items-center justify-center gap-2">
<Rocket className="h-5 w-5" />
</span>
<div>
<button
onClick={onGenerate}
disabled={disabled}
className={`w-full py-4 rounded-xl font-bold text-lg transition-all ${disabled
? "bg-gray-600 cursor-not-allowed text-gray-400"
: "bg-gradient-to-r from-purple-600 to-pink-600 hover:from-purple-700 hover:to-pink-700 text-white shadow-lg hover:shadow-purple-500/25"
}`}
>
{isGenerating ? (
<span className="flex items-center justify-center gap-3">
<svg className="animate-spin h-5 w-5" viewBox="0 0 24 24">
<circle
className="opacity-25"
cx="12"
cy="12"
r="10"
stroke="currentColor"
strokeWidth="4"
fill="none"
/>
<path
className="opacity-75"
fill="currentColor"
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
/>
</svg>
... {progress}%
</span>
) : (
<span className="flex items-center justify-center gap-2">
<Rocket className="h-5 w-5" />
</span>
)}
</button>
{!isGenerating && materialCount >= 2 && (
<p className="text-xs text-gray-400 text-center mt-1.5">
({materialCount} )
</p>
)}
</button>
</div>
);
}

View File

@@ -3,7 +3,7 @@
import { useEffect } from "react";
import { useRouter } from "next/navigation";
import VideoPreviewModal from "@/components/VideoPreviewModal";
import ScriptExtractionModal from "@/components/ScriptExtractionModal";
import ScriptExtractionModal from "./ScriptExtractionModal";
import { useHomeController } from "@/features/home/model/useHomeController";
import { BgmPanel } from "@/features/home/ui/BgmPanel";
import { GenerateActionBar } from "@/features/home/ui/GenerateActionBar";
@@ -34,8 +34,9 @@ export function HomePage() {
fetchMaterials,
deleteMaterial,
handleUpload,
selectedMaterial,
setSelectedMaterial,
selectedMaterials,
toggleMaterial,
reorderMaterials,
handlePreviewMaterial,
editingMaterialId,
editMaterialName,
@@ -49,6 +50,10 @@ export function HomePage() {
setExtractModalOpen,
handleGenerateMeta,
isGeneratingMeta,
handleTranslate,
isTranslating,
originalText,
handleRestoreOriginal,
showStylePreview,
setShowStylePreview,
videoTitle,
@@ -69,8 +74,6 @@ export function HomePage() {
setTitleTopMargin,
subtitleBottomMargin,
setSubtitleBottomMargin,
enableSubtitles,
setEnableSubtitles,
resolveAssetUrl,
getFontFormat,
buildTextShadow,
@@ -147,7 +150,7 @@ export function HomePage() {
{/* 素材选择 */}
<MaterialSelector
materials={materials}
selectedMaterial={selectedMaterial}
selectedMaterials={selectedMaterials}
isFetching={isFetching}
lastMaterialCount={lastMaterialCount}
editingMaterialId={editingMaterialId}
@@ -159,7 +162,8 @@ export function HomePage() {
apiBase={apiBase}
onUploadChange={handleUpload}
onRefresh={fetchMaterials}
onSelectMaterial={setSelectedMaterial}
onToggleMaterial={toggleMaterial}
onReorderMaterials={reorderMaterials}
onPreviewMaterial={handlePreviewMaterial}
onStartEditing={startMaterialEditing}
onEditNameChange={setEditMaterialName}
@@ -177,6 +181,10 @@ export function HomePage() {
onOpenExtractModal={() => setExtractModalOpen(true)}
onGenerateMeta={handleGenerateMeta}
isGeneratingMeta={isGeneratingMeta}
onTranslate={handleTranslate}
isTranslating={isTranslating}
hasOriginalText={originalText !== null}
onRestoreOriginal={handleRestoreOriginal}
/>
{/* 标题和字幕设置 */}
@@ -207,8 +215,6 @@ export function HomePage() {
onTitleTopMarginChange={setTitleTopMargin}
subtitleBottomMargin={subtitleBottomMargin}
onSubtitleBottomMarginChange={setSubtitleBottomMargin}
enableSubtitles={enableSubtitles}
onToggleSubtitles={setEnableSubtitles}
resolveAssetUrl={resolveAssetUrl}
getFontFormat={getFontFormat}
buildTextShadow={buildTextShadow}
@@ -276,7 +282,8 @@ export function HomePage() {
<GenerateActionBar
isGenerating={isGenerating}
progress={currentTask?.progress || 0}
disabled={isGenerating || !selectedMaterial || (ttsMode === "voiceclone" && !selectedRefAudio)}
materialCount={selectedMaterials.length}
disabled={isGenerating || selectedMaterials.length === 0 || (ttsMode === "voiceclone" && !selectedRefAudio)}
onGenerate={handleGenerate}
/>
</div>

View File

@@ -1,17 +1,25 @@
import type { ChangeEvent, MouseEvent } from "react";
import { Upload, RefreshCw, Eye, Trash2, X, Pencil, Check } from "lucide-react";
interface Material {
id: string;
name: string;
scene: string;
size_mb: number;
path: string;
}
import { type ChangeEvent, type MouseEvent } from "react";
import { Upload, RefreshCw, Eye, Trash2, X, Pencil, Check, GripVertical } from "lucide-react";
import type { Material } from "@/shared/types/material";
import {
DndContext,
closestCenter,
KeyboardSensor,
PointerSensor,
useSensor,
useSensors,
type DragEndEvent,
} from "@dnd-kit/core";
import {
SortableContext,
horizontalListSortingStrategy,
useSortable,
} from "@dnd-kit/sortable";
import { CSS } from "@dnd-kit/utilities";
interface MaterialSelectorProps {
materials: Material[];
selectedMaterial: string;
selectedMaterials: string[];
isFetching: boolean;
lastMaterialCount: number;
editingMaterialId: string | null;
@@ -23,7 +31,8 @@ interface MaterialSelectorProps {
apiBase: string;
onUploadChange: (event: ChangeEvent<HTMLInputElement>) => void;
onRefresh: () => void;
onSelectMaterial: (id: string) => void;
onToggleMaterial: (id: string) => void;
onReorderMaterials: (activeId: string, overId: string) => void;
onPreviewMaterial: (path: string) => void;
onStartEditing: (material: Material, event: MouseEvent) => void;
onEditNameChange: (value: string) => void;
@@ -34,9 +43,64 @@ interface MaterialSelectorProps {
registerMaterialRef: (id: string, element: HTMLDivElement | null) => void;
}
/**
 * One draggable chip in the "selected materials" ordering strip.
 *
 * @param id       Sortable item id handed to `useSortable`.
 * @param index    Zero-based position in the selection; rendered as a circled
 *                 number, falling back to the plain 1-based number when no
 *                 circled glyph is available for that position.
 * @param label    Text shown on the chip (truncated by CSS).
 * @param onRemove Invoked when the chip's remove button is clicked.
 */
function SortableChip({
  id,
  index,
  label,
  onRemove,
}: {
  id: string;
  index: number;
  label: string;
  onRemove: () => void;
}) {
  const sortable = useSortable({ id });
  const { attributes, listeners, setNodeRef, isDragging } = sortable;

  // Inline style that follows the pointer while the chip is being dragged.
  const dragStyle = {
    transform: CSS.Translate.toString(sortable.transform),
    transition: sortable.transition,
  };

  const circledNumbers = ["\u2460", "\u2461", "\u2462", "\u2463", "\u2464", "\u2465", "\u2466", "\u2467", "\u2468", "\u2469"];
  const orderBadge = circledNumbers[index] || `${index + 1}`;

  // Highlighted appearance while dragging, muted appearance otherwise.
  const stateClasses = isDragging
    ? "bg-purple-500/50 border border-purple-400 text-white shadow-lg shadow-purple-500/30 z-10"
    : "bg-purple-500/30 border border-purple-500/50 text-purple-200";
  const chipClassName = `flex items-center gap-1 rounded-lg px-2 py-1 text-xs whitespace-nowrap transition-colors ${stateClasses}`;

  return (
    <div ref={setNodeRef} style={dragStyle} className={chipClassName}>
      {/* Only the grip handle receives the drag attributes and listeners. */}
      <span {...attributes} {...listeners} className="cursor-grab active:cursor-grabbing text-purple-400">
        <GripVertical className="h-3 w-3" />
      </span>
      <span className="text-purple-300">{orderBadge}</span>
      <span className="max-w-[80px] truncate">{label}</span>
      <button
        onClick={(e) => {
          e.stopPropagation();
          onRemove();
        }}
        className="text-purple-400 hover:text-white ml-0.5"
      >
        <X className="h-3 w-3" />
      </button>
    </div>
  );
}
export function MaterialSelector({
materials,
selectedMaterial,
selectedMaterials,
isFetching,
lastMaterialCount,
editingMaterialId,
@@ -48,7 +112,8 @@ export function MaterialSelector({
apiBase,
onUploadChange,
onRefresh,
onSelectMaterial,
onToggleMaterial,
onReorderMaterials,
onPreviewMaterial,
onStartEditing,
onEditNameChange,
@@ -58,20 +123,36 @@ export function MaterialSelector({
onClearUploadError,
registerMaterialRef,
}: MaterialSelectorProps) {
const sensors = useSensors(
useSensor(PointerSensor, { activationConstraint: { distance: 5 } }),
useSensor(KeyboardSensor)
);
// Drag-end handler for the sortable strip: forwards the dragged id and the
// id it was dropped over to the parent, ignoring drops with no target or
// drops back onto the same item.
const handleDragEnd = ({ active, over }: DragEndEvent) => {
  if (!over) return;
  if (active.id === over.id) return;
  onReorderMaterials(String(active.id), String(over.id));
};
const selectedSet = new Set(selectedMaterials);
const isFull = selectedMaterials.length >= 4;
const circledNumbers = ["\u2460", "\u2461", "\u2462", "\u2463", "\u2464", "\u2465", "\u2466", "\u2467", "\u2468", "\u2469"];
return (
<div className="bg-white/5 rounded-2xl p-4 sm:p-6 border border-white/10 backdrop-blur-sm">
<div className="flex justify-between items-center gap-2 mb-4">
<h2 className="text-base sm:text-lg font-semibold text-white flex items-center gap-2 whitespace-nowrap">
📹
<span className="ml-1 text-[11px] sm:text-xs text-gray-400/90 font-normal">
()
(4)
</span>
</h2>
<div className="flex gap-1.5">
<input
type="file"
id="video-upload"
accept=".mp4,.mov,.avi"
accept="video/*"
onChange={onUploadChange}
className="hidden"
/>
@@ -119,6 +200,38 @@ export function MaterialSelector({
</div>
)}
{/* 已选素材排列(拖拽排序区) - 仅当选中 >= 2 个时显示 */}
{selectedMaterials.length >= 2 && (
<div className="mb-3 p-3 bg-purple-500/10 rounded-xl border border-purple-500/20">
<div className="text-[11px] text-purple-300/70 mb-2">🎬 ()</div>
<DndContext
sensors={sensors}
collisionDetection={closestCenter}
onDragEnd={handleDragEnd}
>
<SortableContext
items={selectedMaterials}
strategy={horizontalListSortingStrategy}
>
<div className="flex flex-wrap gap-1.5">
{selectedMaterials.map((id, index) => {
const m = materials.find((x) => x.id === id);
return (
<SortableChip
key={id}
id={id}
index={index}
label={m?.scene || m?.name || id}
onRemove={() => onToggleMaterial(id)}
/>
);
})}
</div>
</SortableContext>
</DndContext>
</div>
)}
{fetchError ? (
<div className="p-4 bg-red-500/20 text-red-200 rounded-xl text-sm mb-4">
: {fetchError}
@@ -126,7 +239,7 @@ export function MaterialSelector({
API: {apiBase}/api/materials/
</div>
) : isFetching && materials.length === 0 ? (
<div className="space-y-2 max-h-64 overflow-y-auto hide-scrollbar" style={{ contentVisibility: 'auto' }}>
<div className="space-y-2 max-h-48 sm:max-h-64 overflow-y-auto hide-scrollbar" style={{ contentVisibility: 'auto' }}>
{Array.from({ length: Math.min(4, Math.max(1, lastMaterialCount || 1)) }).map((_, index) => (
<div
key={`material-skeleton-${index}`}
@@ -147,82 +260,99 @@ export function MaterialSelector({
</div>
) : (
<div
className="space-y-2 max-h-64 overflow-y-auto hide-scrollbar"
className="space-y-2 max-h-48 sm:max-h-64 overflow-y-auto hide-scrollbar"
style={{ contentVisibility: 'auto' }}
>
{materials.map((m) => (
<div
key={m.id}
ref={(el) => registerMaterialRef(m.id, el)}
className={`p-3 rounded-lg border transition-all flex items-center justify-between group ${selectedMaterial === m.id
? "border-purple-500 bg-purple-500/20"
: "border-white/10 bg-white/5 hover:border-white/30"
}`}
>
{editingMaterialId === m.id ? (
<div className="flex-1 flex items-center gap-2" onClick={(e) => e.stopPropagation()}>
<input
value={editMaterialName}
onChange={(e) => onEditNameChange(e.target.value)}
className="flex-1 bg-black/40 border border-white/20 rounded-md px-2 py-1 text-xs text-white"
autoFocus
/>
<button
onClick={(e) => onSaveEditing(m.id, e)}
className="p-1 text-green-400 hover:text-green-300"
title="保存"
>
<Check className="h-4 w-4" />
</button>
<button
onClick={onCancelEditing}
className="p-1 text-gray-400 hover:text-white"
title="取消"
>
<X className="h-4 w-4" />
</button>
</div>
) : (
<button onClick={() => onSelectMaterial(m.id)} className="flex-1 text-left">
<div className="text-white text-sm truncate">{m.scene || m.name}</div>
<div className="text-gray-400 text-xs">{m.size_mb.toFixed(1)} MB</div>
</button>
)}
<div className="flex items-center gap-2 pl-2">
<button
onClick={(e) => {
e.stopPropagation();
if (m.path) {
onPreviewMaterial(m.path);
}
}}
className="p-1 text-gray-500 hover:text-white opacity-0 group-hover:opacity-100 transition-opacity"
title="预览视频"
>
<Eye className="h-4 w-4" />
</button>
{editingMaterialId !== m.id && (
<button
onClick={(e) => onStartEditing(m, e)}
className="p-1 text-gray-500 hover:text-white opacity-0 group-hover:opacity-100 transition-opacity"
title="重命名"
>
<Pencil className="h-4 w-4" />
{materials.map((m) => {
const isSelected = selectedSet.has(m.id);
const selIndex = selectedMaterials.indexOf(m.id);
return (
<div
key={m.id}
ref={(el) => registerMaterialRef(m.id, el)}
className={`p-3 rounded-lg border transition-all flex items-center justify-between group ${isSelected
? "border-purple-500 bg-purple-500/20"
: isFull
? "border-white/5 bg-white/[0.02] opacity-50 cursor-not-allowed"
: "border-white/10 bg-white/5 hover:border-white/30"
}`}
>
{editingMaterialId === m.id ? (
<div className="flex-1 flex items-center gap-2" onClick={(e) => e.stopPropagation()}>
<input
value={editMaterialName}
onChange={(e) => onEditNameChange(e.target.value)}
className="flex-1 bg-black/40 border border-white/20 rounded-md px-2 py-1 text-xs text-white"
autoFocus
/>
<button
onClick={(e) => onSaveEditing(m.id, e)}
className="p-1 text-green-400 hover:text-green-300"
title="保存"
>
<Check className="h-4 w-4" />
</button>
<button
onClick={onCancelEditing}
className="p-1 text-gray-400 hover:text-white"
title="取消"
>
<X className="h-4 w-4" />
</button>
</div>
) : (
<button onClick={() => onToggleMaterial(m.id)} className="flex-1 text-left flex items-center gap-2">
{/* 复选框 */}
<span
className={`flex-shrink-0 w-4 h-4 rounded border flex items-center justify-center text-[10px] ${isSelected
? "border-purple-500 bg-purple-500 text-white"
: "border-white/30 text-transparent"
}`}
>
{isSelected ? (selIndex >= 0 ? circledNumbers[selIndex] || "✓" : "✓") : ""}
</span>
<div className="min-w-0">
<div className="text-white text-sm truncate">{m.scene || m.name}</div>
<div className="text-gray-400 text-xs">{m.size_mb.toFixed(1)} MB</div>
</div>
</button>
)}
<button
onClick={(e) => {
e.stopPropagation();
onDeleteMaterial(m.id);
}}
className="p-1 text-gray-500 hover:text-red-400 opacity-0 group-hover:opacity-100 transition-opacity"
title="删除素材"
>
<Trash2 className="h-4 w-4" />
</button>
<div className="flex items-center gap-2 pl-2">
<button
onClick={(e) => {
e.stopPropagation();
if (m.path) {
onPreviewMaterial(m.path);
}
}}
className="p-1 text-gray-500 hover:text-white opacity-0 group-hover:opacity-100 transition-opacity"
title="预览视频"
>
<Eye className="h-4 w-4" />
</button>
{editingMaterialId !== m.id && (
<button
onClick={(e) => onStartEditing(m, e)}
className="p-1 text-gray-500 hover:text-white opacity-0 group-hover:opacity-100 transition-opacity"
title="重命名"
>
<Pencil className="h-4 w-4" />
</button>
)}
<button
onClick={(e) => {
e.stopPropagation();
onDeleteMaterial(m.id);
}}
className="p-1 text-gray-500 hover:text-red-400 opacity-0 group-hover:opacity-100 transition-opacity"
title="删除素材"
>
<Trash2 className="h-4 w-4" />
</button>
</div>
</div>
</div>
))}
);
})}
</div>
)}
</div>

View File

@@ -1,4 +1,17 @@
import { FileText, Loader2, Sparkles } from "lucide-react";
import { useEffect, useRef, useState } from "react";
import { FileText, Languages, Loader2, RotateCcw, Sparkles } from "lucide-react";
// Target languages listed in the translate dropdown.
// `code` is the value passed to `onTranslate` when an entry is picked;
// `label` is the text rendered for that entry in the menu.
const LANGUAGES = [
{ code: "English", label: "英语 English" },
{ code: "日本語", label: "日语 日本語" },
{ code: "한국어", label: "韩语 한국어" },
{ code: "Français", label: "法语 Français" },
{ code: "Deutsch", label: "德语 Deutsch" },
{ code: "Español", label: "西班牙语 Español" },
{ code: "Русский", label: "俄语 Русский" },
{ code: "Italiano", label: "意大利语 Italiano" },
{ code: "Português", label: "葡萄牙语 Português" },
];
interface ScriptEditorProps {
text: string;
@@ -6,6 +19,10 @@ interface ScriptEditorProps {
onOpenExtractModal: () => void;
onGenerateMeta: () => void;
isGeneratingMeta: boolean;
onTranslate: (targetLang: string) => void;
isTranslating: boolean;
hasOriginalText: boolean;
onRestoreOriginal: () => void;
}
export function ScriptEditor({
@@ -14,14 +31,37 @@ export function ScriptEditor({
onOpenExtractModal,
onGenerateMeta,
isGeneratingMeta,
onTranslate,
isTranslating,
hasOriginalText,
onRestoreOriginal,
}: ScriptEditorProps) {
const [showLangMenu, setShowLangMenu] = useState(false);
const langMenuRef = useRef<HTMLDivElement>(null);
// Close the language menu when the user presses the mouse outside of it.
// The document-level listener is only attached while the menu is open and
// is removed again by the effect cleanup.
useEffect(() => {
if (!showLangMenu) return;
const handleClickOutside = (e: MouseEvent) => {
// Ignore presses that land inside the element referenced by langMenuRef.
if (langMenuRef.current && !langMenuRef.current.contains(e.target as Node)) {
setShowLangMenu(false);
}
};
document.addEventListener("mousedown", handleClickOutside);
return () => document.removeEventListener("mousedown", handleClickOutside);
}, [showLangMenu]);
// Menu item handler: closes the dropdown, then hands the chosen language
// code to the parent's `onTranslate` callback.
const handleSelectLang = (langCode: string) => {
setShowLangMenu(false);
onTranslate(langCode);
};
return (
<div className="bg-white/5 rounded-2xl p-4 sm:p-6 border border-white/10 backdrop-blur-sm">
<div className="flex flex-wrap justify-between items-center gap-2 mb-4">
<h2 className="text-base sm:text-lg font-semibold text-white flex items-center gap-2 whitespace-nowrap">
<div className="relative z-10 bg-white/5 rounded-2xl p-4 sm:p-6 border border-white/10 backdrop-blur-sm">
<div className="mb-4 space-y-3">
<h2 className="text-base sm:text-lg font-semibold text-white flex items-center gap-2">
</h2>
<div className="flex gap-2 flex-shrink-0">
<div className="flex gap-2 flex-wrap justify-end">
<button
onClick={onOpenExtractModal}
className="px-2 py-1 text-xs rounded transition-all whitespace-nowrap bg-purple-600 hover:bg-purple-700 text-white flex items-center gap-1"
@@ -29,6 +69,54 @@ export function ScriptEditor({
<FileText className="h-3.5 w-3.5" />
</button>
<div className="relative" ref={langMenuRef}>
<button
onClick={() => setShowLangMenu((prev) => !prev)}
disabled={isTranslating || !text.trim()}
className={`px-2 py-1 text-xs rounded transition-all whitespace-nowrap ${
isTranslating || !text.trim()
? "bg-gray-600 cursor-not-allowed text-gray-400"
: "bg-gradient-to-r from-emerald-600 to-teal-600 hover:from-emerald-700 hover:to-teal-700 text-white"
}`}
>
{isTranslating ? (
<span className="flex items-center gap-1">
<Loader2 className="h-3.5 w-3.5 animate-spin" />
...
</span>
) : (
<span className="flex items-center gap-1">
<Languages className="h-3.5 w-3.5" />
AI多语言
</span>
)}
</button>
{showLangMenu && (
<div className="absolute right-0 top-full mt-1 z-50 bg-gray-800 border border-white/10 rounded-lg shadow-xl py-1 min-w-[160px]">
{hasOriginalText && (
<>
<button
onClick={() => { setShowLangMenu(false); onRestoreOriginal(); }}
className="w-full text-left px-3 py-1.5 text-xs text-amber-400 hover:bg-white/10 transition-colors flex items-center gap-1"
>
<RotateCcw className="h-3 w-3" />
</button>
<div className="border-t border-white/10 my-1" />
</>
)}
{LANGUAGES.map((lang) => (
<button
key={lang.code}
onClick={() => handleSelectLang(lang.code)}
className="w-full text-left px-3 py-1.5 text-xs text-gray-200 hover:bg-white/10 transition-colors"
>
{lang.label}
</button>
))}
</div>
)}
</div>
<button
onClick={onGenerateMeta}
disabled={isGeneratingMeta || !text.trim()}

View File

@@ -52,8 +52,6 @@ interface TitleSubtitlePanelProps {
onTitleTopMarginChange: (value: number) => void;
subtitleBottomMargin: number;
onSubtitleBottomMarginChange: (value: number) => void;
enableSubtitles: boolean;
onToggleSubtitles: (value: boolean) => void;
resolveAssetUrl: (path?: string | null) => string | null;
getFontFormat: (fontFile?: string) => string;
buildTextShadow: (color: string, size: number) => string;
@@ -82,8 +80,6 @@ export function TitleSubtitlePanel({
onTitleTopMarginChange,
subtitleBottomMargin,
onSubtitleBottomMarginChange,
enableSubtitles,
onToggleSubtitles,
resolveAssetUrl,
getFontFormat,
buildTextShadow,
@@ -117,7 +113,7 @@ export function TitleSubtitlePanel({
subtitleFontSize={subtitleFontSize}
titleTopMargin={titleTopMargin}
subtitleBottomMargin={subtitleBottomMargin}
enableSubtitles={enableSubtitles}
enableSubtitles={true}
resolveAssetUrl={resolveAssetUrl}
getFontFormat={getFontFormat}
buildTextShadow={buildTextShadow}
@@ -186,7 +182,7 @@ export function TitleSubtitlePanel({
</div>
)}
{enableSubtitles && subtitleStyles.length > 0 && (
{subtitleStyles.length > 0 && (
<div className="mt-4">
<label className="text-sm text-gray-300 mb-2 block"></label>
<div className="grid grid-cols-2 gap-2">
@@ -232,22 +228,6 @@ export function TitleSubtitlePanel({
</div>
</div>
)}
<div className="mt-4 pt-4 border-t border-white/10 flex items-center justify-between">
<div>
<span className="text-sm text-gray-300"></span>
<p className="text-xs text-gray-500 mt-1">OK效果字幕</p>
</div>
<label className="relative inline-flex items-center cursor-pointer">
<input
type="checkbox"
checked={enableSubtitles}
onChange={(e) => onToggleSubtitles(e.target.checked)}
className="sr-only peer"
/>
<div className="w-11 h-6 bg-gray-600 peer-focus:outline-none rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-purple-600"></div>
</label>
</div>
</div>
);
}

View File

@@ -5,8 +5,8 @@ import { ApiResponse, unwrap } from "@/shared/api/types";
import { formatDate, getApiBaseUrl, isAbsoluteUrl, resolveMediaUrl } from "@/shared/lib/media";
import { clampTitle } from "@/shared/lib/title";
import { useTitleInput } from "@/shared/hooks/useTitleInput";
import { useAuth } from "@/contexts/AuthContext";
import { useTask } from "@/contexts/TaskContext";
import { useAuth } from "@/shared/contexts/AuthContext";
import { useTask } from "@/shared/contexts/TaskContext";
import { toast } from "sonner";
import { usePublishPrefetch } from "@/shared/hooks/usePublishPrefetch";
import {

View File

@@ -0,0 +1,7 @@
/**
 * A video material record as returned in the `materials` list of the
 * `/api/materials` response and consumed by the material hook and selector UI.
 */
export interface Material {
/** Identifier used for selection state and for `/api/materials/{id}` requests. */
id: string;
/** Name shown in the UI when `scene` is empty or absent. */
name: string;
/** Location handed to the preview handler; preview is skipped when empty. */
path: string;
/** Size shown in the list with one decimal place and an "MB" suffix. */
size_mb: number;
/** Optional label; when non-empty it is displayed in preference to `name`. */
scene?: string;
}

View File

@@ -134,10 +134,14 @@ async def generate(
try:
print(f"🎤 Generating: {text[:30]}...")
print(f"📝 Ref text: {ref_text[:50]}...")
print(f"🌐 Language: {language}")
start = time.time()
wavs, sr = _model.generate_voice_clone(
# 在线程池中运行,避免阻塞事件循环导致健康检查超时
import asyncio
wavs, sr = await asyncio.to_thread(
_model.generate_voice_clone,
text=text,
language=language,
ref_audio=ref_audio_path,