This commit is contained in:
Kevin Wong
2026-02-08 19:54:11 +08:00
parent ee342cc40f
commit e226224119
32 changed files with 1180 additions and 1207 deletions

17
.gitignore vendored
View File

@@ -20,11 +20,14 @@ node_modules/
out/
.turbo/
# ============ IDE ============
# ============ IDE / AI 工具 ============
.vscode/
.idea/
*.swp
*.swo
.agents/
.opencode/
.claude/
# ============ 系统文件 ============
.DS_Store
@@ -35,11 +38,21 @@ desktop.ini
backend/outputs/
backend/uploads/
backend/cookies/
backend/user_data/
backend/debug_screenshots/
*_cookies.json
# ============ MuseTalk ============
# ============ 模型权重 ============
models/*/checkpoints/
models/MuseTalk/models/
models/MuseTalk/results/
models/LatentSync/temp/
# ============ Remotion 构建 ============
remotion/dist/
# ============ 临时文件 ============
Temp/
# ============ 日志 ============
*.log

View File

@@ -29,15 +29,15 @@ backend/
├── app/
│ ├── core/ # config、deps、security、response
│ ├── modules/ # 业务模块(路由 + 逻辑)
│ │ ├── videos/ # 视频生成任务
│ │ ├── materials/ # 素材管理
│ │ ├── videos/ # 视频生成任务router/schemas/service/workflow
│ │ ├── materials/ # 素材管理router/schemas/service
│ │ ├── publish/ # 多平台发布
│ │ ├── auth/ # 认证与会话
│ │ ├── ai/ # AI 功能(标题标签生成等)
│ │ ├── assets/ # 静态资源(字体/样式/BGM
│ │ ├── ref_audios/ # 声音克隆参考音频
│ │ ├── ref_audios/ # 声音克隆参考音频router/schemas/service
│ │ ├── login_helper/ # 扫码登录辅助
│ │ ├── tools/ # 工具接口
│ │ ├── tools/ # 工具接口router/schemas/service
│ │ └── admin/ # 管理员功能
│ ├── repositories/ # Supabase 数据访问
│ ├── services/ # 外部服务集成
@@ -124,10 +124,13 @@ backend/user_data/{user_uuid}/cookies/
## 8. 开发流程建议
- **新增功能**:先建模块,再写 router/service/workflow
- **修复 Bug**:顺手把涉及的逻辑抽到对应 service/workflow。
- **新增功能**:先建模块,**必须**包含 `router.py + schemas.py + service.py`,不允许 router-only
- **修复 Bug**:顺手把涉及的逻辑抽到对应 service/workflow(渐进式改造)
- **改旧模块**:改动哪部分就拆哪部分,不要求一次重构整个文件。
- **核心流程变更**:必跑冒烟(登录/生成/发布)。
> **渐进原则**:新代码高标准,旧代码逐步改。不做大规模一次性重构,避免引入回归风险。
---
## 9. 常用环境变量

View File

@@ -15,15 +15,15 @@ backend/
├── app/
│ ├── core/ # 核心配置 (config.py, security.py, response.py)
│ ├── modules/ # 业务模块 (router/service/workflow/schemas)
│ │ ├── videos/ # 视频生成任务
│ │ ├── materials/ # 素材管理
│ │ ├── videos/ # 视频生成任务router/schemas/service/workflow
│ │ ├── materials/ # 素材管理router/schemas/service
│ │ ├── publish/ # 多平台发布
│ │ ├── auth/ # 认证与会话
│ │ ├── ai/ # AI 功能(标题标签生成)
│ │ ├── assets/ # 静态资源(字体/样式/BGM
│ │ ├── ref_audios/ # 声音克隆参考音频
│ │ ├── ref_audios/ # 声音克隆参考音频router/schemas/service
│ │ ├── login_helper/ # 扫码登录辅助
│ │ ├── tools/ # 工具接口(文案提取等
│ │ ├── tools/ # 工具接口(router/schemas/service
│ │ └── admin/ # 管理员功能
│ ├── repositories/ # Supabase 数据访问
│ ├── services/ # 外部服务集成 (TTS/Remotion/Storage/Uploader 等)

View File

@@ -342,6 +342,6 @@ models/Qwen3-TTS/
## 🔗 相关文档
- [task_complete.md](../task_complete.md) - 任务总览
- [TASK_COMPLETE.md](../TASK_COMPLETE.md) - 任务总览
- [Day11.md](./Day11.md) - 上传架构重构
- [QWEN3_TTS_DEPLOY.md](../QWEN3_TTS_DEPLOY.md) - Qwen3-TTS 部署指南

View File

@@ -273,7 +273,7 @@ pm2 logs vigent2-qwen-tts --lines 50
## 🔗 相关文档
- [task_complete.md](../task_complete.md) - 任务总览
- [TASK_COMPLETE.md](../TASK_COMPLETE.md) - 任务总览
- [Day12.md](./Day12.md) - iOS 兼容与 Qwen3-TTS 部署
- [QWEN3_TTS_DEPLOY.md](../QWEN3_TTS_DEPLOY.md) - Qwen3-TTS 部署指南
- [SUBTITLE_DEPLOY.md](../SUBTITLE_DEPLOY.md) - 字幕功能部署指南

View File

@@ -397,6 +397,6 @@ if ((status === 401 || status === 403) && !isRedirecting && !isPublicPath) {
## 🔗 相关文档
- [task_complete.md](../task_complete.md) - 任务总览
- [TASK_COMPLETE.md](../TASK_COMPLETE.md) - 任务总览
- [Day13.md](./Day13.md) - 声音克隆功能集成 + 字幕功能
- [QWEN3_TTS_DEPLOY.md](../QWEN3_TTS_DEPLOY.md) - Qwen3-TTS 1.7B 部署指南

View File

@@ -342,7 +342,7 @@ pm2 restart vigent2-backend vigent2-frontend
## 🔗 相关文档
- [task_complete.md](../task_complete.md) - 任务总览
- [TASK_COMPLETE.md](../TASK_COMPLETE.md) - 任务总览
- [Day14.md](./Day14.md) - 模型升级 + AI 标题标签
- [AUTH_DEPLOY.md](../AUTH_DEPLOY.md) - 认证系统部署指南

View File

@@ -136,4 +136,4 @@ if service["failures"] >= service['threshold']:
- [x] `Docs/QWEN3_TTS_DEPLOY.md`: 添加 Flash Attention 安装指南
- [x] `Docs/DEPLOY_MANUAL.md`: 添加 Watchdog 部署说明
- [x] `Docs/task_complete.md`: 更新进度至 100% (Day 16)
- [x] `Docs/TASK_COMPLETE.md`: 更新进度至 100% (Day 16)

View File

@@ -246,3 +246,72 @@ PLATFORM_CONFIGS = {
pm2 restart vigent2-backend # 发布服务 + QR登录
npm run build && pm2 restart vigent2-frontend # 刷脸验证UI
```
---
## 🏗️ 架构优化:前端结构微调 + 后端模块分层 (Day 21)
### 概述
根据架构审计结果,完成前端目录规范化和后端核心模块的分层补全。
### 一、前端结构微调
#### 1. ScriptExtractionModal 迁移
- `components/ScriptExtractionModal.tsx``features/home/ui/ScriptExtractionModal.tsx`
- 连带 `components/script-extraction/` 目录一并迁移到 `features/home/ui/script-extraction/`
- 更新 `HomePage.tsx` 的 import 路径
#### 2. contexts/ 目录归并
- `src/contexts/AuthContext.tsx``src/shared/contexts/AuthContext.tsx`
- `src/contexts/TaskContext.tsx``src/shared/contexts/TaskContext.tsx`
- 更新 6 处 importlayout.tsx, useHomeController.ts, usePublishController.ts, AccountSettingsDropdown.tsx, GlobalTaskIndicator.tsx
- 删除空的 `src/contexts/` 目录
#### 3. 清理重构遗留空目录
- 删除 `src/lib/``src/components/home/``src/hooks/`
### 二、后端模块分层补全
将 3 个 400+ 行的 router-only 模块拆分为 `router.py + schemas.py + service.py`
| 模块 | 改造前 | 改造后 router |
|------|--------|--------------|
| `materials/` | 416 行 | 63 行 |
| `tools/` | 417 行 | 33 行 |
| `ref_audios/` | 421 行 | 71 行 |
业务逻辑全部提取到 `service.py`,数据模型定义在 `schemas.py`router 只做参数校验 + 调用 service + 返回响应。
### 三、开发规范更新
`BACKEND_DEV.md` 第 8 节新增渐进原则:
- 新模块**必须**包含 `router.py + schemas.py + service.py`
- 改旧模块时顺手拆涉及的部分
- 新代码高标准,旧代码逐步改
### 涉及文件汇总
| 文件 | 变更 |
|------|------|
| `frontend/src/features/home/ui/ScriptExtractionModal.tsx` | 从 components/ 迁入 |
| `frontend/src/features/home/ui/script-extraction/` | 从 components/ 迁入 |
| `frontend/src/shared/contexts/AuthContext.tsx` | 从 contexts/ 迁入 |
| `frontend/src/shared/contexts/TaskContext.tsx` | 从 contexts/ 迁入 |
| `backend/app/modules/materials/schemas.py` | **新建** |
| `backend/app/modules/materials/service.py` | **新建** |
| `backend/app/modules/materials/router.py` | 精简为薄路由 |
| `backend/app/modules/tools/schemas.py` | **新建** |
| `backend/app/modules/tools/service.py` | **新建** |
| `backend/app/modules/tools/router.py` | 精简为薄路由 |
| `backend/app/modules/ref_audios/schemas.py` | **新建** |
| `backend/app/modules/ref_audios/service.py` | **新建** |
| `backend/app/modules/ref_audios/router.py` | 精简为薄路由 |
| `Docs/BACKEND_DEV.md` | 目录结构标注分层、新增渐进原则 |
| `Docs/BACKEND_README.md` | 目录结构标注分层 |
| `Docs/FRONTEND_DEV.md` | 更新目录结构contexts 迁移、ScriptExtractionModal 迁移) |
### 重启要求
```bash
pm2 restart vigent2-backend
npm run build && pm2 restart vigent2-frontend
```

View File

@@ -389,7 +389,7 @@ if not qr_element:
## 📋 文档规则优化 (16:42 - 17:10)
**问题**Doc_Rules需要优化,避免误删历史内容、规范工具使用、防止任务清单遗漏
**问题**DOC_RULES需要优化,避免误删历史内容、规范工具使用、防止任务清单遗漏
**优化内容(最终版)**
@@ -411,7 +411,7 @@ if not qr_element:
- 移除无关项目组件
**修改文件**
- `Docs/Doc_Rules.md` - 包含检查清单的最终完善版
- `Docs/DOC_RULES.md` - 包含检查清单的最终完善版
---

View File

@@ -8,8 +8,8 @@
| 规则 | 说明 |
|------|------|
| **默认更新** | 更新 `DayN.md` |
| **按需更新** | `task_complete.md` 仅在用户**明确要求**时更新 |
| **默认更新** | 更新 `DayN.md``TASK_COMPLETE.md` |
| **按需更新** | 其他文档仅在内容变化涉及时更新 |
| **智能修改** | 错误→替换,改进→追加(见下方详细规则) |
| **先读后写** | 更新前先查看文件当前内容 |
| **日内合并** | 同一天的多次小修改合并为最终版本 |
@@ -23,7 +23,7 @@
| 优先级 | 文件路径 | 检查重点 |
| :---: | :--- | :--- |
| 🔥 **High** | `Docs/DevLogs/DayN.md` | **(最新日志)** 详细记录变更、修复、代码片段 |
| 🔥 **High** | `Docs/task_complete.md` | **(任务总览)** 更新 `[x]`、进度条、时间线 |
| 🔥 **High** | `Docs/TASK_COMPLETE.md` | **(任务总览)** 更新 `[x]`、进度条、时间线 |
| ⚡ **Med** | `README.md` | **(项目主页)** 功能特性、技术栈、最新截图 |
| ⚡ **Med** | `Docs/DEPLOY_MANUAL.md` | **(部署手册)** 环境变量、依赖包、启动命令变更 |
| ⚡ **Med** | `Docs/BACKEND_DEV.md` | **(后端规范)** 接口契约、模块划分、环境变量 |
@@ -186,15 +186,15 @@ new_string: "**状态**:✅ 已修复"
```
ViGent2/Docs/
├── task_complete.md # 任务总览(仅按需更新)
├── Doc_Rules.md # 本文件
├── TASK_COMPLETE.md # 任务总览(仅按需更新)
├── DOC_RULES.md # 本文件
├── BACKEND_DEV.md # 后端开发规范
├── BACKEND_README.md # 后端功能文档
├── FRONTEND_DEV.md # 前端开发规范
├── FRONTEND_README.md # 前端功能文档
├── DEPLOY_MANUAL.md # 部署手册
├── SUPABASE_DEPLOY.md # Supabase 部署文档
├── LatentSync_DEPLOY.md # LatentSync 部署文档
├── LATENTSYNC_DEPLOY.md # LatentSync 部署文档
├── QWEN3_TTS_DEPLOY.md # 声音克隆部署文档
├── SUBTITLE_DEPLOY.md # 字幕系统部署文档
└── DevLogs/
@@ -206,8 +206,16 @@ ViGent2/Docs/
## 📅 DayN.md 更新规则(日常更新)
### 更新时机
> **边开发边记录,不要等到最后才写。**
- 每完成一个功能/修复后,**立即**追加到 DayN.md
- 避免积攒到对话末尾一次性补写,容易遗漏变更
- `TASK_COMPLETE.md` 同理,重要变更完成后及时同步
### 新建判断 (对话开始前)
1. **回顾进度**:查看 `task_complete.md` 了解当前状态
1. **回顾进度**:查看 `TASK_COMPLETE.md` 了解当前状态
2. **检查日期**:查看最新 `DayN.md`
- **今天 (与当前日期相同)** → 🚨 **绝对禁止创建新文件**,必须**追加**到现有 `DayN.md` 末尾!即使是完全不同的功能模块。
- **之前 (昨天或更早)** → 创建 `Day{N+1}.md`
@@ -263,17 +271,17 @@ ViGent2/Docs/
---
## 📝 task_complete.md 更新规则(仅按需)
## 📝 TASK_COMPLETE.md 更新规则
> ⚠️ **仅当用户明确要求更新 `task_complete.md` 时才更新**
> 与 DayN.md 同步更新,记录重要变更时更新任务总览。
### 更新原则
- **格式一致性**:直接参考 `task_complete.md` 现有格式追加内容。
- **格式一致性**:直接参考 `TASK_COMPLETE.md` 现有格式追加内容。
- **进度更新**:仅在阶段性里程碑时更新进度百分比。
### 🔍 完整性检查清单 (必做)
每次更新 `task_complete.md` 时,必须**逐一检查**以下所有板块:
每次更新 `TASK_COMPLETE.md` 时,必须**逐一检查**以下所有板块:
1. **文件头部 & 导航**
- [ ] `更新时间`:必须是当天日期

View File

@@ -28,6 +28,9 @@ frontend/src/
│ │ ├── HomeHeader.tsx
│ │ ├── MaterialSelector.tsx
│ │ ├── ScriptEditor.tsx
│ │ ├── ScriptExtractionModal.tsx
│ │ ├── script-extraction/
│ │ │ └── useScriptExtraction.ts
│ │ ├── TitleSubtitlePanel.tsx
│ │ ├── FloatingStylePreview.tsx
│ │ ├── VoiceSelector.tsx
@@ -55,11 +58,11 @@ frontend/src/
│ ├── types/
│ │ ├── user.ts # User 类型定义
│ │ └── publish.ts # 发布相关类型
│ └── contexts/ # 已迁移的 Context
├── contexts/ # 全局 ContextAuth、Task
│ └── contexts/ # 全局 ContextAuth、Task
│ ├── AuthContext.tsx
│ └── TaskContext.tsx
├── components/ # 遗留通用组件
── VideoPreviewModal.tsx
│ └── ScriptExtractionModal.tsx
── VideoPreviewModal.tsx
└── proxy.ts # Next.js middleware路由保护
```
@@ -278,8 +281,8 @@ import { formatDate } from '@/shared/lib/media';
- `shared/lib`通用工具函数media.ts / auth.ts / title.ts
- `shared/hooks`:跨功能通用 hooks
- `shared/types`跨功能实体类型User / PublishVideo 等)
- `contexts/`:全局 ContextAuthContext / TaskContext
- `components/`遗留通用组件VideoPreviewModal
- `shared/contexts`:全局 ContextAuthContext / TaskContext
- `components/`遗留通用组件VideoPreviewModal
## 类型定义规范

View File

@@ -10,18 +10,25 @@
> 这里记录了每一天的核心开发内容与 milestone。
### Day 21: 缺陷修复与持久化回归治理 (Current)
### Day 21: 缺陷修复 + 浮动预览 + 发布重构 + 架构优化 (Current)
- [x] **Remotion 崩溃容错**: 渲染进程 SIGABRT 退出时检查输出文件,避免误判失败导致标题/字幕丢失。
- [x] **首页作品选择持久化**: 修复 `fetchGeneratedVideos` 无条件覆盖恢复值的问题,新增 `preferVideoId` 参数控制选中逻辑。
- [x] **发布页作品选择持久化**: 根因为签名 URL 不稳定,全面改用 `video.id` 替代 `path` 进行选择/持久化/比较。
- [x] **预取缓存补全**: 首页预取发布页数据时加入 `id` 字段,确保缓存数据可用于持久化匹配。
- [x] **浮动样式预览窗口**: 标题字幕预览改为 `position: fixed` 浮动窗口,固定左上角,滚动时始终可见。
- [x] **移动端适配**: ScriptEditor 按钮换行、预览默认比例改为 9:16 竖屏。
- [x] **多平台发布重构**: 平台配置独立化DOUYIN_*/WEIXIN_*)、用户隔离 Cookie 管理、抖音刷脸验证二维码、微信发布流程优化。
- [x] **前端结构微调**: ScriptExtractionModal 迁移到 features/、contexts 迁移到 shared/contexts/、清理空目录。
- [x] **后端模块分层**: materials/tools/ref_audios 三个模块补全 router+schemas+service 分层。
- [x] **开发规范更新**: BACKEND_DEV.md 新增渐进原则、DOC_RULES.md 取消 TASK_COMPLETE.md 手动触发约束。
- [x] **文档全面更新**: BACKEND_DEV/README、FRONTEND_DEV、DEPLOY_MANUAL、README.md 同步更新。
### Day 20: 代码质量与安全优化
- [x] **功能性修复**: LatentSync 回退逻辑、任务状态接口认证、User 类型统一。
- [x] **性能优化**: N+1 查询修复、视频上传流式处理、httpx 异步替换、GLM 异步包装。
- [x] **安全修复**: 硬编码 Cookie 配置化、日志敏感信息脱敏、ffprobe 安全调用、CORS 配置化。
- [x] **配置优化**: 存储路径环境变量化、Remotion 预编译加速、LatentSync 绝对路径。
- [x] **文档更新**: 更新 Doc_Rules.md 清单,补齐后端与部署文档;更新 SUBTITLE_DEPLOY.md, FRONTEND_DEV.md, implementation_plan.md。
- [x] **文档更新**: 更新 DOC_RULES.md 清单,补齐后端与部署文档;更新 SUBTITLE_DEPLOY.md, FRONTEND_DEV.md, implementation_plan.md。
- [x] **缺陷修复**: 修复 Remotion 路径解析、发布页持久化竞态、首页选中回归、素材闭包陷阱。
### Day 19: 自动发布稳定性与发布体验优化 🚀

View File

@@ -1,354 +1,45 @@
from fastapi import APIRouter, UploadFile, File, HTTPException, Request, BackgroundTasks, Depends
from app.core.config import settings
from fastapi import APIRouter, HTTPException, Request, Depends
from loguru import logger
from app.core.deps import get_current_user
from app.core.response import success_response
from app.services.storage import storage_service
import re
import time
import traceback
import os
import aiofiles
from pathlib import Path
from loguru import logger
import asyncio
from pydantic import BaseModel
from typing import Optional
import httpx
from app.modules.materials.schemas import RenameMaterialRequest
from app.modules.materials import service
router = APIRouter()
class RenameMaterialRequest(BaseModel):
new_name: str
def sanitize_filename(filename: str) -> str:
safe_name = re.sub(r'[<>:"/\\|?*]', '_', filename)
if len(safe_name) > 100:
ext = Path(safe_name).suffix
safe_name = safe_name[:100 - len(ext)] + ext
return safe_name
async def process_and_upload(temp_file_path: str, original_filename: str, content_type: str, user_id: str):
"""Background task to strip multipart headers and upload to Supabase"""
try:
logger.info(f"Processing raw upload: {temp_file_path} for user {user_id}")
# 1. Analyze file to find actual video content (strip multipart boundaries)
# This is a simplified manual parser for a SINGLE file upload.
# Structure:
# --boundary
# Content-Disposition: form-data; name="file"; filename="..."
# Content-Type: video/mp4
# \r\n\r\n
# [DATA]
# \r\n--boundary--
# We need to read the first few KB to find the header end
start_offset = 0
end_offset = 0
boundary = b""
file_size = os.path.getsize(temp_file_path)
with open(temp_file_path, 'rb') as f:
# Read first 4KB to find header
head = f.read(4096)
# Find boundary
first_line_end = head.find(b'\r\n')
if first_line_end == -1:
raise Exception("Could not find boundary in multipart body")
boundary = head[:first_line_end] # e.g. --boundary123
logger.info(f"Detected boundary: {boundary}")
# Find end of headers (\r\n\r\n)
header_end = head.find(b'\r\n\r\n')
if header_end == -1:
raise Exception("Could not find end of multipart headers")
start_offset = header_end + 4
logger.info(f"Video data starts at offset: {start_offset}")
# Find end boundary (read from end of file)
# It should be \r\n + boundary + -- + \r\n
# We seek to end-200 bytes
f.seek(max(0, file_size - 200))
tail = f.read()
# The closing boundary is usually --boundary--
# We look for the last occurrence of the boundary
last_boundary_pos = tail.rfind(boundary)
if last_boundary_pos != -1:
# The data ends before \r\n + boundary
# The tail buffer relative position needs to be converted to absolute
end_pos_in_tail = last_boundary_pos
# We also need to check for the preceding \r\n
if end_pos_in_tail >= 2 and tail[end_pos_in_tail-2:end_pos_in_tail] == b'\r\n':
end_pos_in_tail -= 2
# Absolute end offset
end_offset = (file_size - 200) + last_boundary_pos
# Correction for CRLF before boundary
# Actually, simply: read until (file_size - len(tail) + last_boundary_pos) - 2
end_offset = (max(0, file_size - 200) + last_boundary_pos) - 2
else:
logger.warning("Could not find closing boundary, assuming EOF")
end_offset = file_size
logger.info(f"Video data ends at offset: {end_offset}. Total video size: {end_offset - start_offset}")
# 2. Extract and Upload to Supabase
# Since we have the file on disk, we can just pass the file object (seeked) to upload_file?
# Or if upload_file expects bytes/path, checking storage.py...
# It takes `file_data` (bytes) or file-like?
# supabase-py's `upload` method handles parsing if we pass a file object.
# But we need to pass ONLY the video slice.
# So we create a generator or a sliced file object?
# Simpler: Read the slice into memory if < 1GB? Or copy to new temp file?
# Copying to new temp file is safer for memory.
video_path = temp_file_path + "_video.mp4"
with open(temp_file_path, 'rb') as src, open(video_path, 'wb') as dst:
src.seek(start_offset)
# Copy in chunks
bytes_to_copy = end_offset - start_offset
copied = 0
while copied < bytes_to_copy:
chunk_size = min(1024*1024*10, bytes_to_copy - copied) # 10MB chunks
chunk = src.read(chunk_size)
if not chunk:
break
dst.write(chunk)
copied += len(chunk)
logger.info(f"Extracted video content to {video_path}")
# 3. Upload to Supabase with user isolation
timestamp = int(time.time())
safe_name = re.sub(r'[^a-zA-Z0-9._-]', '', original_filename)
# 使用 user_id 作为目录前缀实现隔离
storage_path = f"{user_id}/{timestamp}_{safe_name}"
# Use storage service (this calls Supabase which might do its own http request)
# We read the cleaned video file
with open(video_path, 'rb') as f:
file_content = f.read() # Still reading into memory for simple upload call, but server has 32GB RAM so ok for 500MB
await storage_service.upload_file(
bucket=storage_service.BUCKET_MATERIALS,
path=storage_path,
file_data=file_content,
content_type=content_type
)
logger.info(f"Upload to Supabase complete: {storage_path}")
# Cleanup
os.remove(temp_file_path)
os.remove(video_path)
return storage_path
except Exception as e:
logger.error(f"Background upload processing failed: {e}\n{traceback.format_exc()}")
raise
@router.post("")
async def upload_material(
request: Request,
background_tasks: BackgroundTasks,
current_user: dict = Depends(get_current_user)
):
user_id = current_user["id"]
logger.info(f"ENTERED upload_material (Streaming Mode) for user {user_id}. Headers: {request.headers}")
filename = "unknown_video.mp4" # Fallback
content_type = "video/mp4"
# Try to parse filename from header if possible (unreliable in raw stream)
# We will rely on post-processing or client hint
# Frontend sends standard multipart.
# Create temp file
timestamp = int(time.time())
temp_filename = f"upload_{timestamp}.raw"
temp_path = os.path.join("/tmp", temp_filename) # Use /tmp on Linux
# Ensure /tmp exists (it does) but verify paths
if os.name == 'nt': # Local dev
temp_path = f"d:/tmp/{temp_filename}"
os.makedirs("d:/tmp", exist_ok=True)
logger.info(f"Upload material request from user {user_id}")
try:
total_size = 0
last_log = 0
async with aiofiles.open(temp_path, 'wb') as f:
async for chunk in request.stream():
await f.write(chunk)
total_size += len(chunk)
# Log progress every 20MB
if total_size - last_log > 20 * 1024 * 1024:
logger.info(f"Receiving stream... Processed {total_size / (1024*1024):.2f} MB")
last_log = total_size
logger.info(f"Stream reception complete. Total size: {total_size} bytes. Saved to {temp_path}")
if total_size == 0:
raise HTTPException(400, "Received empty body")
# Attempt to extract filename from the saved file's first bytes?
# Or just accept it as "uploaded_video.mp4" for now to prove it works.
# We can try to regex the header in the file content we just wrote.
# Implemented in background task to return success immediately.
# Wait, if we return immediately, the user's UI might not show the file yet?
# The prompt says "Wait for upload".
# But to avoid User Waiting Timeout, maybe returning early is better?
# NO, user expects the file to be in the list.
# So we Must await the processing.
# But "Processing" (Strip + Upload to Supabase) takes time.
# Receiving took time.
# If we await Supabase upload, does it timeout?
# Supabase upload is outgoing. Usually faster/stable.
# Let's await the processing to ensure "List Materials" shows it.
# We need to extract the filename for the list.
# Quick extract filename from first 4kb
with open(temp_path, 'rb') as f:
head = f.read(4096).decode('utf-8', errors='ignore')
match = re.search(r'filename="([^"]+)"', head)
if match:
filename = match.group(1)
logger.info(f"Extracted filename from body: {filename}")
# Run processing sync (in await)
storage_path = await process_and_upload(temp_path, filename, content_type, user_id)
# Get signed URL (it exists now)
signed_url = await storage_service.get_signed_url(
bucket=storage_service.BUCKET_MATERIALS,
path=storage_path
)
size_mb = total_size / (1024 * 1024) # Approximate (includes headers)
# 从 storage_path 提取显示名
display_name = storage_path.split('/')[-1] # 去掉 user_id 前缀
if '_' in display_name:
parts = display_name.split('_', 1)
if parts[0].isdigit():
display_name = parts[1]
return success_response({
"id": storage_path,
"name": display_name,
"path": signed_url,
"size_mb": size_mb,
"type": "video"
})
result = await service.upload_material(request, user_id)
return success_response(result)
except ValueError as e:
raise HTTPException(400, str(e))
except Exception as e:
error_msg = f"Streaming upload failed: {str(e)}"
detail_msg = f"Exception: {repr(e)}\nArgs: {e.args}\n{traceback.format_exc()}"
logger.error(error_msg + "\n" + detail_msg)
# Write to debug file
try:
with open("debug_upload.log", "a") as logf:
logf.write(f"\n--- Error at {time.ctime()} ---\n")
logf.write(detail_msg)
logf.write("\n-----------------------------\n")
except:
pass
if os.path.exists(temp_path):
try:
os.remove(temp_path)
except:
pass
raise HTTPException(500, f"Upload failed. Check server logs. Error: {str(e)}")
raise HTTPException(500, f"Upload failed. Error: {str(e)}")
@router.get("")
async def list_materials(current_user: dict = Depends(get_current_user)):
user_id = current_user["id"]
try:
# 只列出当前用户目录下的文件
files_obj = await storage_service.list_files(
bucket=storage_service.BUCKET_MATERIALS,
path=user_id
)
semaphore = asyncio.Semaphore(8)
async def build_item(f):
name = f.get('name')
if not name or name == '.emptyFolderPlaceholder':
return None
display_name = name
if '_' in name:
parts = name.split('_', 1)
if parts[0].isdigit():
display_name = parts[1]
full_path = f"{user_id}/{name}"
async with semaphore:
signed_url = await storage_service.get_signed_url(
bucket=storage_service.BUCKET_MATERIALS,
path=full_path
)
metadata = f.get('metadata', {})
size = metadata.get('size', 0)
created_at_str = f.get('created_at', '')
created_at = 0
if created_at_str:
from datetime import datetime
try:
dt = datetime.fromisoformat(created_at_str.replace('Z', '+00:00'))
created_at = int(dt.timestamp())
except Exception:
pass
return {
"id": full_path,
"name": display_name,
"path": signed_url,
"size_mb": size / (1024 * 1024),
"type": "video",
"created_at": created_at
}
tasks = [build_item(f) for f in files_obj]
results = await asyncio.gather(*tasks, return_exceptions=True)
materials = []
for item in results:
if not item:
continue
if isinstance(item, Exception):
logger.warning(f"Material signed url build failed: {item}")
continue
materials.append(item)
materials.sort(key=lambda x: x['id'], reverse=True)
materials = await service.list_materials(user_id)
return success_response({"materials": materials})
except Exception as e:
logger.error(f"List materials failed: {e}")
return success_response({"materials": []}, message="获取素材失败")
@router.delete("/{material_id:path}")
async def delete_material(material_id: str, current_user: dict = Depends(get_current_user)):
user_id = current_user["id"]
# 验证 material_id 属于当前用户
if not material_id.startswith(f"{user_id}/"):
raise HTTPException(403, "无权删除此素材")
try:
await storage_service.delete_file(
bucket=storage_service.BUCKET_MATERIALS,
path=material_id
)
await service.delete_material(material_id, user_id)
return success_response(message="素材已删除")
except PermissionError as e:
raise HTTPException(403, str(e))
except Exception as e:
raise HTTPException(500, f"删除失败: {str(e)}")
@@ -360,57 +51,12 @@ async def rename_material(
current_user: dict = Depends(get_current_user)
):
user_id = current_user["id"]
if not material_id.startswith(f"{user_id}/"):
raise HTTPException(403, "无权重命名此素材")
new_name_raw = payload.new_name.strip() if payload.new_name else ""
if not new_name_raw:
raise HTTPException(400, "新名称不能为空")
old_name = material_id.split("/", 1)[1]
old_ext = Path(old_name).suffix
base_name = Path(new_name_raw).stem if Path(new_name_raw).suffix else new_name_raw
safe_base = sanitize_filename(base_name).strip()
if not safe_base:
raise HTTPException(400, "新名称无效")
new_filename = f"{safe_base}{old_ext}"
prefix = None
if "_" in old_name:
maybe_prefix, _ = old_name.split("_", 1)
if maybe_prefix.isdigit():
prefix = maybe_prefix
if prefix:
new_filename = f"{prefix}_{new_filename}"
new_path = f"{user_id}/{new_filename}"
try:
if new_path != material_id:
await storage_service.move_file(
bucket=storage_service.BUCKET_MATERIALS,
from_path=material_id,
to_path=new_path
)
signed_url = await storage_service.get_signed_url(
bucket=storage_service.BUCKET_MATERIALS,
path=new_path
)
display_name = new_filename
if "_" in new_filename:
parts = new_filename.split("_", 1)
if parts[0].isdigit():
display_name = parts[1]
return success_response({
"id": new_path,
"name": display_name,
"path": signed_url,
}, message="重命名成功")
result = await service.rename_material(material_id, payload.new_name, user_id)
return success_response(result, message="重命名成功")
except PermissionError as e:
raise HTTPException(403, str(e))
except ValueError as e:
raise HTTPException(400, str(e))
except Exception as e:
raise HTTPException(500, f"重命名失败: {str(e)}")

View File

@@ -0,0 +1,14 @@
from pydantic import BaseModel
class RenameMaterialRequest(BaseModel):
new_name: str
class MaterialItem(BaseModel):
id: str
name: str
path: str
size_mb: float
type: str = "video"
created_at: int = 0

View File

@@ -0,0 +1,296 @@
import re
import os
import time
import asyncio
import traceback
import aiofiles
from pathlib import Path
from loguru import logger
from app.services.storage import storage_service
def sanitize_filename(filename: str) -> str:
safe_name = re.sub(r'[<>:"/\\|?*]', '_', filename)
if len(safe_name) > 100:
ext = Path(safe_name).suffix
safe_name = safe_name[:100 - len(ext)] + ext
return safe_name
def _extract_display_name(storage_name: str) -> str:
"""从存储文件名中提取显示名(去掉时间戳前缀)"""
if '_' in storage_name:
parts = storage_name.split('_', 1)
if parts[0].isdigit():
return parts[1]
return storage_name
async def _process_and_upload(temp_file_path: str, original_filename: str, content_type: str, user_id: str) -> str:
"""Strip multipart headers and upload to Supabase, return storage_path"""
try:
logger.info(f"Processing raw upload: {temp_file_path} for user {user_id}")
file_size = os.path.getsize(temp_file_path)
with open(temp_file_path, 'rb') as f:
head = f.read(4096)
first_line_end = head.find(b'\r\n')
if first_line_end == -1:
raise Exception("Could not find boundary in multipart body")
boundary = head[:first_line_end]
logger.info(f"Detected boundary: {boundary}")
header_end = head.find(b'\r\n\r\n')
if header_end == -1:
raise Exception("Could not find end of multipart headers")
start_offset = header_end + 4
logger.info(f"Video data starts at offset: {start_offset}")
f.seek(max(0, file_size - 200))
tail = f.read()
last_boundary_pos = tail.rfind(boundary)
if last_boundary_pos != -1:
end_offset = (max(0, file_size - 200) + last_boundary_pos) - 2
else:
logger.warning("Could not find closing boundary, assuming EOF")
end_offset = file_size
logger.info(f"Video data ends at offset: {end_offset}. Total video size: {end_offset - start_offset}")
video_path = temp_file_path + "_video.mp4"
with open(temp_file_path, 'rb') as src, open(video_path, 'wb') as dst:
src.seek(start_offset)
bytes_to_copy = end_offset - start_offset
copied = 0
while copied < bytes_to_copy:
chunk_size = min(1024 * 1024 * 10, bytes_to_copy - copied)
chunk = src.read(chunk_size)
if not chunk:
break
dst.write(chunk)
copied += len(chunk)
logger.info(f"Extracted video content to {video_path}")
timestamp = int(time.time())
safe_name = re.sub(r'[^a-zA-Z0-9._-]', '', original_filename)
storage_path = f"{user_id}/{timestamp}_{safe_name}"
with open(video_path, 'rb') as f:
file_content = f.read()
await storage_service.upload_file(
bucket=storage_service.BUCKET_MATERIALS,
path=storage_path,
file_data=file_content,
content_type=content_type
)
logger.info(f"Upload to Supabase complete: {storage_path}")
os.remove(temp_file_path)
os.remove(video_path)
return storage_path
except Exception as e:
logger.error(f"Background upload processing failed: {e}\n{traceback.format_exc()}")
raise
async def upload_material(request, user_id: str) -> dict:
"""接收流式上传并存储到 Supabase返回素材信息"""
filename = "unknown_video.mp4"
content_type = "video/mp4"
timestamp = int(time.time())
temp_filename = f"upload_{timestamp}.raw"
temp_path = os.path.join("/tmp", temp_filename)
if os.name == 'nt':
temp_path = f"d:/tmp/{temp_filename}"
os.makedirs("d:/tmp", exist_ok=True)
try:
total_size = 0
last_log = 0
async with aiofiles.open(temp_path, 'wb') as f:
async for chunk in request.stream():
await f.write(chunk)
total_size += len(chunk)
if total_size - last_log > 20 * 1024 * 1024:
logger.info(f"Receiving stream... Processed {total_size / (1024*1024):.2f} MB")
last_log = total_size
logger.info(f"Stream reception complete. Total size: {total_size} bytes. Saved to {temp_path}")
if total_size == 0:
raise ValueError("Received empty body")
with open(temp_path, 'rb') as f:
head = f.read(4096).decode('utf-8', errors='ignore')
match = re.search(r'filename="([^"]+)"', head)
if match:
filename = match.group(1)
logger.info(f"Extracted filename from body: {filename}")
storage_path = await _process_and_upload(temp_path, filename, content_type, user_id)
signed_url = await storage_service.get_signed_url(
bucket=storage_service.BUCKET_MATERIALS,
path=storage_path
)
size_mb = total_size / (1024 * 1024)
display_name = _extract_display_name(storage_path.split('/')[-1])
return {
"id": storage_path,
"name": display_name,
"path": signed_url,
"size_mb": size_mb,
"type": "video"
}
except Exception as e:
error_msg = f"Streaming upload failed: {str(e)}"
detail_msg = f"Exception: {repr(e)}\nArgs: {e.args}\n{traceback.format_exc()}"
logger.error(error_msg + "\n" + detail_msg)
try:
with open("debug_upload.log", "a") as logf:
logf.write(f"\n--- Error at {time.ctime()} ---\n")
logf.write(detail_msg)
logf.write("\n-----------------------------\n")
except:
pass
if os.path.exists(temp_path):
try:
os.remove(temp_path)
except:
pass
raise
async def list_materials(user_id: str) -> list[dict]:
"""列出用户的所有素材"""
try:
files_obj = await storage_service.list_files(
bucket=storage_service.BUCKET_MATERIALS,
path=user_id
)
semaphore = asyncio.Semaphore(8)
async def build_item(f):
name = f.get('name')
if not name or name == '.emptyFolderPlaceholder':
return None
display_name = _extract_display_name(name)
full_path = f"{user_id}/{name}"
async with semaphore:
signed_url = await storage_service.get_signed_url(
bucket=storage_service.BUCKET_MATERIALS,
path=full_path
)
metadata = f.get('metadata', {})
size = metadata.get('size', 0)
created_at_str = f.get('created_at', '')
created_at = 0
if created_at_str:
from datetime import datetime
try:
dt = datetime.fromisoformat(created_at_str.replace('Z', '+00:00'))
created_at = int(dt.timestamp())
except Exception:
pass
return {
"id": full_path,
"name": display_name,
"path": signed_url,
"size_mb": size / (1024 * 1024),
"type": "video",
"created_at": created_at
}
tasks = [build_item(f) for f in files_obj]
results = await asyncio.gather(*tasks, return_exceptions=True)
materials = []
for item in results:
if not item:
continue
if isinstance(item, Exception):
logger.warning(f"Material signed url build failed: {item}")
continue
materials.append(item)
materials.sort(key=lambda x: x['id'], reverse=True)
return materials
except Exception as e:
logger.error(f"List materials failed: {e}")
return []
async def delete_material(material_id: str, user_id: str) -> None:
"""删除素材"""
if not material_id.startswith(f"{user_id}/"):
raise PermissionError("无权删除此素材")
await storage_service.delete_file(
bucket=storage_service.BUCKET_MATERIALS,
path=material_id
)
async def rename_material(material_id: str, new_name_raw: str, user_id: str) -> dict:
"""重命名素材,返回更新后的素材信息"""
if not material_id.startswith(f"{user_id}/"):
raise PermissionError("无权重命名此素材")
new_name_raw = new_name_raw.strip() if new_name_raw else ""
if not new_name_raw:
raise ValueError("新名称不能为空")
old_name = material_id.split("/", 1)[1]
old_ext = Path(old_name).suffix
base_name = Path(new_name_raw).stem if Path(new_name_raw).suffix else new_name_raw
safe_base = sanitize_filename(base_name).strip()
if not safe_base:
raise ValueError("新名称无效")
new_filename = f"{safe_base}{old_ext}"
prefix = None
if "_" in old_name:
maybe_prefix, _ = old_name.split("_", 1)
if maybe_prefix.isdigit():
prefix = maybe_prefix
if prefix:
new_filename = f"{prefix}_{new_filename}"
new_path = f"{user_id}/{new_filename}"
if new_path != material_id:
await storage_service.move_file(
bucket=storage_service.BUCKET_MATERIALS,
from_path=material_id,
to_path=new_path
)
signed_url = await storage_service.get_signed_url(
bucket=storage_service.BUCKET_MATERIALS,
path=new_path
)
display_name = _extract_display_name(new_filename)
return {
"id": new_path,
"name": display_name,
"path": signed_url,
}

View File

@@ -1,83 +1,14 @@
"""
参考音频管理 API
支持上传/列表/删除参考音频,用于 Qwen3-TTS 声音克隆
"""
"""参考音频管理 API"""
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends
from pydantic import BaseModel
from typing import List, Optional
from pathlib import Path
from loguru import logger
import time
import json
import subprocess
import tempfile
import os
import re
from app.core.deps import get_current_user
from app.services.storage import storage_service
from app.core.response import success_response
from app.modules.ref_audios.schemas import RenameRequest
from app.modules.ref_audios import service
router = APIRouter()
# 支持的音频格式
ALLOWED_AUDIO_EXTENSIONS = {'.wav', '.mp3', '.m4a', '.webm', '.ogg', '.flac', '.aac'}
# 参考音频 bucket
BUCKET_REF_AUDIOS = "ref-audios"
class RefAudioResponse(BaseModel):
id: str
name: str
path: str # signed URL for playback
ref_text: str
duration_sec: float
created_at: int
class RefAudioListResponse(BaseModel):
items: List[RefAudioResponse]
def sanitize_filename(filename: str) -> str:
"""清理文件名,移除特殊字符"""
safe_name = re.sub(r'[<>:"/\\|?*\s]', '_', filename)
if len(safe_name) > 50:
ext = Path(safe_name).suffix
safe_name = safe_name[:50 - len(ext)] + ext
return safe_name
def get_audio_duration(file_path: str) -> float:
"""获取音频时长 (秒)"""
try:
result = subprocess.run(
['ffprobe', '-v', 'quiet', '-show_entries', 'format=duration',
'-of', 'csv=p=0', file_path],
capture_output=True, text=True, timeout=10
)
return float(result.stdout.strip())
except Exception as e:
logger.warning(f"获取音频时长失败: {e}")
return 0.0
def convert_to_wav(input_path: str, output_path: str) -> bool:
"""将音频转换为 WAV 格式 (16kHz, mono)"""
try:
subprocess.run([
'ffmpeg', '-y', '-i', input_path,
'-ar', '16000', # 16kHz 采样率
'-ac', '1', # 单声道
'-acodec', 'pcm_s16le', # 16-bit PCM
output_path
], capture_output=True, timeout=60, check=True)
return True
except Exception as e:
logger.error(f"音频转换失败: {e}")
return False
@router.post("")
async def upload_ref_audio(
@@ -85,156 +16,12 @@ async def upload_ref_audio(
ref_text: str = Form(...),
user: dict = Depends(get_current_user)
):
"""
上传参考音频
- file: 音频文件 (支持 wav, mp3, m4a, webm 等)
- ref_text: 参考音频的转写文字 (必填)
"""
user_id = user["id"]
if not file.filename:
raise HTTPException(status_code=400, detail="文件名无效")
filename = file.filename
# 验证文件扩展名
ext = Path(filename).suffix.lower()
if ext not in ALLOWED_AUDIO_EXTENSIONS:
raise HTTPException(
status_code=400,
detail=f"不支持的音频格式: {ext}。支持的格式: {', '.join(ALLOWED_AUDIO_EXTENSIONS)}"
)
# 验证 ref_text
if not ref_text or len(ref_text.strip()) < 2:
raise HTTPException(status_code=400, detail="参考文字不能为空")
"""上传参考音频"""
try:
# 创建临时文件
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp_input:
content = await file.read()
tmp_input.write(content)
tmp_input_path = tmp_input.name
# 转换为 WAV 格式
tmp_wav_path = tmp_input_path + ".wav"
if ext != '.wav':
if not convert_to_wav(tmp_input_path, tmp_wav_path):
raise HTTPException(status_code=500, detail="音频格式转换失败")
else:
# 即使是 wav 也要标准化格式
convert_to_wav(tmp_input_path, tmp_wav_path)
# 获取音频时长
duration = get_audio_duration(tmp_wav_path)
if duration < 1.0:
raise HTTPException(status_code=400, detail="音频时长过短,至少需要 1 秒")
if duration > 60.0:
raise HTTPException(status_code=400, detail="音频时长过长,最多 60 秒")
# 3. 处理重名逻辑 (Friendly Display Name)
original_name = filename
# 获取用户现有的所有参考音频列表 (为了检查文件名冲突)
# 注意: 这种列表方式在文件极多时性能一般,但考虑到单用户参考音频数量有限,目前可行
existing_files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
existing_names = set()
# 预加载所有现有的 display name
# 这里需要并发请求 metadata 可能会慢,优化: 仅检查 metadata 文件并解析
# 简易方案: 仅在 metadata 中读取 original_filename
# 但 list_files 返回的是 name我们需要 metadata
# 考虑到性能,这里使用一种妥协方案:
# 我们不做全量检查,而是简单的检查:如果用户上传 myvoice.wav
# 我们看看有没有 (timestamp)_myvoice.wav 这种其实并不能准确判断 display name 是否冲突
#
# 正确做法: 应该有个数据库表存 metadata。但目前是无数据库设计。
#
# 改用简单方案:
# 既然我们无法快速获取所有 display name
# 我们暂时只处理 "在新上传时original_filename 保持原样"
# 但用户希望 "如果在列表中看到重复的,自动加(1)"
#
# 鉴于无数据库架构的限制,要在上传时知道"已有的 display name" 成本太高(需遍历下载所有json)。
#
# 💡 替代方案:
# 我们不检查旧的。我们只保证**存储**唯一。
# 对于用户提到的 "新上传的文件名后加个数字" -> 这通常是指 "另存为" 的逻辑。
# 既然用户现在的痛点是 "显示了时间戳太丑",而我已经去掉了时间戳显示。
# 那么如果用户上传两个 "TEST.wav",列表里就会有两个 "TEST.wav" (但时间不同)。
# 这其实是可以接受的。
#
# 但如果用户强求 "自动重命名":
# 我们可以在这里做一个轻量级的 "同名检测"
# 检查有没有 *_{original_name} 的文件存在。
# 如果 storage 里已经有 123_abc.wav, 456_abc.wav
# 我们可以认为 abc.wav 已经存在。
dup_count = 0
search_suffix = f"_{original_name}" # 比如 _test.wav
for f in existing_files:
fname = f.get('name', '')
if fname.endswith(search_suffix):
dup_count += 1
final_display_name = original_name
if dup_count > 0:
name_stem = Path(original_name).stem
name_ext = Path(original_name).suffix
final_display_name = f"{name_stem}({dup_count}){name_ext}"
# 生成存储路径 (唯一ID)
timestamp = int(time.time())
safe_name = sanitize_filename(Path(filename).stem)
storage_path = f"{user_id}/{timestamp}_{safe_name}.wav"
# 上传 WAV 文件到 Supabase
with open(tmp_wav_path, 'rb') as f:
wav_data = f.read()
await storage_service.upload_file(
bucket=BUCKET_REF_AUDIOS,
path=storage_path,
file_data=wav_data,
content_type="audio/wav"
)
# 上传元数据 JSON
metadata = {
"ref_text": ref_text.strip(),
"original_filename": final_display_name, # 这里的名字如果有重复会自动加(1)
"duration_sec": duration,
"created_at": timestamp
}
metadata_path = f"{user_id}/{timestamp}_{safe_name}.json"
await storage_service.upload_file(
bucket=BUCKET_REF_AUDIOS,
path=metadata_path,
file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
content_type="application/json"
)
# 获取签名 URL
signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)
# 清理临时文件
os.unlink(tmp_input_path)
if os.path.exists(tmp_wav_path):
os.unlink(tmp_wav_path)
return success_response(RefAudioResponse(
id=storage_path,
name=filename,
path=signed_url,
ref_text=ref_text.strip(),
duration_sec=duration,
created_at=timestamp
).model_dump())
except HTTPException:
raise
result = await service.upload_ref_audio(file, ref_text, user["id"])
return success_response(result)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error(f"上传参考音频失败: {e}")
raise HTTPException(status_code=500, detail=f"上传失败: {str(e)}")
@@ -243,81 +30,9 @@ async def upload_ref_audio(
@router.get("")
async def list_ref_audios(user: dict = Depends(get_current_user)):
"""列出当前用户的所有参考音频"""
user_id = user["id"]
try:
# 列出用户目录下的文件
files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
# 过滤出 .wav 文件
wav_files = [f for f in files if f.get("name", "").endswith(".wav")]
if not wav_files:
return success_response(RefAudioListResponse(items=[]).model_dump())
# 并发获取所有 metadata 和签名 URL
async def fetch_audio_info(f):
"""获取单个音频的信息metadata + signed URL"""
name = f.get("name", "")
storage_path = f"{user_id}/{name}"
metadata_name = name.replace(".wav", ".json")
metadata_path = f"{user_id}/{metadata_name}"
ref_text = ""
duration_sec = 0.0
created_at = 0
original_filename = ""
try:
# 获取 metadata 内容
metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
import httpx
async with httpx.AsyncClient(timeout=5.0) as client:
resp = await client.get(metadata_url)
if resp.status_code == 200:
metadata = resp.json()
ref_text = metadata.get("ref_text", "")
duration_sec = metadata.get("duration_sec", 0.0)
created_at = metadata.get("created_at", 0)
original_filename = metadata.get("original_filename", "")
except Exception as e:
logger.debug(f"读取 metadata 失败: {e}")
# 从文件名提取时间戳
try:
created_at = int(name.split("_")[0])
except:
pass
# 获取音频签名 URL
signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)
# 优先显示原始文件名 (去掉时间戳前缀)
display_name = original_filename if original_filename else name
# 如果原始文件名丢失,尝试从现有文件名中通过正则去掉时间戳
if not display_name or display_name == name:
# 匹配 "1234567890_filename.wav"
match = re.match(r'^\d+_(.+)$', name)
if match:
display_name = match.group(1)
return RefAudioResponse(
id=storage_path,
name=display_name,
path=signed_url,
ref_text=ref_text,
duration_sec=duration_sec,
created_at=created_at
)
# 使用 asyncio.gather 并发获取所有音频信息
import asyncio
items = await asyncio.gather(*[fetch_audio_info(f) for f in wav_files])
# 按创建时间倒序排列
items = sorted(items, key=lambda x: x.created_at, reverse=True)
return success_response(RefAudioListResponse(items=items).model_dump())
result = await service.list_ref_audios(user["id"])
return success_response(result)
except Exception as e:
logger.error(f"列出参考音频失败: {e}")
raise HTTPException(status_code=500, detail=f"获取列表失败: {str(e)}")
@@ -326,96 +41,30 @@ async def list_ref_audios(user: dict = Depends(get_current_user)):
@router.delete("/{audio_id:path}")
async def delete_ref_audio(audio_id: str, user: dict = Depends(get_current_user)):
"""删除参考音频"""
user_id = user["id"]
# 安全检查:确保只能删除自己的文件
if not audio_id.startswith(f"{user_id}/"):
raise HTTPException(status_code=403, detail="无权删除此文件")
try:
# 删除 WAV 文件
await storage_service.delete_file(BUCKET_REF_AUDIOS, audio_id)
# 删除 metadata JSON
metadata_path = audio_id.replace(".wav", ".json")
try:
await storage_service.delete_file(BUCKET_REF_AUDIOS, metadata_path)
except:
pass # metadata 可能不存在
await service.delete_ref_audio(audio_id, user["id"])
return success_response(message="删除成功")
except PermissionError as e:
raise HTTPException(status_code=403, detail=str(e))
except Exception as e:
logger.error(f"删除参考音频失败: {e}")
raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}")
class RenameRequest(BaseModel):
new_name: str
@router.put("/{audio_id:path}")
async def rename_ref_audio(
audio_id: str,
request: RenameRequest,
user: dict = Depends(get_current_user)
):
"""重命名参考音频 (修改 metadata 中的 display name)"""
user_id = user["id"]
# 安全检查
if not audio_id.startswith(f"{user_id}/"):
raise HTTPException(status_code=403, detail="无权修改此文件")
new_name = request.new_name.strip()
if not new_name:
raise HTTPException(status_code=400, detail="新名称不能为空")
# 确保新名称有后缀 (保留原后缀或添加 .wav)
if not Path(new_name).suffix:
new_name += ".wav"
"""重命名参考音频"""
try:
# 1. 下载现有的 metadata
metadata_path = audio_id.replace(".wav", ".json")
try:
# 获取已有的 JSON
import httpx
metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
if not metadata_url:
# 如果 json 不存在,则需要新建一个基础的
raise Exception("Metadata not found")
async with httpx.AsyncClient() as client:
resp = await client.get(metadata_url)
if resp.status_code == 200:
metadata = resp.json()
else:
raise Exception(f"Failed to fetch metadata: {resp.status_code}")
except Exception as e:
logger.warning(f"无法读取元数据: {e}, 将创建新的元数据")
# 兜底:如果读取失败,构建最小元数据
metadata = {
"ref_text": "", # 可能丢失
"duration_sec": 0.0,
"created_at": int(time.time()),
"original_filename": new_name
}
# 2. 更新 original_filename
metadata["original_filename"] = new_name
# 3. 覆盖上传 metadata
await storage_service.upload_file(
bucket=BUCKET_REF_AUDIOS,
path=metadata_path,
file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
content_type="application/json"
)
return success_response({"name": new_name}, message="重命名成功")
result = await service.rename_ref_audio(audio_id, request.new_name, user["id"])
return success_response(result, message="重命名成功")
except PermissionError as e:
raise HTTPException(status_code=403, detail=str(e))
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error(f"重命名失败: {e}")
raise HTTPException(status_code=500, detail=f"重命名失败: {str(e)}")

View File

@@ -0,0 +1,19 @@
from pydantic import BaseModel
from typing import List
class RefAudioResponse(BaseModel):
id: str
name: str
path: str
ref_text: str
duration_sec: float
created_at: int
class RefAudioListResponse(BaseModel):
items: List[RefAudioResponse]
class RenameRequest(BaseModel):
new_name: str

View File

@@ -0,0 +1,269 @@
import re
import os
import time
import json
import asyncio
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
import httpx
from loguru import logger
from app.services.storage import storage_service
from app.modules.ref_audios.schemas import RefAudioResponse, RefAudioListResponse
ALLOWED_AUDIO_EXTENSIONS = {'.wav', '.mp3', '.m4a', '.webm', '.ogg', '.flac', '.aac'}
BUCKET_REF_AUDIOS = "ref-audios"
def sanitize_filename(filename: str) -> str:
"""清理文件名,移除特殊字符"""
safe_name = re.sub(r'[<>:"/\\|?*\s]', '_', filename)
if len(safe_name) > 50:
ext = Path(safe_name).suffix
safe_name = safe_name[:50 - len(ext)] + ext
return safe_name
def _get_audio_duration(file_path: str) -> float:
"""获取音频时长 (秒)"""
try:
result = subprocess.run(
['ffprobe', '-v', 'quiet', '-show_entries', 'format=duration',
'-of', 'csv=p=0', file_path],
capture_output=True, text=True, timeout=10
)
return float(result.stdout.strip())
except Exception as e:
logger.warning(f"获取音频时长失败: {e}")
return 0.0
def _convert_to_wav(input_path: str, output_path: str) -> bool:
"""将音频转换为 WAV 格式 (16kHz, mono)"""
try:
subprocess.run([
'ffmpeg', '-y', '-i', input_path,
'-ar', '16000',
'-ac', '1',
'-acodec', 'pcm_s16le',
output_path
], capture_output=True, timeout=60, check=True)
return True
except Exception as e:
logger.error(f"音频转换失败: {e}")
return False
async def upload_ref_audio(file, ref_text: str, user_id: str) -> dict:
"""上传参考音频:转码、获取时长、存储到 Supabase"""
if not file.filename:
raise ValueError("文件名无效")
filename = file.filename
ext = Path(filename).suffix.lower()
if ext not in ALLOWED_AUDIO_EXTENSIONS:
raise ValueError(f"不支持的音频格式: {ext}。支持的格式: {', '.join(ALLOWED_AUDIO_EXTENSIONS)}")
if not ref_text or len(ref_text.strip()) < 2:
raise ValueError("参考文字不能为空")
# 创建临时文件
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp_input:
content = await file.read()
tmp_input.write(content)
tmp_input_path = tmp_input.name
try:
# 转换为 WAV 格式
tmp_wav_path = tmp_input_path + ".wav"
if not _convert_to_wav(tmp_input_path, tmp_wav_path):
raise RuntimeError("音频格式转换失败")
# 获取音频时长
duration = _get_audio_duration(tmp_wav_path)
if duration < 1.0:
raise ValueError("音频时长过短,至少需要 1 秒")
if duration > 60.0:
raise ValueError("音频时长过长,最多 60 秒")
# 检查重名
existing_files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
dup_count = 0
search_suffix = f"_{filename}"
for f in existing_files:
fname = f.get('name', '')
if fname.endswith(search_suffix):
dup_count += 1
final_display_name = filename
if dup_count > 0:
name_stem = Path(filename).stem
name_ext = Path(filename).suffix
final_display_name = f"{name_stem}({dup_count}){name_ext}"
# 生成存储路径
timestamp = int(time.time())
safe_name = sanitize_filename(Path(filename).stem)
storage_path = f"{user_id}/{timestamp}_{safe_name}.wav"
# 上传 WAV 文件
with open(tmp_wav_path, 'rb') as f:
wav_data = f.read()
await storage_service.upload_file(
bucket=BUCKET_REF_AUDIOS,
path=storage_path,
file_data=wav_data,
content_type="audio/wav"
)
# 上传元数据 JSON
metadata = {
"ref_text": ref_text.strip(),
"original_filename": final_display_name,
"duration_sec": duration,
"created_at": timestamp
}
metadata_path = f"{user_id}/{timestamp}_{safe_name}.json"
await storage_service.upload_file(
bucket=BUCKET_REF_AUDIOS,
path=metadata_path,
file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
content_type="application/json"
)
# 获取签名 URL
signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)
return RefAudioResponse(
id=storage_path,
name=filename,
path=signed_url,
ref_text=ref_text.strip(),
duration_sec=duration,
created_at=timestamp
).model_dump()
finally:
os.unlink(tmp_input_path)
if os.path.exists(tmp_input_path + ".wav"):
os.unlink(tmp_input_path + ".wav")
async def list_ref_audios(user_id: str) -> dict:
"""列出用户的所有参考音频"""
files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
wav_files = [f for f in files if f.get("name", "").endswith(".wav")]
if not wav_files:
return RefAudioListResponse(items=[]).model_dump()
async def fetch_audio_info(f):
name = f.get("name", "")
storage_path = f"{user_id}/{name}"
metadata_name = name.replace(".wav", ".json")
metadata_path = f"{user_id}/{metadata_name}"
ref_text = ""
duration_sec = 0.0
created_at = 0
original_filename = ""
try:
metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
async with httpx.AsyncClient(timeout=5.0) as client:
resp = await client.get(metadata_url)
if resp.status_code == 200:
metadata = resp.json()
ref_text = metadata.get("ref_text", "")
duration_sec = metadata.get("duration_sec", 0.0)
created_at = metadata.get("created_at", 0)
original_filename = metadata.get("original_filename", "")
except Exception as e:
logger.debug(f"读取 metadata 失败: {e}")
try:
created_at = int(name.split("_")[0])
except:
pass
signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)
display_name = original_filename if original_filename else name
if not display_name or display_name == name:
match = re.match(r'^\d+_(.+)$', name)
if match:
display_name = match.group(1)
return RefAudioResponse(
id=storage_path,
name=display_name,
path=signed_url,
ref_text=ref_text,
duration_sec=duration_sec,
created_at=created_at
)
items = await asyncio.gather(*[fetch_audio_info(f) for f in wav_files])
items = sorted(items, key=lambda x: x.created_at, reverse=True)
return RefAudioListResponse(items=items).model_dump()
async def delete_ref_audio(audio_id: str, user_id: str) -> None:
"""删除参考音频及其元数据"""
if not audio_id.startswith(f"{user_id}/"):
raise PermissionError("无权删除此文件")
await storage_service.delete_file(BUCKET_REF_AUDIOS, audio_id)
metadata_path = audio_id.replace(".wav", ".json")
try:
await storage_service.delete_file(BUCKET_REF_AUDIOS, metadata_path)
except:
pass
async def rename_ref_audio(audio_id: str, new_name: str, user_id: str) -> dict:
"""重命名参考音频(修改 metadata 中的 display name"""
if not audio_id.startswith(f"{user_id}/"):
raise PermissionError("无权修改此文件")
new_name = new_name.strip()
if not new_name:
raise ValueError("新名称不能为空")
if not Path(new_name).suffix:
new_name += ".wav"
# 下载现有 metadata
metadata_path = audio_id.replace(".wav", ".json")
try:
metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
async with httpx.AsyncClient() as client:
resp = await client.get(metadata_url)
if resp.status_code == 200:
metadata = resp.json()
else:
raise Exception(f"Failed to fetch metadata: {resp.status_code}")
except Exception as e:
logger.warning(f"无法读取元数据: {e}, 将创建新的元数据")
metadata = {
"ref_text": "",
"duration_sec": 0.0,
"created_at": int(time.time()),
"original_filename": new_name
}
# 更新并覆盖上传
metadata["original_filename"] = new_name
await storage_service.upload_file(
bucket=BUCKET_REF_AUDIOS,
path=metadata_path,
file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
content_type="application/json"
)
return {"name": new_name}

View File

@@ -1,417 +1,32 @@
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
from typing import Optional, Any, cast
import asyncio
import shutil
import os
import time
from pathlib import Path
from loguru import logger
from typing import Optional
import traceback
import re
import json
import requests
from urllib.parse import unquote
from loguru import logger
from app.services.whisper_service import whisper_service
from app.services.glm_service import glm_service
from app.core.response import success_response
from app.modules.tools import service
router = APIRouter()
@router.post("/extract-script")
async def extract_script_tool(
file: Optional[UploadFile] = File(None),
url: Optional[str] = Form(None),
rewrite: bool = Form(True)
):
"""
独立文案提取工具
支持上传视频/音频 OR 输入视频链接 -> 提取文字 -> (可选) AI洗稿
"""
if not file and not url:
raise HTTPException(400, "必须提供文件或视频链接")
temp_path = None
"""独立文案提取工具"""
try:
timestamp = int(time.time())
temp_dir = Path("/tmp")
if os.name == 'nt':
temp_dir = Path("d:/tmp")
temp_dir.mkdir(parents=True, exist_ok=True)
# 1. 获取/保存文件
loop = asyncio.get_event_loop()
if file:
filename = file.filename
if not filename:
raise HTTPException(400, "文件名无效")
safe_filename = Path(filename).name.replace(" ", "_")
temp_path = temp_dir / f"tool_extract_{timestamp}_{safe_filename}"
# 文件 I/O 放入线程池
await loop.run_in_executor(None, lambda: shutil.copyfileobj(file.file, open(temp_path, "wb")))
logger.info(f"Tool processing upload file: {temp_path}")
else:
if not url:
raise HTTPException(400, "必须提供视频链接")
url_value: str = url
# URL 下载逻辑
# 自动提取文案中的链接 (支持 Douyin/Bilibili 等分享文案)
url_match = re.search(r'https?://[^\s]+', url_value)
if url_match:
extracted_url = url_match.group(0)
logger.info(f"Extracted URL from text: {extracted_url}")
url_value = extracted_url
logger.info(f"Tool downloading URL: {url_value}")
# 封装 yt-dlp 下载函数 (Blocking)
def _download_yt_dlp():
import yt_dlp
logger.info("Attempting download with yt-dlp...")
ydl_opts = {
'format': 'bestaudio/best',
'outtmpl': str(temp_dir / f"tool_download_{timestamp}_%(id)s.%(ext)s"),
'quiet': True,
'no_warnings': True,
'http_headers': {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Referer': 'https://www.douyin.com/',
}
}
with yt_dlp.YoutubeDL() as ydl_raw:
ydl: Any = ydl_raw
ydl.params.update(ydl_opts)
info = ydl.extract_info(url_value, download=True)
if 'requested_downloads' in info:
downloaded_file = info['requested_downloads'][0]['filepath']
else:
ext = info.get('ext', 'mp4')
id = info.get('id')
downloaded_file = str(temp_dir / f"tool_download_{timestamp}_{id}.{ext}")
return Path(downloaded_file)
# 先尝试 yt-dlp (Run in Executor)
try:
temp_path = await loop.run_in_executor(None, _download_yt_dlp)
logger.info(f"yt-dlp downloaded to: {temp_path}")
except Exception as e:
logger.warning(f"yt-dlp download failed: {e}. Trying manual Douyin fallback...")
# 失败则尝试手动解析 (Douyin Fallback)
if "douyin" in url_value:
manual_path = await download_douyin_manual(url_value, temp_dir, timestamp)
if manual_path:
temp_path = manual_path
logger.info(f"Manual Douyin fallback successful: {temp_path}")
else:
raise HTTPException(400, f"视频下载失败。yt-dlp 报错: {str(e)}")
elif "bilibili" in url_value:
manual_path = await download_bilibili_manual(url_value, temp_dir, timestamp)
if manual_path:
temp_path = manual_path
logger.info(f"Manual Bilibili fallback successful: {temp_path}")
else:
raise HTTPException(400, f"视频下载失败。yt-dlp 报错: {str(e)}")
else:
raise HTTPException(400, f"视频下载失败: {str(e)}")
if not temp_path or not temp_path.exists():
raise HTTPException(400, "文件获取失败")
# 1.5 安全转换: 强制转为 WAV (16k)
import subprocess
audio_path = temp_dir / f"extract_audio_{timestamp}.wav"
def _convert_audio():
try:
convert_cmd = [
'ffmpeg',
'-i', str(temp_path),
'-vn', # 忽略视频
'-acodec', 'pcm_s16le',
'-ar', '16000', # Whisper 推荐采样率
'-ac', '1', # 单声道
'-y', # 覆盖
str(audio_path)
]
# 捕获 stderr
subprocess.run(convert_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return True
except subprocess.CalledProcessError as e:
error_log = e.stderr.decode('utf-8', errors='ignore') if e.stderr else str(e)
logger.error(f"FFmpeg check/convert failed: {error_log}")
# 检查是否为 HTML
head = b""
try:
with open(temp_path, 'rb') as f:
head = f.read(100)
except: pass
if b'<!DOCTYPE html' in head or b'<html' in head:
raise ValueError("HTML_DETECTED")
raise ValueError("CONVERT_FAILED")
# 执行转换 (Run in Executor)
try:
await loop.run_in_executor(None, _convert_audio)
logger.info(f"Converted to WAV: {audio_path}")
target_path = audio_path
except ValueError as ve:
if str(ve) == "HTML_DETECTED":
raise HTTPException(400, "下载的文件是网页而非视频,请重试或手动上传。")
else:
raise HTTPException(400, "下载的文件已损坏或格式无法识别。")
# 2. 提取文案 (Whisper)
script = await whisper_service.transcribe(str(target_path))
# 3. AI 洗稿 (GLM)
rewritten = None
if rewrite:
if script and len(script.strip()) > 0:
logger.info("Rewriting script...")
rewritten = await glm_service.rewrite_script(script)
else:
logger.warning("No script extracted, skipping rewrite")
return success_response({
"original_script": script,
"rewritten_script": rewritten
})
except HTTPException as he:
raise he
result = await service.extract_script(file=file, url=url, rewrite=rewrite)
return success_response(result)
except ValueError as e:
raise HTTPException(400, str(e))
except HTTPException:
raise
except Exception as e:
logger.error(f"Tool extract failed: {e}")
logger.error(traceback.format_exc())
# Friendly error message
msg = str(e)
if "Fresh cookies" in msg:
msg = "下载失败:目标平台开启了反爬验证,请过段时间重试或直接上传视频文件。"
raise HTTPException(500, f"提取失败: {msg}")
finally:
# 清理临时文件
if temp_path and temp_path.exists():
try:
os.remove(temp_path)
logger.info(f"Cleaned up temp file: {temp_path}")
except Exception as e:
logger.warning(f"Failed to cleanup temp file {temp_path}: {e}")
async def download_douyin_manual(url: str, temp_dir: Path, timestamp: int) -> Optional[Path]:
"""
手动下载抖音视频 (Fallback logic - Ported from SuperIPAgent/douyinDownloader)
使用特定的 User Profile URL 和硬编码 Cookie 绕过反爬
"""
import httpx
logger.info(f"[SuperIPAgent] Starting download for: {url}")
try:
# 1. 提取 Modal ID (支持短链跳转)
headers = {
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
}
# 如果是短链或重定向 - 使用异步 httpx
async with httpx.AsyncClient(follow_redirects=True, timeout=10.0) as client:
resp = await client.get(url, headers=headers)
final_url = str(resp.url)
logger.info(f"[SuperIPAgent] Final URL: {final_url}")
modal_id = None
match = re.search(r'/video/(\d+)', final_url)
if match:
modal_id = match.group(1)
if not modal_id:
logger.error("[SuperIPAgent] Could not extract modal_id")
return None
logger.info(f"[SuperIPAgent] Extracted modal_id: {modal_id}")
# 2. 构造特定请求 URL (Copy from SuperIPAgent)
# 使用特定用户的 Profile 页 + modal_id 参数,配合特定 Cookie
target_url = f"https://www.douyin.com/user/MS4wLjABAAAAN_s_hups7LD0N4qnrM3o2gI0vuG3pozNaEolz2_py3cHTTrpVr1Z4dukFD9SOlwY?from_tab_name=main&modal_id={modal_id}"
# 3. 使用配置的 Cookie (从环境变量 DOUYIN_COOKIE 读取)
from app.core.config import settings
if not settings.DOUYIN_COOKIE:
logger.warning("[SuperIPAgent] DOUYIN_COOKIE 未配置,视频下载可能失败")
headers_with_cookie = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"cookie": settings.DOUYIN_COOKIE,
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
}
logger.info(f"[SuperIPAgent] Requesting page with Cookie...")
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(target_url, headers=headers_with_cookie)
# 4. 解析 RENDER_DATA
content_match = re.findall(r'<script id="RENDER_DATA" type="application/json">(.*?)</script>', response.text)
if not content_match:
# 尝试解码后再查找?或者结构变了
# 再尝试找 SSR_HYDRATED_DATA
if "SSR_HYDRATED_DATA" in response.text:
content_match = re.findall(r'<script id="SSR_HYDRATED_DATA" type="application/json">(.*?)</script>', response.text)
if not content_match:
logger.error(f"[SuperIPAgent] Could not find RENDER_DATA in page (len={len(response.text)})")
return None
content = unquote(content_match[0])
try:
data = json.loads(content)
except:
logger.error("[SuperIPAgent] JSON decode failed")
return None
# 5. 提取视频流
video_url = None
try:
# 路径通常是: app -> videoDetail -> video -> bitRateList -> playAddr -> src
if "app" in data and "videoDetail" in data["app"]:
info = data["app"]["videoDetail"]["video"]
if "bitRateList" in info and info["bitRateList"]:
video_url = info["bitRateList"][0]["playAddr"][0]["src"]
elif "playAddr" in info and info["playAddr"]:
video_url = info["playAddr"][0]["src"]
except Exception as e:
logger.error(f"[SuperIPAgent] Path extraction failed: {e}")
if not video_url:
logger.error("[SuperIPAgent] No video_url found")
return None
if video_url.startswith("//"):
video_url = "https:" + video_url
logger.info(f"[SuperIPAgent] Found video URL: {video_url[:50]}...")
# 6. 下载 (带 Header) - 使用异步 httpx
temp_path = temp_dir / f"douyin_manual_{timestamp}.mp4"
download_headers = {
'Referer': 'https://www.douyin.com/',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
}
async with httpx.AsyncClient(timeout=60.0) as client:
async with client.stream("GET", video_url, headers=download_headers) as dl_resp:
if dl_resp.status_code == 200:
with open(temp_path, 'wb') as f:
async for chunk in dl_resp.aiter_bytes(chunk_size=8192):
f.write(chunk)
logger.info(f"[SuperIPAgent] Downloaded successfully: {temp_path}")
return temp_path
else:
logger.error(f"[SuperIPAgent] Download failed: {dl_resp.status_code}")
return None
except Exception as e:
logger.error(f"[SuperIPAgent] Logic failed: {e}")
return None
async def download_bilibili_manual(url: str, temp_dir: Path, timestamp: int) -> Optional[Path]:
"""
手动下载 Bilibili 视频 (Fallback logic - Playwright Version)
B站通常音视频分离这里只提取音频即可因为只需要文案
"""
from playwright.async_api import async_playwright
logger.info(f"[Playwright] Starting Bilibili download for: {url}")
playwright = None
browser = None
try:
playwright = await async_playwright().start()
# Launch browser (ensure chromium is installed: playwright install chromium)
browser = await playwright.chromium.launch(headless=True, args=['--no-sandbox', '--disable-setuid-sandbox'])
# Mobile User Agent often gives single stream?
# But Bilibili mobile web is tricky. Desktop is fine.
context = await browser.new_context(
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
page = await context.new_page()
# Intercept audio responses?
# Bilibili streams are usually .m4s
# But finding the initial state is easier.
logger.info("[Playwright] Navigating to Bilibili...")
await page.goto(url, timeout=45000)
# Wait for video element (triggers loading)
try:
await page.wait_for_selector('video', timeout=15000)
except:
logger.warning("[Playwright] Video selector timeout")
# 1. Try extracting from __playinfo__
# window.__playinfo__ contains dash streams
playinfo = await page.evaluate("window.__playinfo__")
audio_url = None
if playinfo and "data" in playinfo and "dash" in playinfo["data"]:
dash = playinfo["data"]["dash"]
if "audio" in dash and dash["audio"]:
audio_url = dash["audio"][0]["baseUrl"]
logger.info(f"[Playwright] Found audio stream in __playinfo__: {audio_url[:50]}...")
# 2. If playinfo fails, try extracting video src (sometimes it's a blob, which we can't fetch easily without interception)
# But interception is complex. Let's try requests with Referer if we have URL.
if not audio_url:
logger.warning("[Playwright] Could not find audio in __playinfo__")
return None
# Download the audio stream
temp_path = temp_dir / f"bilibili_audio_{timestamp}.m4s" # usually m4s
try:
api_request = context.request
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Referer": "https://www.bilibili.com/"
}
logger.info(f"[Playwright] Downloading audio stream...")
response = await api_request.get(audio_url, headers=headers)
if response.status == 200:
body = await response.body()
with open(temp_path, 'wb') as f:
f.write(body)
logger.info(f"[Playwright] Downloaded successfully: {temp_path}")
return temp_path
else:
logger.error(f"[Playwright] API Request failed: {response.status}")
return None
except Exception as e:
logger.error(f"[Playwright] Download logic error: {e}")
return None
except Exception as e:
logger.error(f"[Playwright] Bilibili download failed: {e}")
return None
finally:
if browser:
await browser.close()
if playwright:
await playwright.stop()

View File

@@ -0,0 +1,7 @@
from pydantic import BaseModel
from typing import Optional
class ExtractScriptResponse(BaseModel):
original_script: Optional[str] = None
rewritten_script: Optional[str] = None

View File

@@ -0,0 +1,355 @@
import asyncio
import os
import re
import json
import time
import shutil
import subprocess
import traceback
from pathlib import Path
from typing import Optional, Any
from urllib.parse import unquote
import httpx
from loguru import logger
from app.services.whisper_service import whisper_service
from app.services.glm_service import glm_service
async def extract_script(file=None, url: Optional[str] = None, rewrite: bool = True) -> dict:
"""
文案提取:上传文件或视频链接 -> Whisper 转写 -> (可选) GLM 洗稿
"""
if not file and not url:
raise ValueError("必须提供文件或视频链接")
temp_path = None
try:
timestamp = int(time.time())
temp_dir = Path("/tmp")
if os.name == 'nt':
temp_dir = Path("d:/tmp")
temp_dir.mkdir(parents=True, exist_ok=True)
loop = asyncio.get_event_loop()
# 1. 获取/保存文件
if file:
filename = file.filename
if not filename:
raise ValueError("文件名无效")
safe_filename = Path(filename).name.replace(" ", "_")
temp_path = temp_dir / f"tool_extract_{timestamp}_{safe_filename}"
await loop.run_in_executor(None, lambda: shutil.copyfileobj(file.file, open(temp_path, "wb")))
logger.info(f"Tool processing upload file: {temp_path}")
else:
temp_path = await _download_video(url, temp_dir, timestamp)
if not temp_path or not temp_path.exists():
raise ValueError("文件获取失败")
# 1.5 安全转换: 强制转为 WAV (16k)
audio_path = temp_dir / f"extract_audio_{timestamp}.wav"
try:
await loop.run_in_executor(None, lambda: _convert_to_wav(temp_path, audio_path))
logger.info(f"Converted to WAV: {audio_path}")
except ValueError as ve:
if str(ve) == "HTML_DETECTED":
raise ValueError("下载的文件是网页而非视频,请重试或手动上传。")
else:
raise ValueError("下载的文件已损坏或格式无法识别。")
# 2. 提取文案 (Whisper)
script = await whisper_service.transcribe(str(audio_path))
# 3. AI 洗稿 (GLM)
rewritten = None
if rewrite and script and len(script.strip()) > 0:
logger.info("Rewriting script...")
rewritten = await glm_service.rewrite_script(script)
return {
"original_script": script,
"rewritten_script": rewritten
}
finally:
if temp_path and temp_path.exists():
try:
os.remove(temp_path)
logger.info(f"Cleaned up temp file: {temp_path}")
except Exception as e:
logger.warning(f"Failed to cleanup temp file {temp_path}: {e}")
def _convert_to_wav(input_path: Path, output_path: Path) -> None:
"""FFmpeg 转换为 16k WAV"""
try:
convert_cmd = [
'ffmpeg',
'-i', str(input_path),
'-vn',
'-acodec', 'pcm_s16le',
'-ar', '16000',
'-ac', '1',
'-y',
str(output_path)
]
subprocess.run(convert_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as e:
error_log = e.stderr.decode('utf-8', errors='ignore') if e.stderr else str(e)
logger.error(f"FFmpeg check/convert failed: {error_log}")
head = b""
try:
with open(input_path, 'rb') as f:
head = f.read(100)
except:
pass
if b'<!DOCTYPE html' in head or b'<html' in head:
raise ValueError("HTML_DETECTED")
raise ValueError("CONVERT_FAILED")
async def _download_video(url: str, temp_dir: Path, timestamp: int) -> Path:
"""下载视频yt-dlp 优先,失败回退手动解析)"""
url_value = url
url_match = re.search(r'https?://[^\s]+', url_value)
if url_match:
extracted_url = url_match.group(0)
logger.info(f"Extracted URL from text: {extracted_url}")
url_value = extracted_url
logger.info(f"Tool downloading URL: {url_value}")
loop = asyncio.get_event_loop()
# 先尝试 yt-dlp
try:
temp_path = await loop.run_in_executor(None, lambda: _download_yt_dlp(url_value, temp_dir, timestamp))
logger.info(f"yt-dlp downloaded to: {temp_path}")
return temp_path
except Exception as e:
logger.warning(f"yt-dlp download failed: {e}. Trying manual fallback...")
if "douyin" in url_value:
manual_path = await _download_douyin_manual(url_value, temp_dir, timestamp)
if manual_path:
return manual_path
raise ValueError(f"视频下载失败。yt-dlp 报错: {str(e)}")
elif "bilibili" in url_value:
manual_path = await _download_bilibili_manual(url_value, temp_dir, timestamp)
if manual_path:
return manual_path
raise ValueError(f"视频下载失败。yt-dlp 报错: {str(e)}")
else:
raise ValueError(f"视频下载失败: {str(e)}")
def _download_yt_dlp(url_value: str, temp_dir: Path, timestamp: int) -> Path:
"""yt-dlp 下载(阻塞调用,应在线程池中运行)"""
import yt_dlp
logger.info("Attempting download with yt-dlp...")
ydl_opts = {
'format': 'bestaudio/best',
'outtmpl': str(temp_dir / f"tool_download_{timestamp}_%(id)s.%(ext)s"),
'quiet': True,
'no_warnings': True,
'http_headers': {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Referer': 'https://www.douyin.com/',
}
}
with yt_dlp.YoutubeDL() as ydl_raw:
ydl: Any = ydl_raw
ydl.params.update(ydl_opts)
info = ydl.extract_info(url_value, download=True)
if 'requested_downloads' in info:
downloaded_file = info['requested_downloads'][0]['filepath']
else:
ext = info.get('ext', 'mp4')
id = info.get('id')
downloaded_file = str(temp_dir / f"tool_download_{timestamp}_{id}.{ext}")
return Path(downloaded_file)
async def _download_douyin_manual(url: str, temp_dir: Path, timestamp: int) -> Optional[Path]:
"""手动下载抖音视频 (Fallback)"""
logger.info(f"[SuperIPAgent] Starting download for: {url}")
try:
headers = {
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
}
async with httpx.AsyncClient(follow_redirects=True, timeout=10.0) as client:
resp = await client.get(url, headers=headers)
final_url = str(resp.url)
logger.info(f"[SuperIPAgent] Final URL: {final_url}")
modal_id = None
match = re.search(r'/video/(\d+)', final_url)
if match:
modal_id = match.group(1)
if not modal_id:
logger.error("[SuperIPAgent] Could not extract modal_id")
return None
logger.info(f"[SuperIPAgent] Extracted modal_id: {modal_id}")
target_url = f"https://www.douyin.com/user/MS4wLjABAAAAN_s_hups7LD0N4qnrM3o2gI0vuG3pozNaEolz2_py3cHTTrpVr1Z4dukFD9SOlwY?from_tab_name=main&modal_id={modal_id}"
from app.core.config import settings
if not settings.DOUYIN_COOKIE:
logger.warning("[SuperIPAgent] DOUYIN_COOKIE 未配置,视频下载可能失败")
headers_with_cookie = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"cookie": settings.DOUYIN_COOKIE,
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
}
logger.info(f"[SuperIPAgent] Requesting page with Cookie...")
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(target_url, headers=headers_with_cookie)
content_match = re.findall(r'<script id="RENDER_DATA" type="application/json">(.*?)</script>', response.text)
if not content_match:
if "SSR_HYDRATED_DATA" in response.text:
content_match = re.findall(r'<script id="SSR_HYDRATED_DATA" type="application/json">(.*?)</script>', response.text)
if not content_match:
logger.error(f"[SuperIPAgent] Could not find RENDER_DATA in page (len={len(response.text)})")
return None
content = unquote(content_match[0])
try:
data = json.loads(content)
except:
logger.error("[SuperIPAgent] JSON decode failed")
return None
video_url = None
try:
if "app" in data and "videoDetail" in data["app"]:
info = data["app"]["videoDetail"]["video"]
if "bitRateList" in info and info["bitRateList"]:
video_url = info["bitRateList"][0]["playAddr"][0]["src"]
elif "playAddr" in info and info["playAddr"]:
video_url = info["playAddr"][0]["src"]
except Exception as e:
logger.error(f"[SuperIPAgent] Path extraction failed: {e}")
if not video_url:
logger.error("[SuperIPAgent] No video_url found")
return None
if video_url.startswith("//"):
video_url = "https:" + video_url
logger.info(f"[SuperIPAgent] Found video URL: {video_url[:50]}...")
temp_path = temp_dir / f"douyin_manual_{timestamp}.mp4"
download_headers = {
'Referer': 'https://www.douyin.com/',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
}
async with httpx.AsyncClient(timeout=60.0) as client:
async with client.stream("GET", video_url, headers=download_headers) as dl_resp:
if dl_resp.status_code == 200:
with open(temp_path, 'wb') as f:
async for chunk in dl_resp.aiter_bytes(chunk_size=8192):
f.write(chunk)
logger.info(f"[SuperIPAgent] Downloaded successfully: {temp_path}")
return temp_path
else:
logger.error(f"[SuperIPAgent] Download failed: {dl_resp.status_code}")
return None
except Exception as e:
logger.error(f"[SuperIPAgent] Logic failed: {e}")
return None
async def _download_bilibili_manual(url: str, temp_dir: Path, timestamp: int) -> Optional[Path]:
"""手动下载 Bilibili 视频 (Playwright Fallback)"""
from playwright.async_api import async_playwright
logger.info(f"[Playwright] Starting Bilibili download for: {url}")
playwright = None
browser = None
try:
playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless=True, args=['--no-sandbox', '--disable-setuid-sandbox'])
context = await browser.new_context(
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
page = await context.new_page()
logger.info("[Playwright] Navigating to Bilibili...")
await page.goto(url, timeout=45000)
try:
await page.wait_for_selector('video', timeout=15000)
except:
logger.warning("[Playwright] Video selector timeout")
playinfo = await page.evaluate("window.__playinfo__")
audio_url = None
if playinfo and "data" in playinfo and "dash" in playinfo["data"]:
dash = playinfo["data"]["dash"]
if "audio" in dash and dash["audio"]:
audio_url = dash["audio"][0]["baseUrl"]
logger.info(f"[Playwright] Found audio stream in __playinfo__: {audio_url[:50]}...")
if not audio_url:
logger.warning("[Playwright] Could not find audio in __playinfo__")
return None
temp_path = temp_dir / f"bilibili_audio_{timestamp}.m4s"
try:
api_request = context.request
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Referer": "https://www.bilibili.com/"
}
logger.info(f"[Playwright] Downloading audio stream...")
response = await api_request.get(audio_url, headers=headers)
if response.status == 200:
body = await response.body()
with open(temp_path, 'wb') as f:
f.write(body)
logger.info(f"[Playwright] Downloaded successfully: {temp_path}")
return temp_path
else:
logger.error(f"[Playwright] API Request failed: {response.status}")
return None
except Exception as e:
logger.error(f"[Playwright] Download logic error: {e}")
return None
except Exception as e:
logger.error(f"[Playwright] Bilibili download failed: {e}")
return None
finally:
if browser:
await browser.close()
if playwright:
await playwright.stop()

View File

@@ -1,8 +1,8 @@
import type { Metadata, Viewport } from "next";
import { Geist, Geist_Mono } from "next/font/google";
import "./globals.css";
import { AuthProvider } from "@/contexts/AuthContext";
import { TaskProvider } from "@/contexts/TaskContext";
import { AuthProvider } from "@/shared/contexts/AuthContext";
import { TaskProvider } from "@/shared/contexts/TaskContext";
import { Toaster } from "sonner";

View File

@@ -1,7 +1,7 @@
"use client";
import { useState, useEffect, useRef } from "react";
import { useAuth } from "@/contexts/AuthContext";
import { useAuth } from "@/shared/contexts/AuthContext";
import api from "@/shared/api/axios";
import { ApiResponse } from "@/shared/api/types";

View File

@@ -1,6 +1,6 @@
"use client";
import { useTask } from "@/contexts/TaskContext";
import { useTask } from "@/shared/contexts/TaskContext";
import Link from "next/link";
import { usePathname } from "next/navigation";

View File

@@ -11,8 +11,8 @@ import {
} from "@/shared/lib/media";
import { clampTitle } from "@/shared/lib/title";
import { useTitleInput } from "@/shared/hooks/useTitleInput";
import { useAuth } from "@/contexts/AuthContext";
import { useTask } from "@/contexts/TaskContext";
import { useAuth } from "@/shared/contexts/AuthContext";
import { useTask } from "@/shared/contexts/TaskContext";
import { toast } from "sonner";
import { usePublishPrefetch } from "@/shared/hooks/usePublishPrefetch";
import { PublishAccount } from "@/shared/types/publish";

View File

@@ -3,7 +3,7 @@
import { useEffect } from "react";
import { useRouter } from "next/navigation";
import VideoPreviewModal from "@/components/VideoPreviewModal";
import ScriptExtractionModal from "@/components/ScriptExtractionModal";
import ScriptExtractionModal from "./ScriptExtractionModal";
import { useHomeController } from "@/features/home/model/useHomeController";
import { BgmPanel } from "@/features/home/ui/BgmPanel";
import { GenerateActionBar } from "@/features/home/ui/GenerateActionBar";

View File

@@ -5,8 +5,8 @@ import { ApiResponse, unwrap } from "@/shared/api/types";
import { formatDate, getApiBaseUrl, isAbsoluteUrl, resolveMediaUrl } from "@/shared/lib/media";
import { clampTitle } from "@/shared/lib/title";
import { useTitleInput } from "@/shared/hooks/useTitleInput";
import { useAuth } from "@/contexts/AuthContext";
import { useTask } from "@/contexts/TaskContext";
import { useAuth } from "@/shared/contexts/AuthContext";
import { useTask } from "@/shared/contexts/TaskContext";
import { toast } from "sonner";
import { usePublishPrefetch } from "@/shared/hooks/usePublishPrefetch";
import {