This commit is contained in:
Kevin Wong
2026-02-02 17:16:07 +08:00
parent 7bfd6bf862
commit 6e58f4bbe7
11 changed files with 1012 additions and 19 deletions

View File

@@ -345,3 +345,66 @@ pm2 restart vigent2-backend vigent2-frontend
- [task_complete.md](../task_complete.md) - 任务总览 - [task_complete.md](../task_complete.md) - 任务总览
- [Day14.md](./Day14.md) - 模型升级 + AI 标题标签 - [Day14.md](./Day14.md) - 模型升级 + AI 标题标签
- [AUTH_DEPLOY.md](../AUTH_DEPLOY.md) - 认证系统部署指南 - [AUTH_DEPLOY.md](../AUTH_DEPLOY.md) - 认证系统部署指南
---
## 🤖 模型与功能增强 (Day 15 晚)
### 1. GLM-4.7-Flash 升级
**文件**: `backend/app/services/glm_service.py`
将文案洗稿模型从 `glm-4-flash` 升级为 `glm-4.7-flash`
```python
response = client.chat.completions.create(
model="glm-4.7-flash", # Upgrade from glm-4-flash
messages=[...],
# ...
)
```
**改进**:
- 响应速度提升
- 洗稿文案的流畅度和逻辑性增强
### 2. 独立文案提取助手
实现了独立的文案提取工具,支持从视频/音频文件或 URL 提取文字。
#### 后端实现 (`backend/app/api/tools.py`)
- **多源支持**: 文件上传 (MP4/MP3/WAV) 或 URL 下载
- **智能下载**:
- `yt-dlp`: 通用下载 (Douyin/TikTok/Bilibili)
- `Playwright`: 智能回退机制 (Bilibili Dashboard API, Douyin Cookie Bypass)
- **URL 自动清洗**: 正则提取分享文本中的 HTTP 链接
- **流程**: 下载 -> FFmpeg 转 WAV (16k) -> Whisper 识别 -> GLM-4.7 洗稿
#### 前端实现 (`frontend/src/components/ScriptExtractionModal.tsx`)
- **独立模态框**: 通过顶部导航栏打开
- **功能**:
- 链接粘贴 / 文件拖拽
- 实时进度显示 (下载 -> 识别 -> 洗稿)
- **一键填入**: 将提取结果直接填充到主输入框
- **自动识别**: 自动区分平台与链接
- **交互优化**:
- 防止误触背景关闭
- 复制功能兼容 HTTP 环境 (Fallback textArea)
### 3. 上传视频预览功能
在素材列表 (`frontend/src/app/page.tsx`) 中为上传的视频添加预览功能:
- 点击缩略图弹出视频播放模态框
- 支持下载与发布快捷跳转
---
## 📝 任务清单更新
- [x] 认证系统迁移 (手机号)
- [x] 账户管理 (密码修改/有效期)
- [x] GLM-4.7 模型升级
- [x] 独立文案提取助手 (B站/抖音支持)
- [x] 视频预览功能

View File

@@ -413,9 +413,10 @@ Day 14: 模型升级 + AI 标题标签 + 前端修复 ✅ 完成
- 登录页刷新循环修复 (公开路由跳转豁免) - 登录页刷新循环修复 (公开路由跳转豁免)
Day 15: 手机号登录迁移 + 账户设置 ✅ 完成 Day 15: 手机号登录迁移 + 账户设置 ✅ 完成
- 认证系统迁移 (邮箱 → 11位手机号) - **认证系统迁移** (邮箱 → 11位手机号)
- 修改密码 API (/api/auth/change-password) - **账户设置** (修改密码 + 退出登录 + 有效期显示)
- 账户设置下拉菜单 (修改密码 + 退出登录) - **GLM-4.7 模型升级** (文案洗稿效果提升)
- 前端登录/注册页面更新 - **文案提取助手** (支持 B站/抖音/URL 提取 + 自动洗稿)
- 数据库迁移脚本 (migrate_to_phone.sql) - **视频预览功能** (素材列表预览 + 交互优化)
- **前端交互优化** (滚动条美化、弹窗误触修复)

View File

@@ -13,7 +13,9 @@
- 🔊 **声音克隆** - Qwen3-TTS 1.7B3秒参考音频快速克隆更高质量 - 🔊 **声音克隆** - Qwen3-TTS 1.7B3秒参考音频快速克隆更高质量
- 📝 **逐字高亮字幕** - faster-whisper + Remotion卡拉OK效果 🆕 - 📝 **逐字高亮字幕** - faster-whisper + Remotion卡拉OK效果 🆕
- 🎬 **片头标题** - 淡入淡出动画,可自定义 🆕 - 🎬 **片头标题** - 淡入淡出动画,可自定义 🆕
- 🤖 **AI 标题/标签生成** - GLM-4-Flash 自动生成标题与标签 🆕 - 🤖 **AI 标题/标签生成** - GLM-4.7-Flash 自动生成标题与标签 (升级版) 🆕
- 📜 **文案提取助手** - 支持 B站/抖音/TikTok 视频链接提取与 AI 洗稿 🆕
- 📽️ **上传视频预览** - 素材列表支持直接预览播放 🆕
- 📱 **全自动发布** - 扫码登录 + Cookie持久化支持多平台(B站/抖音/小红书)定时发布 - 📱 **全自动发布** - 扫码登录 + Cookie持久化支持多平台(B站/抖音/小红书)定时发布
- 🖥️ **Web UI** - Next.js 现代化界面iOS/Android 移动端适配 - 🖥️ **Web UI** - Next.js 现代化界面iOS/Android 移动端适配
- 🔐 **用户系统** - Supabase + JWT 认证,**手机号登录** + 管理员后台 🆕 - 🔐 **用户系统** - Supabase + JWT 认证,**手机号登录** + 管理员后台 🆕

View File

@@ -9,6 +9,10 @@ import os
import aiofiles import aiofiles
from pathlib import Path from pathlib import Path
from loguru import logger from loguru import logger
from pydantic import BaseModel
from typing import Optional
import httpx
router = APIRouter() router = APIRouter()
@@ -329,3 +333,6 @@ async def delete_material(material_id: str, current_user: dict = Depends(get_cur
return {"success": True, "message": "素材已删除"} return {"success": True, "message": "素材已删除"}
except Exception as e: except Exception as e:
raise HTTPException(500, f"删除失败: {str(e)}") raise HTTPException(500, f"删除失败: {str(e)}")

390
backend/app/api/tools.py Normal file
View File

@@ -0,0 +1,390 @@
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
from typing import Optional
import shutil
import os
import time
from pathlib import Path
from loguru import logger
import traceback
import re
import json
import requests
from urllib.parse import unquote
from app.services.whisper_service import whisper_service
from app.services.glm_service import glm_service
router = APIRouter()
@router.post("/extract-script")
async def extract_script_tool(
file: Optional[UploadFile] = File(None),
url: Optional[str] = Form(None),
rewrite: bool = Form(True)
):
"""
独立文案提取工具
支持上传视频/音频 OR 输入视频链接 -> 提取文字 -> (可选) AI洗稿
"""
if not file and not url:
raise HTTPException(400, "必须提供文件或视频链接")
temp_path = None
try:
timestamp = int(time.time())
temp_dir = Path("/tmp")
if os.name == 'nt':
temp_dir = Path("d:/tmp")
temp_dir.mkdir(parents=True, exist_ok=True)
# 1. 获取/保存文件
if file:
safe_filename = Path(file.filename).name.replace(" ", "_")
temp_path = temp_dir / f"tool_extract_{timestamp}_{safe_filename}"
with open(temp_path, "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
logger.info(f"Tool processing upload file: {temp_path}")
else:
# URL 下载逻辑
# 自动提取文案中的链接 (支持 Douyin/Bilibili 等分享文案)
url_match = re.search(r'https?://[^\s]+', url)
if url_match:
extracted_url = url_match.group(0)
logger.info(f"Extracted URL from text: {extracted_url}")
url = extracted_url
logger.info(f"Tool downloading URL: {url}")
# 先尝试 yt-dlp
try:
import yt_dlp
logger.info("Attempting download with yt-dlp...")
ydl_opts = {
'format': 'bestaudio/best',
'outtmpl': str(temp_dir / f"tool_download_{timestamp}_%(id)s.%(ext)s"),
'quiet': True,
'no_warnings': True,
'http_headers': {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Referer': 'https://www.douyin.com/',
}
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(url, download=True)
if 'requested_downloads' in info:
downloaded_file = info['requested_downloads'][0]['filepath']
else:
ext = info.get('ext', 'mp4')
id = info.get('id')
downloaded_file = str(temp_dir / f"tool_download_{timestamp}_{id}.{ext}")
temp_path = Path(downloaded_file)
logger.info(f"yt-dlp downloaded to: {temp_path}")
except Exception as e:
logger.warning(f"yt-dlp download failed: {e}. Trying manual Douyin fallback...")
# 失败则尝试手动解析 (Douyin Fallback)
if "douyin" in url:
manual_path = await download_douyin_manual(url, temp_dir, timestamp)
if manual_path:
temp_path = manual_path
logger.info(f"Manual Douyin fallback successful: {temp_path}")
else:
raise HTTPException(400, f"视频下载失败。yt-dlp 报错: {str(e)}")
elif "bilibili" in url:
manual_path = await download_bilibili_manual(url, temp_dir, timestamp)
if manual_path:
temp_path = manual_path
logger.info(f"Manual Bilibili fallback successful: {temp_path}")
else:
raise HTTPException(400, f"视频下载失败。yt-dlp 报错: {str(e)}")
else:
raise HTTPException(400, f"视频下载失败: {str(e)}")
if not temp_path or not temp_path.exists():
raise HTTPException(400, "文件获取失败")
# 1.5 安全转换: 强制转为 WAV (16k) 传给 Whisper
# 这一步既能验证文件有效性ffmpeg会报错又能避免 PyAV 音频解码 bug
import subprocess
audio_path = temp_dir / f"extract_audio_{timestamp}.wav"
try:
# ffmpeg -i input -vn -acodec pcm_s16le -ar 16000 -ac 1 output.wav -y
convert_cmd = [
'ffmpeg',
'-i', str(temp_path),
'-vn', # 忽略视频
'-acodec', 'pcm_s16le',
'-ar', '16000', # Whisper 推荐采样率
'-ac', '1', # 单声道
'-y', # 覆盖
str(audio_path)
]
# 捕获 stderr 以便出错时打印
subprocess.run(convert_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
logger.info(f"Converted to WAV: {audio_path}")
# 使用转换后的文件
target_path = audio_path
except subprocess.CalledProcessError as e:
error_log = e.stderr.decode('utf-8', errors='ignore') if e.stderr else str(e)
logger.error(f"FFmpeg check/convert failed: {error_log}")
# 尝试判断是不是 HTML
head = b""
try:
with open(temp_path, 'rb') as f:
head = f.read(100)
except:
pass
if b'<!DOCTYPE html' in head or b'<html' in head:
raise HTTPException(400, "下载的文件是网页而非视频,请重试或手动上传。")
raise HTTPException(400, "下载的文件已损坏或格式无法识别。")
# 2. 提取文案 (Whisper)
script = await whisper_service.transcribe(str(target_path))
# 3. AI 洗稿 (GLM)
rewritten = None
if rewrite:
if script and len(script.strip()) > 0:
logger.info("Rewriting script...")
rewritten = await glm_service.rewrite_script(script)
else:
logger.warning("No script extracted, skipping rewrite")
return {
"success": True,
"original_script": script,
"rewritten_script": rewritten
}
except HTTPException as he:
raise he
except Exception as e:
logger.error(f"Tool extract failed: {e}")
logger.error(traceback.format_exc())
# Friendly error message
msg = str(e)
if "Fresh cookies" in msg:
msg = "下载失败:目标平台开启了反爬验证,请过段时间重试或直接上传视频文件。"
raise HTTPException(500, f"提取失败: {msg}")
finally:
# 清理临时文件
if temp_path and temp_path.exists():
try:
os.remove(temp_path)
logger.info(f"Cleaned up temp file: {temp_path}")
except Exception as e:
logger.warning(f"Failed to cleanup temp file {temp_path}: {e}")
async def download_douyin_manual(url: str, temp_dir: Path, timestamp: int) -> Optional[Path]:
"""
手动下载抖音视频 (Fallback logic - Ported from SuperIPAgent/douyinDownloader)
使用特定的 User Profile URL 和硬编码 Cookie 绕过反爬
"""
logger.info(f"[SuperIPAgent] Starting download for: {url}")
try:
# 1. 提取 Modal ID (支持短链跳转)
headers = {
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
}
# 如果是短链或重定向
resp = requests.get(url, headers=headers, allow_redirects=True, timeout=10)
final_url = resp.url
logger.info(f"[SuperIPAgent] Final URL: {final_url}")
modal_id = None
match = re.search(r'/video/(\d+)', final_url)
if match:
modal_id = match.group(1)
if not modal_id:
logger.error("[SuperIPAgent] Could not extract modal_id")
return None
logger.info(f"[SuperIPAgent] Extracted modal_id: {modal_id}")
# 2. 构造特定请求 URL (Copy from SuperIPAgent)
# 使用特定用户的 Profile 页 + modal_id 参数,配合特定 Cookie
target_url = f"https://www.douyin.com/user/MS4wLjABAAAAN_s_hups7LD0N4qnrM3o2gI0vuG3pozNaEolz2_py3cHTTrpVr1Z4dukFD9SOlwY?from_tab_name=main&modal_id={modal_id}"
# 3. 使用硬编码 Cookie (Copy from SuperIPAgent)
headers_with_cookie = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"cookie": "douyin.com; device_web_cpu_core=10; device_web_memory_size=8; __ac_nonce=06760391f00b9b51264ae; __ac_signature=_02B4Z6wo00f019a5ceAAAIDAhEZR-X3jjWfWmXVAAJLXd4; ttwid=1%7C7MTKBSMsP4eOv9h5NAh8p0E-NYIud09ftNmB0mjLpWc%7C1734359327%7C8794abeabbd47447e1f56e5abc726be089f2a0344d6343b5f75f23e7b0f0028f; UIFID_TEMP=0de8750d2b188f4235dbfd208e44abbb976428f0720eb983255afefa45d39c0c6532e1d4768dd8587bf919f866ff1396912bcb2af71efee56a14a2a9f37b74010d0a0413795262f6d4afe02a032ac7ab; s_v_web_id=verify_m4r4ribr_c7krmY1z_WoeI_43po_ATpO_I4o8U1bex2D7; hevc_supported=true; home_can_add_dy_2_desktop=%220%22; dy_swidth=2560; dy_sheight=1440; stream_recommend_feed_params=%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A2560%2C%5C%22screen_height%5C%22%3A1440%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A10%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A50%7D%22; strategyABtestKey=%221734359328.577%22; csrf_session_id=2f53aed9aa6974e83aa9a1014180c3a4; fpk1=U2FsdGVkX1/IpBh0qdmlKAVhGyYHgur4/VtL9AReZoeSxadXn4juKvsakahRGqjxOPytHWspYoBogyhS/V6QSw==; fpk2=0845b309c7b9b957afd9ecf775a4c21f; passport_csrf_token=d80e0c5b2fa2328219856be5ba7e671e; passport_csrf_token_default=d80e0c5b2fa2328219856be5ba7e671e; odin_tt=3c891091d2eb0f4718c1d5645bc4a0017032d4d5aa989decb729e9da2ad570918cbe5e9133dc6b145fa8c758de98efe32ff1f81aa0d611e838cc73ab08ef7d3f6adf66ab4d10e8372ddd628f94f16b8e; volume_info=%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Afalse%2C%22volume%22%3A0.5%7D; bd_ticket_guard_client_web_domain=2; FORCE_LOGIN=%7B%22videoConsumedRemainSeconds%22%3A180%7D; UIFID=0de8750d2b188f4235dbfd208e44abbb976428f0720eb983255afefa45d39c0c6532e1d4768dd8587bf919f866ff139655a3c2b735923234f371c699560c657923fd3d6c5b63ab7bb9b83423b6cb4787e2ce66a7fbc4ecb24c8570f520fe6de068bbb95115023c0c6c1b6ee31b49fb7e3996fb8349f43a3fd8b7a61cd9e18e8fe65eb6a7c13de4c0960d84e344b644725db3eb2fa6b7caf821de1b50527979f2; is_dash_user=1; biz_trace_id=b57a241f; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWl0ZXJhdGlvbi12ZXJzaW9uIjoxLCJiZC10aWNrZXQtZ3VhcmQtcmVlLXB1YmxpYy1rZXkiOiJCTEo2R0lDalVoWW1XcHpGOFdrN0Vrc0dXcCtaUzNKY1g4NGNGY2k0TTl1TEowNjdUb21mbFU5aDdvWVBGamhNRWNRQWtKdnN1MnM3RmpTWnlJQXpHMjA9IiwiYmQtdGlja2V0LWd1YXJkLXdlYi12ZXJzaW9uIjoyfQ%3D%3D; download_guide=%221%2F20241216%2F0%22; sdk_source_info=7e276470716a68645a606960273f276364697660272927676c715a6d6069756077273f276364697660272927666d776a68605a607d71606b766c6a6b5a7666776c7571273f275e58272927666a6b766a69605a696c6061273f27636469766027292762696a6764695a7364776c6467696076273f275e5827292771273f273d33323131333c3036313632342778; bit_env=RiOY4jzzpxZoVCl6zdVSVhVRjdwHRTxqcqWdqMBZLPGjMdB4Tax1kAELHNTVAAh72KuhumewE4Lq6f0-VJ2UpJrkrhSxoPw9LUb3zQrq1OSwbeSPHkRlRgRQvO89sItdGUyq1oFr0XyRCnMYG87KSeWyc4x0czGR0o50hTDoDLG5rJVoRcdQOLvjiAegsqyytKF59sPX_QM9qffK2SqYsg0hCggURc_AI6kguDDE5DvG0bnyz1utw4z1eEnIoLrkGDqzqBZj4dOAr0BVU6ofbsS-pOQ2u2PM1dLP9FlBVBlVaqYVgHJeSLsR5k76BRTddUjTb4zEilVIEwAMJWGN4I1BxVt6fC9B5tBQpuT0lj3n3eKXCKXZsd8FrEs5_pbfDsxV-e_WMiXI2ff4qxiTC0U73sfo9OpicKICtZjdq8qsHxJuu6wVR36zvXeL2Wch5C6MzprNvkivv0l8nbh2mSgy1nabZr3dmU6NcR-Bg3Q3xTWUlR9aAUmpopC-cNuXjgLpT-Lw1AYGilSUnCvosth1Gfypq-b0MpgmdSDgTrQ%3D; gulu_source_res=eyJwX2luIjoiMDhjOGQ3ZTJiODQyNjZkZWI5Y2VkMGJiODNlNmY1ZWY0ZjMyNTE2ZmYyZjAzNDMzZjI0OWU1Y2Q1NTczNTk5NyJ9; passport_auth_mix_state=hp9bc3dgb1tm5wd8p82zawus27g0e3ue; IsDouyinActive=false",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
}
logger.info(f"[SuperIPAgent] Requesting page with Cookie...")
# 必须 verify=False 否则有些环境会报错
response = requests.get(target_url, headers=headers_with_cookie, timeout=10)
# 4. 解析 RENDER_DATA
content_match = re.findall(r'<script id="RENDER_DATA" type="application/json">(.*?)</script>', response.text)
if not content_match:
# 尝试解码后再查找?或者结构变了
# 再尝试找 SSR_HYDRATED_DATA
if "SSR_HYDRATED_DATA" in response.text:
content_match = re.findall(r'<script id="SSR_HYDRATED_DATA" type="application/json">(.*?)</script>', response.text)
if not content_match:
logger.error(f"[SuperIPAgent] Could not find RENDER_DATA in page (len={len(response.text)})")
return None
content = unquote(content_match[0])
try:
data = json.loads(content)
except:
logger.error("[SuperIPAgent] JSON decode failed")
return None
# 5. 提取视频流
video_url = None
try:
# 路径通常是: app -> videoDetail -> video -> bitRateList -> playAddr -> src
if "app" in data and "videoDetail" in data["app"]:
info = data["app"]["videoDetail"]["video"]
if "bitRateList" in info and info["bitRateList"]:
video_url = info["bitRateList"][0]["playAddr"][0]["src"]
elif "playAddr" in info and info["playAddr"]:
video_url = info["playAddr"][0]["src"]
except Exception as e:
logger.error(f"[SuperIPAgent] Path extraction failed: {e}")
if not video_url:
logger.error("[SuperIPAgent] No video_url found")
return None
if video_url.startswith("//"):
video_url = "https:" + video_url
logger.info(f"[SuperIPAgent] Found video URL: {video_url[:50]}...")
# 6. 下载 (带 Header)
temp_path = temp_dir / f"douyin_manual_{timestamp}.mp4"
download_headers = {
'Referer': 'https://www.douyin.com/',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
}
dl_resp = requests.get(video_url, headers=download_headers, stream=True, timeout=60)
if dl_resp.status_code == 200:
with open(temp_path, 'wb') as f:
for chunk in dl_resp.iter_content(chunk_size=1024):
f.write(chunk)
logger.info(f"[SuperIPAgent] Downloaded successfully: {temp_path}")
return temp_path
else:
logger.error(f"[SuperIPAgent] Download failed: {dl_resp.status_code}")
return None
except Exception as e:
logger.error(f"[SuperIPAgent] Logic failed: {e}")
return None
async def download_bilibili_manual(url: str, temp_dir: Path, timestamp: int) -> Optional[Path]:
"""
手动下载 Bilibili 视频 (Fallback logic - Playwright Version)
B站通常音视频分离这里只提取音频即可因为只需要文案
"""
from playwright.async_api import async_playwright
logger.info(f"[Playwright] Starting Bilibili download for: {url}")
playwright = None
browser = None
try:
playwright = await async_playwright().start()
# Launch browser (ensure chromium is installed: playwright install chromium)
browser = await playwright.chromium.launch(headless=True, args=['--no-sandbox', '--disable-setuid-sandbox'])
# Mobile User Agent often gives single stream?
# But Bilibili mobile web is tricky. Desktop is fine.
context = await browser.new_context(
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
page = await context.new_page()
# Intercept audio responses?
# Bilibili streams are usually .m4s
# But finding the initial state is easier.
logger.info("[Playwright] Navigating to Bilibili...")
await page.goto(url, timeout=45000)
# Wait for video element (triggers loading)
try:
await page.wait_for_selector('video', timeout=15000)
except:
logger.warning("[Playwright] Video selector timeout")
# 1. Try extracting from __playinfo__
# window.__playinfo__ contains dash streams
playinfo = await page.evaluate("window.__playinfo__")
audio_url = None
if playinfo and "data" in playinfo and "dash" in playinfo["data"]:
dash = playinfo["data"]["dash"]
if "audio" in dash and dash["audio"]:
audio_url = dash["audio"][0]["baseUrl"]
logger.info(f"[Playwright] Found audio stream in __playinfo__: {audio_url[:50]}...")
# 2. If playinfo fails, try extracting video src (sometimes it's a blob, which we can't fetch easily without interception)
# But interception is complex. Let's try requests with Referer if we have URL.
if not audio_url:
logger.warning("[Playwright] Could not find audio in __playinfo__")
return None
# Download the audio stream
temp_path = temp_dir / f"bilibili_audio_{timestamp}.m4s" # usually m4s
try:
api_request = context.request
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Referer": "https://www.bilibili.com/"
}
logger.info(f"[Playwright] Downloading audio stream...")
response = await api_request.get(audio_url, headers=headers)
if response.status == 200:
body = await response.body()
with open(temp_path, 'wb') as f:
f.write(body)
logger.info(f"[Playwright] Downloaded successfully: {temp_path}")
return temp_path
else:
logger.error(f"[Playwright] API Request failed: {response.status}")
return None
except Exception as e:
logger.error(f"[Playwright] Download logic error: {e}")
return None
except Exception as e:
logger.error(f"[Playwright] Bilibili download failed: {e}")
return None
finally:
if browser:
await browser.close()
if playwright:
await playwright.stop()

View File

@@ -2,7 +2,7 @@ from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from app.core import config from app.core import config
from app.api import materials, videos, publish, login_helper, auth, admin, ref_audios, ai from app.api import materials, videos, publish, login_helper, auth, admin, ref_audios, ai, tools
from loguru import logger from loguru import logger
import os import os
@@ -57,6 +57,7 @@ app.include_router(auth.router) # /api/auth
app.include_router(admin.router) # /api/admin app.include_router(admin.router) # /api/admin
app.include_router(ref_audios.router, prefix="/api/ref-audios", tags=["RefAudios"]) app.include_router(ref_audios.router, prefix="/api/ref-audios", tags=["RefAudios"])
app.include_router(ai.router) # /api/ai app.include_router(ai.router) # /api/ai
app.include_router(tools.router, prefix="/api/tools", tags=["Tools"])
@app.on_event("startup") @app.on_event("startup")

View File

@@ -71,6 +71,49 @@ class GLMService:
logger.error(f"GLM service error: {e}") logger.error(f"GLM service error: {e}")
raise Exception(f"AI 生成失败: {str(e)}") raise Exception(f"AI 生成失败: {str(e)}")
async def rewrite_script(self, text: str) -> str:
"""
AI 洗稿(文案改写)
Args:
text: 原始文案
Returns:
改写后的文案
"""
prompt = f"""请将以下视频文案进行改写。
原始文案:
{text}
要求:
1. 保持原意,但语气更加自然流畅
2. 适合口播,读起来朗朗上口
3. 字数与原文相当或略微精简
4. 不要返回多余的解释,只返回改写后的正文"""
try:
client = self._get_client()
logger.info(f"Using GLM to rewrite script")
response = client.chat.completions.create(
model=settings.GLM_MODEL,
messages=[{"role": "user", "content": prompt}],
thinking={"type": "disabled"},
max_tokens=2000,
temperature=0.8
)
content = response.choices[0].message.content
logger.info("GLM rewrite completed")
return content.strip()
except Exception as e:
logger.error(f"GLM rewrite error: {e}")
raise Exception(f"AI 改写失败: {str(e)}")
def _parse_json_response(self, content: str) -> dict: def _parse_json_response(self, content: str) -> dict:
"""解析 GLM 返回的 JSON 内容""" """解析 GLM 返回的 JSON 内容"""
# 尝试直接解析 # 尝试直接解析

View File

@@ -212,6 +212,43 @@ class WhisperService:
return result return result
async def transcribe(self, audio_path: str) -> str:
"""
仅转录文本(用于提取文案)
Args:
audio_path: 音频/视频文件路径
Returns:
纯文本内容
"""
import asyncio
def _do_transcribe_text():
model = self._load_model()
logger.info(f"Extracting script from: {audio_path}")
# 转录 (无需字级时间戳)
segments_iter, _ = model.transcribe(
audio_path,
language="zh",
word_timestamps=False,
vad_filter=True,
)
text_parts = []
for segment in segments_iter:
text_parts.append(segment.text.strip())
full_text = " ".join(text_parts)
logger.info(f"Extracted text length: {len(full_text)}")
return full_text
# 在线程池中执行
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(None, _do_transcribe_text)
return result
async def check_health(self) -> dict: async def check_health(self) -> dict:
"""检查服务健康状态""" """检查服务健康状态"""
try: try:

View File

@@ -36,6 +36,12 @@ ViGent2 的前端界面,采用 Next.js 14 + TailwindCSS 构建。
- **账户下拉菜单**: 显示有效期 + 修改密码 + 安全退出。 - **账户下拉菜单**: 显示有效期 + 修改密码 + 安全退出。
- **修改密码**: 弹窗输入当前密码与新密码,修改后强制重新登录。 - **修改密码**: 弹窗输入当前密码与新密码,修改后强制重新登录。
### 6. 文案提取助手 (`ScriptExtractionModal`) [Day 15 新增]
- **多源提取**: 支持文件拖拽上传与 URL 粘贴 (B站/抖音/TikTok)。
- **AI 洗稿**: 集成 GLM-4.7-Flash自动改写为口播文案。
- **一键填入**: 提取结果直接填充至视频生成输入框。
- **智能交互**: 实时进度展示,防误触设计。
## 🛠️ 技术栈 ## 🛠️ 技术栈
- **框架**: Next.js 14 (App Router) - **框架**: Next.js 14 (App Router)

View File

@@ -8,6 +8,7 @@ import { useAuth } from "@/contexts/AuthContext";
import { useTask } from "@/contexts/TaskContext"; import { useTask } from "@/contexts/TaskContext";
import AccountSettingsDropdown from "@/components/AccountSettingsDropdown"; import AccountSettingsDropdown from "@/components/AccountSettingsDropdown";
import VideoPreviewModal from "@/components/VideoPreviewModal"; import VideoPreviewModal from "@/components/VideoPreviewModal";
import ScriptExtractionModal from "@/components/ScriptExtractionModal";
const API_BASE = typeof window === 'undefined' const API_BASE = typeof window === 'undefined'
? 'http://localhost:8006' ? 'http://localhost:8006'
@@ -109,6 +110,10 @@ export default function Home() {
// 是否已从 localStorage 恢复完成 // 是否已从 localStorage 恢复完成
const [isRestored, setIsRestored] = useState(false); const [isRestored, setIsRestored] = useState(false);
// 文案提取模态框
const [extractModalOpen, setExtractModalOpen] = useState(false);
// 可选音色 // 可选音色
const voices = [ const voices = [
{ id: "zh-CN-YunxiNeural", name: "云溪 (男声-年轻)" }, { id: "zh-CN-YunxiNeural", name: "云溪 (男声-年轻)" },
@@ -689,22 +694,30 @@ export default function Home() {
<h2 className="text-base sm:text-lg font-semibold text-white flex items-center gap-2 whitespace-nowrap"> <h2 className="text-base sm:text-lg font-semibold text-white flex items-center gap-2 whitespace-nowrap">
</h2> </h2>
<button <div className="flex gap-2">
onClick={handleGenerateMeta} <button
disabled={isGeneratingMeta || !text.trim()} onClick={() => setExtractModalOpen(true)}
className={`px-2 py-1 text-xs rounded transition-all whitespace-nowrap ${isGeneratingMeta || !text.trim() className="px-2 py-1 text-xs rounded transition-all whitespace-nowrap bg-purple-600 hover:bg-purple-700 text-white flex items-center gap-1"
? "bg-gray-600 cursor-not-allowed text-gray-400" >
: "bg-gradient-to-r from-blue-600 to-cyan-600 hover:from-blue-700 hover:to-cyan-700 text-white" <span>📜</span>
}`} </button>
> <button
{isGeneratingMeta ? "⏳ 生成中..." : "🤖 AI生成标题标签"} onClick={handleGenerateMeta}
</button> disabled={isGeneratingMeta || !text.trim()}
className={`px-2 py-1 text-xs rounded transition-all whitespace-nowrap ${isGeneratingMeta || !text.trim()
? "bg-gray-600 cursor-not-allowed text-gray-400"
: "bg-gradient-to-r from-blue-600 to-cyan-600 hover:from-blue-700 hover:to-cyan-700 text-white"
}`}
>
{isGeneratingMeta ? "⏳ 生成中..." : "🤖 AI生成标题标签"}
</button>
</div>
</div> </div>
<textarea <textarea
value={text} value={text}
onChange={(e) => setText(e.target.value)} onChange={(e) => setText(e.target.value)}
placeholder="请输入你想说的话..." placeholder="请输入你想说的话..."
className="w-full h-40 bg-black/30 border border-white/10 rounded-xl p-4 text-white placeholder-gray-500 resize-none focus:outline-none focus:border-purple-500 transition-colors" className="w-full h-40 bg-black/30 border border-white/10 rounded-xl p-4 text-white placeholder-gray-500 resize-none focus:outline-none focus:border-purple-500 transition-colors hide-scrollbar"
/> />
<div className="flex justify-between mt-2 text-sm text-gray-400"> <div className="flex justify-between mt-2 text-sm text-gray-400">
<span>{text.length} </span> <span>{text.length} </span>
@@ -1102,8 +1115,14 @@ export default function Home() {
</div> </div>
</main > </main >
<VideoPreviewModal <VideoPreviewModal
videoUrl={previewMaterial}
onClose={() => setPreviewMaterial(null)} onClose={() => setPreviewMaterial(null)}
videoUrl={previewMaterial}
/>
<ScriptExtractionModal
isOpen={extractModalOpen}
onClose={() => setExtractModalOpen(false)}
onApply={(text) => setText(text)}
/> />
</div > </div >
); );

View File

@@ -0,0 +1,424 @@
"use client";
import { useState, useRef, useEffect } from "react";
import api from "@/lib/axios";
interface ScriptExtractionModalProps {
isOpen: boolean;
onClose: () => void;
onApply?: (text: string) => void;
}
export default function ScriptExtractionModal({
isOpen,
onClose,
onApply
}: ScriptExtractionModalProps) {
const [isLoading, setIsLoading] = useState(false);
const [script, setScript] = useState("");
const [rewrittenScript, setRewrittenScript] = useState("");
const [error, setError] = useState<string | null>(null);
const [doRewrite, setDoRewrite] = useState(true);
const [step, setStep] = useState<'config' | 'processing' | 'result'>('config');
const [dragActive, setDragActive] = useState(false);
const [selectedFile, setSelectedFile] = useState<File | null>(null);
// New state for URL mode
const [activeTab, setActiveTab] = useState<'file' | 'url'>('url');
const [inputUrl, setInputUrl] = useState("");
// Reset state when modal opens
useEffect(() => {
if (isOpen) {
setStep('config');
setScript("");
setRewrittenScript("");
setError(null);
setIsLoading(false);
setSelectedFile(null);
setInputUrl("");
setActiveTab('url');
}
}, [isOpen]);
const handleDrag = (e: React.DragEvent) => {
e.preventDefault();
e.stopPropagation();
if (e.type === "dragenter" || e.type === "dragover") {
setDragActive(true);
} else if (e.type === "dragleave") {
setDragActive(false);
}
};
const handleDrop = (e: React.DragEvent) => {
e.preventDefault();
e.stopPropagation();
setDragActive(false);
if (e.dataTransfer.files && e.dataTransfer.files[0]) {
handleFile(e.dataTransfer.files[0]);
}
};
const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
if (e.target.files && e.target.files[0]) {
handleFile(e.target.files[0]);
}
};
const handleFile = (file: File) => {
const validTypes = ['.mp4', '.mov', '.avi', '.mp3', '.wav', '.m4a'];
const ext = file.name.toLowerCase().slice(file.name.lastIndexOf('.'));
if (!validTypes.includes(ext)) {
setError(`不支持的文件格式 ${ext},请上传视频或音频文件`);
return;
}
setSelectedFile(file);
setError(null);
};
const handleExtract = async () => {
if (activeTab === 'file' && !selectedFile) {
setError("请先上传文件");
return;
}
if (activeTab === 'url' && !inputUrl.trim()) {
setError("请先输入视频链接");
return;
}
setIsLoading(true);
setStep('processing');
setError(null);
try {
const formData = new FormData();
if (activeTab === 'file' && selectedFile) {
formData.append('file', selectedFile);
} else if (activeTab === 'url') {
formData.append('url', inputUrl.trim());
}
formData.append('rewrite', doRewrite ? 'true' : 'false');
const { data } = await api.post('/api/tools/extract-script', formData, {
headers: { 'Content-Type': 'multipart/form-data' },
timeout: 180000 // 3 minutes timeout
});
if (data.success) {
setScript(data.original_script);
setRewrittenScript(data.rewritten_script || "");
setStep('result');
} else {
setError("提取失败:未知错误");
setStep('config');
}
} catch (err: any) {
console.error(err);
const msg = err.response?.data?.detail || err.message || "请求失败";
setError(msg);
setStep('config');
} finally {
setIsLoading(false);
}
};
const copyToClipboard = (text: string) => {
if (navigator.clipboard && window.isSecureContext) {
navigator.clipboard.writeText(text).then(() => {
alert("已复制到剪贴板");
}).catch(err => {
console.error('Async: Could not copy text: ', err);
fallbackCopyTextToClipboard(text);
});
} else {
fallbackCopyTextToClipboard(text);
}
};
const fallbackCopyTextToClipboard = (text: string) => {
var textArea = document.createElement("textarea");
textArea.value = text;
// Avoid scrolling to bottom
textArea.style.top = "0";
textArea.style.left = "0";
textArea.style.position = "fixed";
textArea.style.opacity = "0";
document.body.appendChild(textArea);
textArea.focus();
textArea.select();
try {
var successful = document.execCommand('copy');
var msg = successful ? 'successful' : 'unsuccessful';
if (successful) {
alert("已复制到剪贴板");
} else {
alert("复制失败,请手动复制");
}
} catch (err) {
console.error('Fallback: Oops, unable to copy', err);
alert("复制失败,请手动复制");
}
document.body.removeChild(textArea);
};
// Close when clicking outside - DISABLED as per user request
// const modalRef = useRef<HTMLDivElement>(null);
// const handleBackdropClick = (e: React.MouseEvent) => {
// if (modalRef.current && !modalRef.current.contains(e.target as Node)) {
// onClose();
// }
// };
if (!isOpen) return null;
return (
<div
className="fixed inset-0 z-50 flex items-center justify-center bg-black/80 backdrop-blur-sm p-4 animate-in fade-in duration-200"
>
<div
// ref={modalRef}
className="bg-[#1a1a1a] border border-white/10 rounded-2xl w-full max-w-2xl max-h-[90vh] overflow-hidden flex flex-col shadow-2xl"
>
{/* Header */}
<div className="flex items-center justify-between p-4 border-b border-white/10 bg-white/5">
<h3 className="text-lg font-semibold text-white flex items-center gap-2">
📜
</h3>
<button
onClick={onClose}
className="text-gray-400 hover:text-white transition-colors text-2xl leading-none"
>
&times;
</button>
</div>
{/* Content */}
<div className="flex-1 overflow-y-auto p-6">
{step === 'config' && (
<div className="space-y-6">
{/* Tabs */}
<div className="flex p-1 bg-white/5 rounded-xl border border-white/10">
<button
onClick={() => setActiveTab('url')}
className={`flex-1 py-2 rounded-lg text-sm font-medium transition-all ${activeTab === 'url'
? 'bg-purple-600 text-white shadow-lg'
: 'text-gray-400 hover:text-white hover:bg-white/5'
}`}
>
🔗
</button>
<button
onClick={() => setActiveTab('file')}
className={`flex-1 py-2 rounded-lg text-sm font-medium transition-all ${activeTab === 'file'
? 'bg-purple-600 text-white shadow-lg'
: 'text-gray-400 hover:text-white hover:bg-white/5'
}`}
>
📂
</button>
</div>
{/* URL Input Area */}
{activeTab === 'url' && (
<div className="space-y-2 py-4">
<div className="relative">
<input
type="text"
value={inputUrl}
onChange={(e) => setInputUrl(e.target.value)}
placeholder="请粘贴抖音、B站等主流平台视频链接..."
className="w-full bg-black/20 border border-white/10 rounded-xl px-4 py-4 text-white placeholder-gray-500 focus:outline-none focus:border-purple-500 transition-colors"
/>
{inputUrl && (
<button
onClick={() => setInputUrl("")}
className="absolute right-3 top-1/2 -translate-y-1/2 text-gray-500 hover:text-white p-1"
>
</button>
)}
</div>
<p className="text-xs text-gray-400 px-1">
B站等主流平台分享链接
</p>
</div>
)}
{/* File Upload Area */}
{activeTab === 'file' && (
<div
className={`
relative border-2 border-dashed rounded-xl p-8 text-center transition-all cursor-pointer
${dragActive ? 'border-purple-500 bg-purple-500/10' : 'border-white/20 hover:border-white/40 hover:bg-white/5'}
${selectedFile ? 'bg-purple-900/10 border-purple-500/50' : ''}
`}
onDragEnter={handleDrag}
onDragLeave={handleDrag}
onDragOver={handleDrag}
onDrop={handleDrop}
>
<input
type="file"
className="absolute inset-0 w-full h-full opacity-0 cursor-pointer"
onChange={handleFileChange}
accept=".mp4,.mov,.avi,.mp3,.wav,.m4a"
/>
{selectedFile ? (
<div className="flex flex-col items-center">
<div className="text-4xl mb-2">📄</div>
<div className="font-medium text-white break-all max-w-xs">{selectedFile.name}</div>
<div className="text-sm text-gray-400 mt-1">{(selectedFile.size / (1024 * 1024)).toFixed(1)} MB</div>
<div className="mt-4 text-xs text-purple-400"></div>
</div>
) : (
<div className="flex flex-col items-center">
<div className="text-4xl mb-2">📤</div>
<div className="font-medium text-white"></div>
<div className="text-sm text-gray-400 mt-2"> MP4, MOV, MP3, WAV </div>
</div>
)}
</div>
)}
{/* Options */}
<div className="bg-white/5 rounded-xl p-4 border border-white/10">
<label className="flex items-center gap-3 cursor-pointer">
<input
type="checkbox"
checked={doRewrite}
onChange={e => setDoRewrite(e.target.checked)}
className="w-5 h-5 accent-purple-600 rounded"
/>
<div>
<div className="text-white font-medium"> AI 稿</div>
<div className="text-xs text-gray-400">稿</div>
</div>
</label>
</div>
{error && (
<div className="p-3 bg-red-500/20 text-red-200 rounded-lg text-sm text-center">
{error}
</div>
)}
<div className="flex justify-center pt-2">
<button
onClick={handleExtract}
className="w-full sm:w-auto px-10 py-3 bg-gradient-to-r from-purple-600 to-pink-600 text-white rounded-xl font-bold hover:shadow-lg hover:from-purple-500 hover:to-pink-500 transition-all transform hover:-translate-y-0.5 disabled:opacity-50 disabled:cursor-not-allowed"
disabled={activeTab === 'file' ? !selectedFile : !inputUrl.trim()}
>
{activeTab === 'url' ? '🔗 解析并提取' : '🚀 开始提取'}
</button>
</div>
</div>
)}
{step === 'processing' && (
<div className="flex flex-col items-center justify-center py-20">
<div className="relative w-20 h-20 mb-6">
<div className="absolute inset-0 border-4 border-purple-500/30 rounded-full"></div>
<div className="absolute inset-0 border-4 border-t-purple-500 rounded-full animate-spin"></div>
</div>
<h4 className="text-xl font-medium text-white mb-2">...</h4>
<p className="text-sm text-gray-400 text-center max-w-sm px-4">
{activeTab === 'url' && "正在下载视频..."}<br />
{doRewrite ? "正在进行语音识别和 AI 智能改写..." : "正在进行语音识别..."}<br />
<span className="opacity-75"></span>
</p>
</div>
)}
{step === 'result' && (
<div className="space-y-6">
{rewrittenScript && (
<div className="space-y-2">
<div className="flex justify-between items-center">
<h4 className="font-semibold text-purple-300 flex items-center gap-2">
AI 稿 <span className="text-xs font-normal text-purple-400/70">()</span>
</h4>
{onApply && (
<button
onClick={() => {
onApply(rewrittenScript);
onClose();
}}
className="text-xs bg-gradient-to-r from-purple-600 to-pink-600 hover:from-purple-500 hover:to-pink-500 text-white px-3 py-1.5 rounded-lg transition-colors flex items-center gap-1 shadow-sm"
>
📥
</button>
)}
<button
onClick={() => copyToClipboard(rewrittenScript)}
className="text-xs bg-purple-600 hover:bg-purple-500 text-white px-3 py-1.5 rounded-lg transition-colors flex items-center gap-1"
>
📋
</button>
</div>
<div className="bg-purple-900/10 border border-purple-500/20 rounded-xl p-4 max-h-60 overflow-y-auto custom-scrollbar">
<p className="text-gray-200 text-sm leading-relaxed whitespace-pre-wrap">
{rewrittenScript}
</p>
</div>
</div>
)}
<div className="space-y-2">
<div className="flex justify-between items-center">
<h4 className="font-semibold text-gray-400 flex items-center gap-2">
🎙
</h4>
{onApply && (
<button
onClick={() => {
onApply(script);
onClose();
}}
className="text-xs bg-white/10 hover:bg-white/20 text-white px-3 py-1.5 rounded-lg transition-colors flex items-center gap-1"
>
📥
</button>
)}
<button
onClick={() => copyToClipboard(script)}
className="text-xs bg-white/10 hover:bg-white/20 text-white px-3 py-1.5 rounded-lg transition-colors"
>
</button>
</div>
<div className="bg-white/5 border border-white/10 rounded-xl p-4 max-h-40 overflow-y-auto custom-scrollbar">
<p className="text-gray-400 text-sm leading-relaxed whitespace-pre-wrap">
{script}
</p>
</div>
</div>
<div className="flex justify-center pt-4">
<button
onClick={() => {
setStep('config');
setScript("");
setRewrittenScript("");
setSelectedFile(null);
setInputUrl("");
// Keep current tab active
}}
className="px-6 py-2 bg-white/10 hover:bg-white/20 text-white rounded-lg transition-colors"
>
</button>
</div>
</div>
)}
</div>
</div>
</div>
);
}