Init: 初版代码
This commit is contained in:
336
.gitignore
vendored
336
.gitignore
vendored
@@ -1,314 +1,46 @@
|
||||
# ---> Python
|
||||
# Byte-compiled / optimized / DLL files
|
||||
# ============ 环境配置 ============
|
||||
.env
|
||||
*.local
|
||||
|
||||
# ============ Python ============
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# UV
|
||||
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
#uv.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
||||
.pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
.venv/
|
||||
*.egg-info/
|
||||
.eggs/
|
||||
dist/
|
||||
build/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
# Ruff stuff:
|
||||
.ruff_cache/
|
||||
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# ---> Node
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
# ============ Node.js ============
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
.next/
|
||||
out/
|
||||
.turbo/
|
||||
|
||||
# Snowpack dependency directory (https://snowpack.dev/)
|
||||
web_modules/
|
||||
# ============ IDE ============
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
# ============ 系统文件 ============
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
desktop.ini
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
# ============ 项目输出 ============
|
||||
backend/outputs/
|
||||
backend/uploads/
|
||||
backend/cookies/
|
||||
*_cookies.json
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional stylelint cache
|
||||
.stylelintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variable files
|
||||
.env
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.local
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
out
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||
# public
|
||||
|
||||
# vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# vuepress v2.x temp and cache directory
|
||||
.temp
|
||||
.cache
|
||||
|
||||
# vitepress build output
|
||||
**/.vitepress/dist
|
||||
|
||||
# vitepress cache directory
|
||||
**/.vitepress/cache
|
||||
|
||||
# Docusaurus cache and generated files
|
||||
.docusaurus
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# yarn v2
|
||||
.yarn/cache
|
||||
.yarn/unplugged
|
||||
.yarn/build-state.yml
|
||||
.yarn/install-state.gz
|
||||
.pnp.*
|
||||
# ============ MuseTalk ============
|
||||
models/MuseTalk/models/
|
||||
models/MuseTalk/results/
|
||||
|
||||
# ============ 日志 ============
|
||||
*.log
|
||||
logs/
|
||||
|
||||
263
Docs/DEPLOY_MANUAL.md
Normal file
263
Docs/DEPLOY_MANUAL.md
Normal file
@@ -0,0 +1,263 @@
|
||||
# ViGent 手动部署指南
|
||||
|
||||
## 服务器信息
|
||||
|
||||
| 配置 | 规格 |
|
||||
|------|------|
|
||||
| 服务器 | Dell PowerEdge R730 |
|
||||
| CPU | 2× Intel Xeon E5-2680 v4 (56 线程) |
|
||||
| 内存 | 192GB DDR4 |
|
||||
| GPU 0 | NVIDIA RTX 3090 24GB |
|
||||
| GPU 1 | NVIDIA RTX 3090 24GB (用于 MuseTalk) |
|
||||
| 部署路径 | `/home/rongye/ProgramFiles/ViGent` |
|
||||
|
||||
---
|
||||
|
||||
## 步骤 1: 环境检查
|
||||
|
||||
```bash
|
||||
# 检查 GPU
|
||||
nvidia-smi
|
||||
|
||||
# 检查 Python 版本 (需要 3.10+)
|
||||
python3 --version
|
||||
|
||||
# 检查 Node.js 版本 (需要 18+)
|
||||
node --version
|
||||
|
||||
# 检查 FFmpeg
|
||||
ffmpeg -version
|
||||
```
|
||||
|
||||
如果缺少 FFmpeg:
|
||||
```bash
|
||||
sudo apt update
|
||||
sudo apt install ffmpeg
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 2: 创建目录结构
|
||||
|
||||
```bash
|
||||
mkdir -p /home/rongye/ProgramFiles/ViGent
|
||||
cd /home/rongye/ProgramFiles/ViGent
|
||||
```
|
||||
|
||||
将项目文件复制到该目录。
|
||||
|
||||
---
|
||||
|
||||
## 步骤 3: 安装后端依赖
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/backend
|
||||
|
||||
# 创建虚拟环境
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
|
||||
# 安装 PyTorch (CUDA 12.1)
|
||||
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
|
||||
|
||||
# 安装其他依赖
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 安装 Playwright 浏览器 (社交发布用)
|
||||
playwright install chromium
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 4: 安装 MMPose (唇形检测)
|
||||
|
||||
```bash
|
||||
source /home/rongye/ProgramFiles/ViGent/backend/venv/bin/activate
|
||||
|
||||
pip install -U openmim
|
||||
mim install mmengine
|
||||
mim install "mmcv>=2.0.1"
|
||||
mim install "mmdet>=3.1.0"
|
||||
mim install "mmpose>=1.1.0"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 5: 安装 MuseTalk
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/models
|
||||
|
||||
# 克隆仓库
|
||||
git clone https://github.com/TMElyralab/MuseTalk.git
|
||||
cd MuseTalk
|
||||
|
||||
# 激活虚拟环境
|
||||
source /home/rongye/ProgramFiles/ViGent/backend/venv/bin/activate
|
||||
|
||||
# 安装依赖
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 6: 下载 MuseTalk 模型权重
|
||||
|
||||
从 HuggingFace 下载模型:
|
||||
- 地址: https://huggingface.co/TMElyralab/MuseTalk
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/models/MuseTalk
|
||||
|
||||
# 使用 huggingface-cli 下载 (需要安装 huggingface_hub)
|
||||
pip install huggingface_hub
|
||||
huggingface-cli download TMElyralab/MuseTalk --local-dir ./models
|
||||
```
|
||||
|
||||
或手动下载后放到:
|
||||
```
|
||||
/home/rongye/ProgramFiles/ViGent/models/MuseTalk/models/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 7: 配置环境变量
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/backend
|
||||
|
||||
# 复制配置模板
|
||||
cp .env.example .env
|
||||
|
||||
# 编辑配置
|
||||
nano .env
|
||||
```
|
||||
|
||||
修改以下配置:
|
||||
```ini
|
||||
# GPU 配置
|
||||
MUSETALK_GPU_ID=1
|
||||
MUSETALK_LOCAL=true
|
||||
|
||||
# 其他配置按需修改
|
||||
DEBUG=false
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 8: 安装前端依赖
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/frontend
|
||||
|
||||
# 安装依赖
|
||||
npm install
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 9: 测试运行
|
||||
|
||||
### 启动后端
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/backend
|
||||
source venv/bin/activate
|
||||
uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
### 启动前端 (新开终端)
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/frontend
|
||||
npm run dev -- --host 0.0.0.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 10: 验证
|
||||
|
||||
1. 访问 http://服务器IP:3000 查看前端
|
||||
2. 访问 http://服务器IP:8000/docs 查看 API 文档
|
||||
3. 上传测试视频,生成口播视频
|
||||
|
||||
---
|
||||
|
||||
## 使用 systemd 管理服务 (可选)
|
||||
|
||||
### 后端服务
|
||||
|
||||
创建 `/etc/systemd/system/vigent-backend.service`:
|
||||
```ini
|
||||
[Unit]
|
||||
Description=ViGent Backend API
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=rongye
|
||||
WorkingDirectory=/home/rongye/ProgramFiles/ViGent/backend
|
||||
Environment="PATH=/home/rongye/ProgramFiles/ViGent/backend/venv/bin"
|
||||
ExecStart=/home/rongye/ProgramFiles/ViGent/backend/venv/bin/uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
Restart=always
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
### 前端服务
|
||||
|
||||
创建 `/etc/systemd/system/vigent-frontend.service`:
|
||||
```ini
|
||||
[Unit]
|
||||
Description=ViGent Frontend
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=rongye
|
||||
WorkingDirectory=/home/rongye/ProgramFiles/ViGent/frontend
|
||||
ExecStart=/usr/bin/npm run start
|
||||
Restart=always
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
### 启用服务
|
||||
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable vigent-backend vigent-frontend
|
||||
sudo systemctl start vigent-backend vigent-frontend
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 故障排除
|
||||
|
||||
### GPU 不可用
|
||||
|
||||
```bash
|
||||
# 检查 CUDA
|
||||
nvidia-smi
|
||||
python3 -c "import torch; print(torch.cuda.is_available())"
|
||||
```
|
||||
|
||||
### 端口被占用
|
||||
|
||||
```bash
|
||||
# 查看端口占用
|
||||
sudo lsof -i :8000
|
||||
sudo lsof -i :3000
|
||||
```
|
||||
|
||||
### 查看日志
|
||||
|
||||
```bash
|
||||
# 后端日志
|
||||
journalctl -u vigent-backend -f
|
||||
|
||||
# 前端日志
|
||||
journalctl -u vigent-frontend -f
|
||||
```
|
||||
171
Docs/DevLogs/Day1.md
Normal file
171
Docs/DevLogs/Day1.md
Normal file
@@ -0,0 +1,171 @@
|
||||
# Day 1 - ViGent 数字人口播系统开发
|
||||
|
||||
**日期**:2026-01-13
|
||||
**开发环境**:Windows 11 (本地开发) / Ubuntu 24.04 (服务器部署)
|
||||
**目标平台**:Dell PowerEdge R730 (2× RTX 3090 24GB)
|
||||
|
||||
---
|
||||
|
||||
## 🎯 今日目标
|
||||
|
||||
搭建数字人口播视频生成系统的完整框架,包括:
|
||||
1. 后端 API (FastAPI)
|
||||
2. 前端 UI (Next.js)
|
||||
3. 视频生成流程 (TTS + FFmpeg)
|
||||
4. 社交媒体发布功能
|
||||
|
||||
---
|
||||
|
||||
## 📦 项目初始化
|
||||
|
||||
### 后端项目结构
|
||||
```
|
||||
backend/
|
||||
├── app/
|
||||
│ ├── api/ # API 路由
|
||||
│ │ ├── materials.py # 素材管理
|
||||
│ │ ├── videos.py # 视频生成
|
||||
│ │ └── publish.py # 社交发布
|
||||
│ ├── services/ # 核心服务
|
||||
│ │ ├── tts_service.py # EdgeTTS 配音
|
||||
│ │ ├── video_service.py # FFmpeg 视频合成
|
||||
│ │ ├── lipsync_service.py # MuseTalk 唇形同步
|
||||
│ │ └── publish_service.py # Playwright 自动发布
|
||||
│ └── core/
|
||||
│ └── config.py # 配置管理
|
||||
├── requirements.txt
|
||||
└── .env.example
|
||||
```
|
||||
|
||||
### 前端项目
|
||||
- 使用 Next.js 14 + TypeScript + Tailwind CSS
|
||||
- 主页面: 视频生成界面
|
||||
- 发布页面: `/publish` 多平台发布管理
|
||||
|
||||
---
|
||||
|
||||
## 🔧 TTS + 视频合成
|
||||
|
||||
### EdgeTTS 集成
|
||||
- 使用 `edge-tts` 库实现免费中文语音合成
|
||||
- 支持多种音色: 云溪、云健、云扬、晓晓、晓伊
|
||||
|
||||
### FFmpeg 视频合成
|
||||
```python
|
||||
# 核心命令
|
||||
ffmpeg -i video.mp4 -i audio.mp3 -c:v copy -c:a aac -map 0:v -map 1:a output.mp4
|
||||
```
|
||||
|
||||
### 🐛 Bug 修复: asyncio subprocess 问题
|
||||
|
||||
**问题**:在 FastAPI BackgroundTasks 中调用 `asyncio.create_subprocess_exec` 导致 `NotImplementedError`
|
||||
|
||||
**原因**:疑为 Windows 下默认事件循环(SelectorEventLoop)不支持 asyncio 子进程所致(BackgroundTasks 的同步任务线程中也没有运行中的事件循环)—— 待在 Linux 服务器上复验
|
||||
|
||||
**修复**:将 `_run_ffmpeg` 和 `_get_duration` 改为使用同步 `subprocess.run`
|
||||
|
||||
```python
|
||||
# 修复前
|
||||
result = await asyncio.create_subprocess_exec(...)
|
||||
|
||||
# 修复后
|
||||
result = subprocess.run(cmd, shell=True, capture_output=True, text=True, encoding='utf-8')
|
||||
```
|
||||
|
||||
**状态**:✅ 已修复,视频生成成功
|
||||
|
||||
---
|
||||
|
||||
## 🎬 MuseTalk 唇形同步集成
|
||||
|
||||
### 架构设计
|
||||
- GPU0: 其他服务
|
||||
- GPU1: MuseTalk 唇形同步
|
||||
|
||||
### 代码实现
|
||||
- `lipsync_service.py` 支持本地模式和远程 API 模式
|
||||
- 通过 `CUDA_VISIBLE_DEVICES=1` 指定使用 GPU1
|
||||
- 如未配置,自动跳过唇形同步
|
||||
|
||||
---
|
||||
|
||||
## 📱 社交媒体发布
|
||||
|
||||
### 支持平台
|
||||
| 平台 | 状态 |
|
||||
|------|------|
|
||||
| 抖音 | ✅ 框架完成 |
|
||||
| 小红书 | ✅ 框架完成 |
|
||||
| 微信视频号 | ✅ 框架完成 |
|
||||
| 快手 | ✅ 框架完成 |
|
||||
| B站 | ✅ 框架完成 |
|
||||
|
||||
### 技术方案
|
||||
- 使用 Playwright 进行浏览器自动化
|
||||
- Cookie 管理实现免登录发布
|
||||
- 前端提供账号管理和一键发布 UI
|
||||
|
||||
---
|
||||
|
||||
## 📚 文档产出
|
||||
|
||||
| 文件 | 说明 |
|
||||
|------|------|
|
||||
| `README.md` | 项目说明 |
|
||||
| `DEPLOY_MANUAL.md` | 手动部署指南 |
|
||||
| `deploy.sh` | 一键部署脚本 |
|
||||
| `.env.example` | 环境配置模板 |
|
||||
|
||||
---
|
||||
|
||||
## ✅ 今日完成
|
||||
|
||||
1. ✅ FastAPI 后端框架搭建
|
||||
2. ✅ EdgeTTS 语音合成服务
|
||||
3. ✅ FFmpeg 视频合成服务
|
||||
4. ✅ MuseTalk 唇形同步集成 (代码层面)
|
||||
5. ✅ Next.js 前端 UI (视频生成 + 发布管理)
|
||||
6. ✅ Playwright 社交媒体发布服务
|
||||
7. ✅ 端到端视频生成测试通过
|
||||
8. ✅ 服务器部署文档编写
|
||||
|
||||
---
|
||||
|
||||
|
||||
### 下午调试记录 (Afternoon Debugging Session)
|
||||
|
||||
**1. 前端 "Undefined" 错误**
|
||||
- **现象**:视频生成失败,弹窗显示 "undefined"。
|
||||
- **原因**:
|
||||
1. 后端 `videos.py` 在异常捕获时未设置 `message` 字段,前端无法获取错误信息。
|
||||
2. 路径解析逻辑错误导致文件未找到。
|
||||
- **修复**:
|
||||
- 后端补充 `tasks[task_id]["message"]` 字段。
|
||||
- 修复 `pathlib.Path` 引用缺失。
|
||||
|
||||
**2. 路径解析问题**
|
||||
- **现象**:本地测试时无法找到素材文件。
|
||||
- **原因**:Windows 本地路径 (`d:\...`) 与相对路径混合使用,且 `BASE_DIR` 指向了错误的父级目录。
|
||||
- **修复**:
|
||||
- `materials.py` 所有返回路径改为相对路径 (`uploads/materials/xxx`)。
|
||||
- `videos.py` 增加智能路径解析:非绝对路径自动拼接 `BASE_DIR`。
|
||||
- `config.py` 调整 `BASE_DIR` 指向项目根目录。
|
||||
|
||||
**3. 语法错误修复**
|
||||
- **现象**:`page.tsx` 出现 `Parsing ecmascript source code failed`。
|
||||
- **原因**:调试代码逻辑错误地插入到了 JSX 渲染块中。
|
||||
- **修复**:完全重写 `page.tsx`,规范化代码结构,增加 "Raw Response" 调试面板。
|
||||
|
||||
**4. 本地 Fallback 逻辑验证**
|
||||
- **现象**:进度条从 5% 直接跳到 100%。
|
||||
- **原因**:本地 MuseTalk 未启用,系统触发 `fallback` 逻辑(仅复制文件)。
|
||||
- **验证**:符合预期行为,确保了无 GPU 环境下的流程连通性。
|
||||
|
||||
---
|
||||
|
||||
## 📋 明日计划
|
||||
|
||||
1. 在服务器上部署系统
|
||||
2. 下载 MuseTalk 模型权重
|
||||
3. 测试完整唇形同步流程
|
||||
4. 优化前端 UI 交互体验
|
||||
96
Docs/Doc_Rules.md
Normal file
96
Docs/Doc_Rules.md
Normal file
@@ -0,0 +1,96 @@
|
||||
# 📋 开发日志更新规则
|
||||
|
||||
> **本文件定义了 AI 助手更新开发文档的规范**
|
||||
|
||||
---
|
||||
|
||||
## ⚡ 核心原则
|
||||
|
||||
| 规则 | 说明 |
|
||||
|------|------|
|
||||
| **默认更新** | 只更新 `DayN.md` |
|
||||
| **按需更新** | `task_complete.md` 仅在用户**明确要求**时更新 |
|
||||
| **增量追加** | 禁止覆盖/新建。请使用 replace/edit 工具插入新内容。 |
|
||||
| **先读后写** | 更新前先查看文件当前内容 |
|
||||
|
||||
---
|
||||
|
||||
## 📁 文件结构
|
||||
|
||||
```
|
||||
ViGent/Docs/
|
||||
├── task_complete.md # 任务总览(仅按需更新)
|
||||
├── Doc_Rules.md # 本文件
|
||||
└── DevLogs/
|
||||
├── Day1.md # 开发日志
|
||||
└── ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📅 DayN.md 更新规则(日常更新)
|
||||
|
||||
### 新建判断
|
||||
- 检查最新 `DayN.md` 的日期
|
||||
- **今天** → 追加到现有文件
|
||||
- **之前** → 创建 `Day{N+1}.md`
|
||||
|
||||
### 追加格式
|
||||
```markdown
|
||||
---
|
||||
|
||||
## 🔧 [章节标题]
|
||||
|
||||
### 问题描述
|
||||
简要描述...
|
||||
|
||||
### 解决方案
|
||||
```code
|
||||
# 代码示例
|
||||
```
|
||||
|
||||
### 结果
|
||||
- ✅ 修复了 xxx
|
||||
```
|
||||
|
||||
### 快速修复格式
|
||||
```markdown
|
||||
## 🐛 [Bug 简述] (HH:MM)
|
||||
|
||||
**问题**:一句话描述
|
||||
**修复**:修改了 `文件名` 中的 xxx
|
||||
**状态**:✅ 已修复 / 🔄 待验证
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📝 task_complete.md 更新规则(仅按需)
|
||||
|
||||
> ⚠️ **仅当用户明确要求更新 `task_complete.md` 时才更新**
|
||||
|
||||
### 更新原则
|
||||
- **格式一致性**:直接参考 `task_complete.md` 现有格式追加内容。
|
||||
- **进度更新**:仅在阶段性里程碑时更新进度百分比。
|
||||
|
||||
---
|
||||
|
||||
## 🚀 新对话检查清单
|
||||
|
||||
1. 查看 `task_complete.md` → 了解整体进度
|
||||
2. 查看最新 `DayN.md` → 确认今天是第几天
|
||||
3. 根据日期决定追加或新建 Day 文件
|
||||
|
||||
---
|
||||
|
||||
## 🎯 项目组件
|
||||
|
||||
| 组件 | 位置 |
|
||||
|------|------|
|
||||
| 后端 (FastAPI) | `ViGent/backend/` |
|
||||
| 前端 (Next.js) | `ViGent/frontend/` |
|
||||
| AI 模型 (MuseTalk) | `ViGent/models/` |
|
||||
| 文档 | `ViGent/Docs/` |
|
||||
|
||||
---
|
||||
|
||||
**最后更新**:2026-01-13
|
||||
0
Docs/Logs.md
Normal file
0
Docs/Logs.md
Normal file
72
Docs/README.md
Normal file
72
Docs/README.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# ViGent - 数字人口播视频生成系统
|
||||
|
||||
基于 MuseTalk + EdgeTTS 的开源数字人口播视频生成系统
|
||||
|
||||
## 功能
|
||||
|
||||
- 📹 上传静态人物视频,生成口播视频(唇形同步)
|
||||
- 🎙️ TTS 配音 / 声音克隆
|
||||
- 💬 自动生成字幕
|
||||
- 📱 一键发布到多个社交平台
|
||||
|
||||
## 技术栈
|
||||
|
||||
| 模块 | 技术 |
|
||||
|------|------|
|
||||
| 前端 | Next.js 14 |
|
||||
| 后端 | FastAPI + Celery |
|
||||
| 唇形同步 | MuseTalk (GPU1) |
|
||||
| TTS | EdgeTTS |
|
||||
| 视频处理 | FFmpeg |
|
||||
| 自动发布 | Playwright |
|
||||
|
||||
## 项目结构
|
||||
|
||||
```
|
||||
/home/rongye/ProgramFiles/ViGent/
|
||||
├── backend/ # FastAPI 后端
|
||||
├── frontend/ # Next.js 前端
|
||||
├── models/ # AI 模型 (MuseTalk)
|
||||
└── deploy.sh # 一键部署脚本
|
||||
```
|
||||
|
||||
## 服务器部署 (Dell R730)
|
||||
|
||||
```bash
|
||||
# 进入部署目录
|
||||
cd /home/rongye/ProgramFiles/ViGent
|
||||
|
||||
# 一键部署
|
||||
chmod +x deploy.sh
|
||||
./deploy.sh
|
||||
```
|
||||
|
||||
## 启动服务
|
||||
|
||||
```bash
|
||||
# 后端 API (端口 8000)
|
||||
cd /home/rongye/ProgramFiles/ViGent/backend
|
||||
source venv/bin/activate
|
||||
uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
|
||||
# 前端 UI (端口 3000)
|
||||
cd /home/rongye/ProgramFiles/ViGent/frontend
|
||||
npm run dev
|
||||
```
|
||||
|
||||
## GPU 配置
|
||||
|
||||
| GPU | 用途 |
|
||||
|-----|------|
|
||||
| GPU 0 (RTX 3090 24GB) | 其他服务 |
|
||||
| GPU 1 (RTX 3090 24GB) | MuseTalk 唇形同步 |
|
||||
|
||||
## 访问地址
|
||||
|
||||
- 视频生成: http://服务器IP:3000
|
||||
- 发布管理: http://服务器IP:3000/publish
|
||||
- API 文档: http://服务器IP:8000/docs
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
305
Docs/implementation_plan.md
Normal file
305
Docs/implementation_plan.md
Normal file
@@ -0,0 +1,305 @@
|
||||
# 数字人口播视频生成系统 - 实现计划
|
||||
|
||||
## 项目目标
|
||||
|
||||
构建一个开源的数字人口播视频生成系统,功能包括:
|
||||
- 上传静态人物视频 → 生成口播视频(唇形同步)
|
||||
- TTS 配音或声音克隆
|
||||
- 字幕自动生成与渲染
|
||||
- 一键发布到多个社交平台
|
||||
|
||||
---
|
||||
|
||||
## 技术架构
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ 前端 (Next.js) │
|
||||
│ 素材管理 | 视频生成 | 发布管理 | 任务状态 │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
│ REST API
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ 后端 (FastAPI) │
|
||||
├─────────────────────────────────────────────────────────┤
|
||||
│ Celery 任务队列 (Redis) │
|
||||
│ ├── 视频生成任务 │
|
||||
│ ├── TTS 配音任务 │
|
||||
│ └── 自动发布任务 │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌──────────┐ ┌──────────┐ ┌──────────┐
|
||||
│ MuseTalk │ │ FFmpeg │ │Playwright│
|
||||
│ 唇形同步 │ │ 视频合成 │ │ 自动发布 │
|
||||
└──────────┘ └──────────┘ └──────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 技术选型
|
||||
|
||||
| 模块 | 技术选择 | 备选方案 |
|
||||
|------|----------|----------|
|
||||
| **前端框架** | Next.js 14 | Vue 3 + Vite |
|
||||
| **UI 组件库** | Tailwind + shadcn/ui | Ant Design |
|
||||
| **后端框架** | FastAPI | Flask |
|
||||
| **任务队列** | Celery + Redis | RQ / Dramatiq |
|
||||
| **唇形同步** | MuseTalk | Wav2Lip / SadTalker |
|
||||
| **TTS 配音** | EdgeTTS | CosyVoice |
|
||||
| **声音克隆** | GPT-SoVITS (可选) | - |
|
||||
| **视频处理** | FFmpeg | MoviePy |
|
||||
| **自动发布** | social-auto-upload | 自行实现 |
|
||||
| **数据库** | SQLite → PostgreSQL | MySQL |
|
||||
| **文件存储** | 本地 / MinIO | 阿里云 OSS |
|
||||
|
||||
---
|
||||
|
||||
## 分阶段实施计划
|
||||
|
||||
### 阶段一:核心功能验证 (MVP)
|
||||
|
||||
> **目标**:验证 MuseTalk + EdgeTTS 效果,跑通端到端流程
|
||||
|
||||
#### 1.1 环境搭建
|
||||
|
||||
```bash
|
||||
# 创建项目目录
|
||||
mkdir TalkingHeadAgent
|
||||
cd TalkingHeadAgent
|
||||
|
||||
# 克隆 MuseTalk
|
||||
git clone https://github.com/TMElyralab/MuseTalk.git
|
||||
|
||||
# 安装依赖
|
||||
cd MuseTalk
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 下载模型权重 (按官方文档)
|
||||
```
|
||||
|
||||
#### 1.2 集成 EdgeTTS
|
||||
|
||||
```python
|
||||
# tts_engine.py
|
||||
import edge_tts
|
||||
import asyncio
|
||||
|
||||
async def text_to_speech(text: str, voice: str = "zh-CN-YunxiNeural", output_path: str = "output.mp3"):
|
||||
communicate = edge_tts.Communicate(text, voice)
|
||||
await communicate.save(output_path)
|
||||
return output_path
|
||||
```
|
||||
|
||||
#### 1.3 端到端测试脚本
|
||||
|
||||
```python
|
||||
# test_pipeline.py
|
||||
"""
|
||||
1. 文案 → EdgeTTS → 音频
|
||||
2. 静态视频 + 音频 → MuseTalk → 口播视频
|
||||
3. 添加字幕 → FFmpeg → 最终视频
|
||||
"""
|
||||
```
|
||||
|
||||
#### 1.4 验证标准
|
||||
- [ ] MuseTalk 能正常推理
|
||||
- [ ] 唇形与音频同步率 > 90%
|
||||
- [ ] 单个视频生成时间 < 2 分钟
|
||||
|
||||
---
|
||||
|
||||
### 阶段二:后端 API 开发
|
||||
|
||||
> **目标**:将核心功能封装为 API,支持异步任务
|
||||
|
||||
#### 2.1 项目结构
|
||||
|
||||
```
|
||||
backend/
|
||||
├── app/
|
||||
│ ├── main.py # FastAPI 入口
|
||||
│ ├── api/
|
||||
│ │ ├── videos.py # 视频生成 API
|
||||
│ │ ├── materials.py # 素材管理 API
|
||||
│ │ └── publish.py # 发布管理 API
|
||||
│ ├── services/
|
||||
│ │ ├── tts_service.py # TTS 服务
|
||||
│ │ ├── lipsync_service.py # 唇形同步服务
|
||||
│ │ └── video_service.py # 视频合成服务
|
||||
│ ├── tasks/
|
||||
│ │ └── celery_tasks.py # Celery 异步任务
|
||||
│ ├── models/
|
||||
│ │ └── schemas.py # Pydantic 模型
|
||||
│ └── core/
|
||||
│ └── config.py # 配置管理
|
||||
├── requirements.txt
|
||||
└── docker-compose.yml # Redis + API
|
||||
```
|
||||
|
||||
#### 2.2 核心 API 设计
|
||||
|
||||
| 端点 | 方法 | 功能 |
|
||||
|------|------|------|
|
||||
| `/api/materials` | POST | 上传素材视频 |
|
||||
| `/api/materials` | GET | 获取素材列表 |
|
||||
| `/api/videos/generate` | POST | 创建视频生成任务 |
|
||||
| `/api/tasks/{id}` | GET | 查询任务状态 |
|
||||
| `/api/videos/{id}/download` | GET | 下载生成的视频 |
|
||||
| `/api/publish` | POST | 发布到社交平台 |
|
||||
|
||||
#### 2.3 Celery 任务定义
|
||||
|
||||
```python
|
||||
# tasks/celery_tasks.py
|
||||
@celery.task
|
||||
def generate_video_task(material_id: str, text: str, voice: str):
|
||||
# 1. TTS 生成音频
|
||||
# 2. MuseTalk 唇形同步
|
||||
# 3. FFmpeg 添加字幕
|
||||
# 4. 保存并返回视频 URL
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 阶段三:前端 Web UI
|
||||
|
||||
> **目标**:提供用户友好的操作界面
|
||||
|
||||
#### 3.1 页面设计
|
||||
|
||||
| 页面 | 功能 |
|
||||
|------|------|
|
||||
| **素材库** | 上传/管理多场景素材视频 |
|
||||
| **生成视频** | 输入文案、选择素材、生成预览 |
|
||||
| **任务中心** | 查看生成进度、下载视频 |
|
||||
| **发布管理** | 绑定平台、一键发布、定时发布 |
|
||||
|
||||
#### 3.2 技术实现
|
||||
|
||||
```bash
|
||||
# 创建 Next.js 项目
|
||||
npx create-next-app@latest frontend --typescript --tailwind --app
|
||||
|
||||
# 安装依赖
|
||||
cd frontend
|
||||
npm install @tanstack/react-query axios
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 阶段四:社交媒体发布
|
||||
|
||||
> **目标**:集成 social-auto-upload,支持多平台发布
|
||||
|
||||
#### 4.1 复用 social-auto-upload
|
||||
|
||||
```bash
|
||||
# 复制模块
|
||||
cp -r SuperIPAgent/social-auto-upload backend/social_upload
|
||||
```
|
||||
|
||||
#### 4.2 Cookie 管理
|
||||
|
||||
```python
|
||||
# 用户通过浏览器登录 → 保存 Cookie → 后续自动发布
|
||||
```
|
||||
|
||||
#### 4.3 支持平台
|
||||
- 抖音
|
||||
- 小红书
|
||||
- 微信视频号
|
||||
- 快手
|
||||
|
||||
---
|
||||
|
||||
### 阶段五:优化与扩展
|
||||
|
||||
| 功能 | 实现方式 |
|
||||
|------|----------|
|
||||
| **声音克隆** | 集成 GPT-SoVITS,用自己的声音 |
|
||||
| **批量生成** | 上传 Excel/CSV,批量生成视频 |
|
||||
| **字幕编辑器** | 可视化调整字幕样式、位置 |
|
||||
| **Docker 部署** | 一键部署到云服务器 |
|
||||
|
||||
---
|
||||
|
||||
## 项目目录结构 (最终)
|
||||
|
||||
```
|
||||
TalkingHeadAgent/
|
||||
├── frontend/ # Next.js 前端
|
||||
│ ├── app/
|
||||
│ ├── components/
|
||||
│ └── package.json
|
||||
├── backend/ # FastAPI 后端
|
||||
│ ├── app/
|
||||
│ ├── MuseTalk/ # 唇形同步模型
|
||||
│ ├── social_upload/ # 社交发布模块
|
||||
│ └── requirements.txt
|
||||
├── docker-compose.yml # 一键部署
|
||||
└── README.md
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 开发时间估算
|
||||
|
||||
| 阶段 | 预计时间 | 说明 |
|
||||
|------|----------|------|
|
||||
| 阶段一 | 2-3 天 | 环境搭建 + 效果验证 |
|
||||
| 阶段二 | 3-4 天 | 后端 API 开发 |
|
||||
| 阶段三 | 3-4 天 | 前端 UI 开发 |
|
||||
| 阶段四 | 2 天 | 社交发布集成 |
|
||||
| 阶段五 | 按需 | 持续优化 |
|
||||
|
||||
**总计**:约 10-13 天可完成 MVP
|
||||
|
||||
---
|
||||
|
||||
## 验证计划
|
||||
|
||||
### 阶段一验证
|
||||
1. 运行 `test_pipeline.py` 脚本
|
||||
2. 检查生成视频的唇形同步效果
|
||||
3. 确认音画同步
|
||||
|
||||
### 阶段二验证
|
||||
1. 使用 Postman/curl 测试所有 API 端点
|
||||
2. 验证任务队列正常工作
|
||||
3. 检查视频生成完整流程
|
||||
|
||||
### 阶段三验证
|
||||
1. 在浏览器中完成完整操作流程
|
||||
2. 验证上传、生成、下载功能
|
||||
3. 检查响应式布局
|
||||
|
||||
### 阶段四验证
|
||||
1. 发布一个测试视频到抖音
|
||||
2. 验证定时发布功能
|
||||
3. 检查发布状态同步
|
||||
|
||||
---
|
||||
|
||||
## 硬件要求
|
||||
|
||||
| 配置 | 最低要求 | 推荐配置 |
|
||||
|------|----------|----------|
|
||||
| **GPU** | NVIDIA GTX 1060 6GB | RTX 3060 12GB+ |
|
||||
| **内存** | 16GB | 32GB |
|
||||
| **存储** | 100GB SSD | 500GB SSD |
|
||||
| **CUDA** | 11.7+ | 12.0+ |
|
||||
|
||||
---
|
||||
|
||||
## 下一步行动
|
||||
|
||||
1. **确认你的 GPU 配置** - MuseTalk 需要 NVIDIA GPU
|
||||
2. **选择开发起点** - 从阶段一开始验证效果
|
||||
3. **确定项目位置** - 在哪个目录创建项目
|
||||
|
||||
---
|
||||
|
||||
> [!IMPORTANT]
|
||||
> 请确认以上计划是否符合你的需求,有任何需要调整的地方请告诉我。
|
||||
119
Docs/task_complete.md
Normal file
119
Docs/task_complete.md
Normal file
@@ -0,0 +1,119 @@
|
||||
# ViGent 数字人口播系统 - 开发任务清单
|
||||
|
||||
**项目**:ViGent 数字人口播视频生成系统
|
||||
**服务器**:Dell R730 (2× RTX 3090 24GB)
|
||||
**更新时间**:2026-01-13
|
||||
**整体进度**:80%(核心功能验证通过,待服务器部署)
|
||||
|
||||
## 📖 快速导航
|
||||
|
||||
| 章节 | 说明 |
|
||||
|------|------|
|
||||
| [已完成任务](#-已完成任务) | Day 1 完成的功能 |
|
||||
| [后续规划](#️-后续规划) | 待办项目 |
|
||||
| [进度统计](#-进度统计) | 各模块完成度 |
|
||||
| [里程碑](#-里程碑) | 关键节点 |
|
||||
| [时间线](#-时间线) | 开发历程 |
|
||||
|
||||
**相关文档**:
|
||||
- [Day 日志](file:///d:/CodingProjects/Antigravity/ViGent/Docs/DevLogs/) (Day1-)
|
||||
- [部署指南](file:///d:/CodingProjects/Antigravity/ViGent/Docs/DEPLOY_MANUAL.md)
|
||||
|
||||
---
|
||||
|
||||
## ✅ 已完成任务
|
||||
|
||||
### 阶段一:核心功能验证
|
||||
- [x] EdgeTTS 配音集成
|
||||
- [x] FFmpeg 视频合成
|
||||
- [x] MuseTalk 唇形同步 (代码集成)
|
||||
- [x] 端到端流程验证
|
||||
|
||||
### 阶段二:后端 API 开发
|
||||
- [x] FastAPI 项目搭建
|
||||
- [x] 视频生成 API
|
||||
- [x] 素材管理 API
|
||||
- [x] 文件存储管理
|
||||
|
||||
### 阶段三:前端 Web UI
|
||||
- [x] Next.js 项目初始化
|
||||
- [x] 视频生成页面
|
||||
- [x] 发布管理页面
|
||||
- [x] 任务状态展示
|
||||
|
||||
### 阶段四:社交媒体发布
|
||||
- [x] Playwright 自动化框架
|
||||
- [x] Cookie 管理功能
|
||||
- [x] 多平台发布 UI
|
||||
- [ ] 定时发布功能
|
||||
|
||||
### 阶段五:部署与文档
|
||||
- [x] 手动部署指南 (DEPLOY_MANUAL.md)
|
||||
- [x] 一键部署脚本 (deploy.sh)
|
||||
- [x] 环境配置模板 (.env.example)
|
||||
- [x] 项目文档 (README.md)
|
||||
|
||||
---
|
||||
|
||||
## 🛤️ 后续规划
|
||||
|
||||
### 🔴 优先待办
|
||||
- [ ] 服务器环境部署
|
||||
- [ ] MuseTalk 模型权重下载
|
||||
- [ ] 唇形同步完整测试
|
||||
- [ ] 生产环境验证
|
||||
|
||||
### 🟠 功能完善
|
||||
- [ ] 定时发布功能
|
||||
- [ ] 批量视频生成
|
||||
- [ ] 字幕样式编辑器
|
||||
|
||||
### 🔵 长期探索
|
||||
- [ ] 声音克隆 (GPT-SoVITS)
|
||||
- [ ] Docker 容器化
|
||||
- [ ] Celery 分布式任务队列
|
||||
|
||||
---
|
||||
|
||||
## 📊 进度统计
|
||||
|
||||
### 总体进度
|
||||
```
|
||||
████████████████░░░░ 80%
|
||||
```
|
||||
|
||||
### 各模块进度
|
||||
|
||||
| 模块 | 进度 | 状态 |
|
||||
|------|------|------|
|
||||
| 后端 API | 100% | ✅ 完成 |
|
||||
| 前端 UI | 100% | ✅ 完成 |
|
||||
| TTS 配音 | 100% | ✅ 完成 |
|
||||
| 视频合成 | 100% | ✅ 完成 |
|
||||
| 唇形同步 | 80% | ✅ 本地 Fallback 验证通过,待服务器部署 |
|
||||
| 社交发布 | 80% | 🔄 框架完成,待测试 |
|
||||
| 服务器部署 | 0% | ⏳ 待开始 |
|
||||
|
||||
---
|
||||
|
||||
## 🎯 里程碑
|
||||
|
||||
### Milestone 1: 项目框架搭建 ✅
|
||||
**完成时间**: Day 1
|
||||
**成果**:
|
||||
- FastAPI 后端 + Next.js 前端
|
||||
- EdgeTTS + FFmpeg 集成
|
||||
- 视频生成端到端验证
|
||||
|
||||
---
|
||||
|
||||
## 📅 时间线
|
||||
|
||||
```
|
||||
Day 1: 项目初始化 + 核心功能 ✅ 完成
|
||||
- 后端 API 框架
|
||||
- 前端 UI
|
||||
- TTS + 视频合成
|
||||
- 社交发布框架
|
||||
- 部署文档
|
||||
```
|
||||
138
README.md
138
README.md
@@ -1,2 +1,138 @@
|
||||
# ViGent
|
||||
# ViGent - 数字人口播视频生成系统
|
||||
|
||||
基于 **MuseTalk + EdgeTTS** 的开源数字人口播视频生成系统。
|
||||
|
||||
> 📹 上传静态人物视频 → 🎙️ 输入口播文案 → 🎬 自动生成唇形同步视频
|
||||
|
||||
---
|
||||
|
||||
## ✨ 功能特性
|
||||
|
||||
- 🎬 **唇形同步** - MuseTalk v1.5 驱动,AI 生成自然口型
|
||||
- 🎙️ **TTS 配音** - EdgeTTS 多音色支持(云希、晓晓等)
|
||||
- 📱 **一键发布** - Playwright 自动发布到抖音、小红书、B站等
|
||||
- 🖥️ **Web UI** - Next.js 现代化界面
|
||||
|
||||
## 🛠️ 技术栈
|
||||
|
||||
| 模块 | 技术 |
|
||||
|------|------|
|
||||
| 前端 | Next.js 14 + TypeScript + TailwindCSS |
|
||||
| 后端 | FastAPI + Python 3.10 |
|
||||
| 唇形同步 | MuseTalk v1.5 (GPU) |
|
||||
| TTS | EdgeTTS |
|
||||
| 视频处理 | FFmpeg |
|
||||
| 自动发布 | Playwright |
|
||||
|
||||
---
|
||||
|
||||
## 📂 项目结构
|
||||
|
||||
```
|
||||
ViGent/
|
||||
├── backend/ # FastAPI 后端
|
||||
│ ├── app/
|
||||
│ │ ├── api/ # API 路由
|
||||
│ │ ├── services/ # 核心服务 (TTS, LipSync, Video)
|
||||
│ │ └── core/ # 配置
|
||||
│ ├── requirements.txt
|
||||
│ └── .env.example
|
||||
├── frontend/ # Next.js 前端
|
||||
│ └── src/app/
|
||||
├── models/ # AI 模型
|
||||
│ └── MuseTalk/ # 唇形同步模型
|
||||
│ └── DEPLOY.md # MuseTalk 部署指南
|
||||
└── Docs/ # 文档
|
||||
├── task_complete.md
|
||||
└── DevLogs/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚀 快速开始
|
||||
|
||||
### 1. 克隆项目
|
||||
|
||||
```bash
|
||||
git clone <仓库地址> /home/rongye/ProgramFiles/ViGent
|
||||
cd /home/rongye/ProgramFiles/ViGent
|
||||
```
|
||||
|
||||
### 2. 安装后端
|
||||
|
||||
```bash
|
||||
cd backend
|
||||
python -m venv venv
|
||||
source venv/bin/activate # Windows: venv\Scripts\activate
|
||||
pip install -r requirements.txt
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
### 3. 安装前端
|
||||
|
||||
```bash
|
||||
cd frontend
|
||||
npm install
|
||||
```
|
||||
|
||||
### 4. 安装 MuseTalk (服务器)
|
||||
|
||||
详见 [models/MuseTalk/DEPLOY.md](models/MuseTalk/DEPLOY.md)
|
||||
|
||||
```bash
|
||||
cd models/MuseTalk
|
||||
# 按照 DEPLOY.md 步骤安装
|
||||
```
|
||||
|
||||
### 5. 启动服务
|
||||
|
||||
```bash
|
||||
# 终端 1: 后端 (端口 8000)
|
||||
cd backend && source venv/bin/activate
|
||||
uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
|
||||
# 终端 2: 前端 (端口 3000)
|
||||
cd frontend
|
||||
npm run dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ 服务器配置
|
||||
|
||||
**目标服务器**: Dell PowerEdge R730
|
||||
|
||||
| 配置 | 规格 |
|
||||
|------|------|
|
||||
| CPU | 2× Intel Xeon E5-2680 v4 (56 线程) |
|
||||
| 内存 | 192GB DDR4 |
|
||||
| GPU | 2× NVIDIA RTX 3090 24GB |
|
||||
| 存储 | 4.47TB |
|
||||
|
||||
**GPU 分配**:
|
||||
- GPU 0: 其他服务
|
||||
- GPU 1: MuseTalk 唇形同步
|
||||
|
||||
---
|
||||
|
||||
## 🌐 访问地址
|
||||
|
||||
| 服务 | 地址 |
|
||||
|------|------|
|
||||
| 视频生成 | http://服务器IP:3000 |
|
||||
| 发布管理 | http://服务器IP:3000/publish |
|
||||
| API 文档 | http://服务器IP:8000/docs |
|
||||
|
||||
---
|
||||
|
||||
## 📖 文档
|
||||
|
||||
- [MuseTalk 部署指南](models/MuseTalk/DEPLOY.md)
|
||||
- [开发日志](Docs/DevLogs/)
|
||||
- [任务进度](Docs/task_complete.md)
|
||||
|
||||
---
|
||||
|
||||
## 📄 License
|
||||
|
||||
MIT
|
||||
|
||||
39
backend/.env.example
Normal file
39
backend/.env.example
Normal file
@@ -0,0 +1,39 @@
|
||||
# ViGent 环境配置示例
|
||||
# 复制此文件为 .env 并填入实际值
|
||||
|
||||
# 调试模式
|
||||
DEBUG=true
|
||||
|
||||
# Redis 配置 (Celery 任务队列)
|
||||
REDIS_URL=redis://localhost:6379/0
|
||||
|
||||
# =============== TTS 配置 ===============
|
||||
# 默认 TTS 音色
|
||||
DEFAULT_TTS_VOICE=zh-CN-YunxiNeural
|
||||
|
||||
# =============== MuseTalk 配置 ===============
|
||||
# GPU 选择 (0=第一块GPU, 1=第二块GPU)
|
||||
MUSETALK_GPU_ID=1
|
||||
|
||||
# 使用本地模式 (true) 或远程 API (false)
|
||||
MUSETALK_LOCAL=true
|
||||
|
||||
# 远程 API 地址 (仅 MUSETALK_LOCAL=false 时使用)
|
||||
# MUSETALK_API_URL=http://localhost:8001
|
||||
|
||||
# 模型版本 (v1 或 v15,推荐 v15)
|
||||
MUSETALK_VERSION=v15
|
||||
|
||||
# 推理批次大小 (根据 GPU 显存调整,RTX 3090 可用 8-16)
|
||||
MUSETALK_BATCH_SIZE=8
|
||||
|
||||
# 使用半精度加速 (推荐开启,减少显存占用)
|
||||
MUSETALK_USE_FLOAT16=true
|
||||
|
||||
# =============== 上传配置 ===============
|
||||
# 最大上传文件大小 (MB)
|
||||
MAX_UPLOAD_SIZE_MB=500
|
||||
|
||||
# =============== FFmpeg 配置 ===============
|
||||
# FFmpeg 路径 (如果不在系统 PATH 中)
|
||||
# FFMPEG_PATH=/usr/bin/ffmpeg
|
||||
0
backend/app/__init__.py
Normal file
0
backend/app/__init__.py
Normal file
0
backend/app/api/__init__.py
Normal file
0
backend/app/api/__init__.py
Normal file
53
backend/app/api/materials.py
Normal file
53
backend/app/api/materials.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException
|
||||
from app.core.config import settings
|
||||
import shutil
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@router.post("/")
async def upload_material(file: UploadFile = File(...)):
    """Upload a video material file.

    Accepts .mp4/.mov/.avi uploads, stores them under
    ``uploads/materials/<uuid><ext>`` and returns the stored metadata.

    Raises:
        HTTPException(400): if the filename is missing or the extension
            is not a supported video format.
    """
    # file.filename may be None for malformed multipart requests; guard
    # before calling .lower() to avoid an AttributeError -> opaque 500.
    if not file.filename or not file.filename.lower().endswith(('.mp4', '.mov', '.avi')):
        raise HTTPException(400, "Invalid format")

    file_id = str(uuid.uuid4())
    ext = Path(file.filename).suffix
    save_path = settings.UPLOAD_DIR / "materials" / f"{file_id}{ext}"
    # Ensure the target directory exists even if app startup did not create
    # it; otherwise open() raises FileNotFoundError -> 500.
    save_path.parent.mkdir(parents=True, exist_ok=True)

    # Stream the upload to disk in chunks (no full read into memory).
    with open(save_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    # Report the stored size in megabytes.
    size_mb = save_path.stat().st_size / (1024 * 1024)

    return {
        "id": file_id,
        "name": file.filename,
        "path": f"uploads/materials/{file_id}{ext}",
        "size_mb": size_mb,
        "type": "video"
    }
|
||||
|
||||
@router.get("/")
async def list_materials():
    """List uploaded material files, newest first."""
    materials_dir = settings.UPLOAD_DIR / "materials"
    entries = []
    if materials_dir.exists():
        for path in materials_dir.glob("*"):
            try:
                info = path.stat()
            except Exception:
                # File vanished between glob() and stat(); skip it.
                continue
            entries.append({
                "id": path.stem,
                "name": path.name,
                "path": f"uploads/materials/{path.name}",
                "size_mb": info.st_size / (1024 * 1024),
                "type": "video",
                "created_at": info.st_ctime
            })
    # Newest uploads first.
    entries.sort(key=lambda item: item.get("created_at", 0), reverse=True)
    return {"materials": entries}
|
||||
59
backend/app/api/publish.py
Normal file
59
backend/app/api/publish.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""
|
||||
发布管理 API
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from app.services.publish_service import PublishService
|
||||
|
||||
router = APIRouter()
|
||||
publish_service = PublishService()
|
||||
|
||||
class PublishRequest(BaseModel):
    """Request body for POST /api/publish."""
    video_path: str                          # path of the rendered video to upload
    platform: str                            # platform id, one of PublishService.PLATFORMS
    title: str                               # post title
    tags: List[str] = []                     # hashtags / topic tags
    description: str = ""                    # post body text
    publish_time: Optional[datetime] = None  # None = publish immediately


class PublishResponse(BaseModel):
    """Result of a publish attempt."""
    success: bool
    message: str
    platform: str
    url: Optional[str] = None                # URL of the published post, if known
|
||||
|
||||
@router.post("/", response_model=PublishResponse)
async def publish_video(request: PublishRequest, background_tasks: BackgroundTasks):
    """Publish a rendered video to a single social platform."""
    try:
        outcome = await publish_service.publish(
            video_path=request.video_path,
            platform=request.platform,
            title=request.title,
            tags=request.tags,
            description=request.description,
            publish_time=request.publish_time
        )
        return PublishResponse(
            success=outcome.get("success", False),
            message=outcome.get("message", ""),
            platform=request.platform,
            url=outcome.get("url")
        )
    except Exception as e:
        # Surface the failure as an HTTP 500 with the error text.
        logger.error(f"发布失败: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@router.get("/platforms")
async def list_platforms():
    """Enumerate supported platforms with their metadata."""
    platforms = []
    for pid, pinfo in publish_service.PLATFORMS.items():
        entry = {"id": pid}
        entry.update(pinfo)
        platforms.append(entry)
    return {"platforms": platforms}


@router.get("/accounts")
async def list_accounts():
    """Return login status for every platform account."""
    return {"accounts": publish_service.get_accounts()}


@router.post("/login/{platform}")
async def login_platform(platform: str):
    """Open an interactive browser login for *platform* and persist its cookies."""
    return await publish_service.login(platform)
|
||||
85
backend/app/api/videos.py
Normal file
85
backend/app/api/videos.py
Normal file
@@ -0,0 +1,85 @@
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
import traceback
|
||||
from app.services.tts_service import TTSService
|
||||
from app.services.video_service import VideoService
|
||||
from app.services.lipsync_service import LipSyncService
|
||||
from app.core.config import settings
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
class GenerateRequest(BaseModel):
    """Request body for POST /api/videos/generate."""
    text: str                         # narration script to synthesize
    voice: str = "zh-CN-YunxiNeural"  # EdgeTTS voice id
    material_path: str                # source video, absolute or repo-relative


# In-memory task store. NOTE(review): lost on restart and not shared across
# worker processes — adequate only for a single-process dev server.
tasks = {}
|
||||
|
||||
async def _process_video_generation(task_id: str, req: GenerateRequest):
    """Background pipeline: TTS -> lip sync -> final composition.

    Progress and results are reported through the module-level ``tasks``
    dict; this function never raises — failures are recorded on the task.
    """
    # Local import kept local on purpose: only this function needs it and we
    # avoid touching the module import block. Hoisted out of the fallback
    # branch so it is not re-imported conditionally.
    import shutil

    task = tasks[task_id]
    try:
        # Resolve relative material paths against the repository root.
        input_material_path = Path(req.material_path)
        if not input_material_path.is_absolute():
            input_material_path = settings.BASE_DIR.parent / req.material_path

        # Fail fast with a clear message instead of a confusing error from
        # ffmpeg/MuseTalk halfway through the pipeline.
        if not input_material_path.exists():
            raise FileNotFoundError(f"Material not found: {input_material_path}")

        task["status"] = "processing"
        task["progress"] = 5
        task["message"] = "Initializing generation..."

        # 1. TTS — synthesize the narration audio.
        task["message"] = "Generating Audio (TTS)..."
        tts = TTSService()
        audio_path = settings.OUTPUT_DIR / f"{task_id}_audio.mp3"
        await tts.generate_audio(req.text, req.voice, str(audio_path))

        task["progress"] = 30

        # 2. Lip sync — only when MuseTalk is actually usable; otherwise the
        # original footage is passed through unchanged.
        task["message"] = "Synthesizing Video (MuseTalk)..."
        lipsync = LipSyncService()
        lipsync_video_path = settings.OUTPUT_DIR / f"{task_id}_lipsync.mp4"

        if await lipsync.check_health():
            await lipsync.generate(str(input_material_path), str(audio_path), str(lipsync_video_path))
        else:
            shutil.copy(str(input_material_path), lipsync_video_path)

        task["progress"] = 80

        # 3. Composition — mux the (possibly lip-synced) video with the audio.
        task["message"] = "Final compositing..."
        video = VideoService()
        final_output = settings.OUTPUT_DIR / f"{task_id}_output.mp4"
        await video.compose(str(lipsync_video_path), str(audio_path), str(final_output))

        task["status"] = "completed"
        task["progress"] = 100
        task["message"] = "Generation Complete!"
        task["output"] = str(final_output)
        task["download_url"] = f"/outputs/{final_output.name}"

    except Exception as e:
        # Record the failure on the task so the UI can display it.
        task["status"] = "failed"
        task["message"] = f"Error: {str(e)}"
        task["error"] = traceback.format_exc()
|
||||
|
||||
@router.post("/generate")
async def generate_video(req: GenerateRequest, background_tasks: BackgroundTasks):
    """Queue a generation job and return its task id immediately."""
    task_id = str(uuid.uuid4())
    tasks[task_id] = {"status": "pending", "task_id": task_id}
    background_tasks.add_task(_process_video_generation, task_id, req)
    return {"task_id": task_id}


@router.get("/tasks/{task_id}")
async def get_task(task_id: str):
    """Look up one task; unknown ids yield status 'not_found' (HTTP 200)."""
    not_found = {"status": "not_found"}
    return tasks.get(task_id, not_found)


@router.get("/tasks")
async def list_tasks():
    """Return every known task record."""
    return {"tasks": [*tasks.values()]}
|
||||
0
backend/app/core/__init__.py
Normal file
0
backend/app/core/__init__.py
Normal file
36
backend/app/core/config.py
Normal file
36
backend/app/core/config.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from pydantic_settings import BaseSettings
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
class Settings(BaseSettings):
    """Application settings, overridable via environment variables / .env."""

    # Base paths. BASE_DIR = backend/app; uploads/ and outputs/ live next to
    # the app package under backend/.
    BASE_DIR: Path = Path(__file__).resolve().parent.parent
    UPLOAD_DIR: Path = BASE_DIR.parent / "uploads"
    OUTPUT_DIR: Path = BASE_DIR.parent / "outputs"

    # Database / cache (Celery task queue backend)
    REDIS_URL: str = "redis://localhost:6379/0"
    DEBUG: bool = True

    # TTS / upload configuration
    DEFAULT_TTS_VOICE: str = "zh-CN-YunxiNeural"
    MAX_UPLOAD_SIZE_MB: int = 500

    # MuseTalk configuration
    MUSETALK_GPU_ID: int = 1  # GPU id (GPU1 by default)
    MUSETALK_LOCAL: bool = True  # local inference; False = call remote API
    MUSETALK_API_URL: str = "http://localhost:8001"  # remote API base URL
    MUSETALK_VERSION: Literal["v1", "v15"] = "v15"  # model version
    MUSETALK_BATCH_SIZE: int = 8  # inference batch size
    MUSETALK_USE_FLOAT16: bool = True  # half precision: faster, less VRAM

    @property
    def MUSETALK_DIR(self) -> Path:
        """Path to the MuseTalk checkout (computed, not configurable)."""
        return self.BASE_DIR.parent.parent / "models" / "MuseTalk"

    class Config:
        env_file = ".env"
        extra = "ignore"  # ignore unknown environment variables


settings = Settings()
|
||||
32
backend/app/main.py
Normal file
32
backend/app/main.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from app.core import config
from app.api import materials, videos, publish

settings = config.settings

app = FastAPI(title="ViGent TalkingHead Agent")

# NOTE(review): wildcard origins combined with allow_credentials=True is wide
# open; tighten allow_origins before exposing this beyond a trusted LAN.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Create working directories up front so request handlers can assume they exist.
settings.UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
settings.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
(settings.UPLOAD_DIR / "materials").mkdir(exist_ok=True)

# Rendered videos are served directly as static files.
app.mount("/outputs", StaticFiles(directory=str(settings.OUTPUT_DIR)), name="outputs")

app.include_router(materials.router, prefix="/api/materials", tags=["Materials"])
app.include_router(videos.router, prefix="/api/videos", tags=["Videos"])
app.include_router(publish.router, prefix="/api/publish", tags=["Publish"])

@app.get("/health")
def health():
    """Liveness probe."""
    return {"status": "ok"}
|
||||
0
backend/app/services/__init__.py
Normal file
0
backend/app/services/__init__.py
Normal file
448
backend/app/services/lipsync_service.py
Normal file
448
backend/app/services/lipsync_service.py
Normal file
@@ -0,0 +1,448 @@
|
||||
"""
|
||||
唇形同步服务
|
||||
支持本地 MuseTalk 推理 (Python API) 或远程 MuseTalk API
|
||||
配置为使用 GPU1 (CUDA:1)
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import httpx
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
from typing import Optional, Any
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
# 设置 MuseTalk 使用 GPU1 (在导入 torch 之前设置)
|
||||
os.environ.setdefault("CUDA_VISIBLE_DEVICES", str(settings.MUSETALK_GPU_ID))
|
||||
|
||||
|
||||
class LipSyncService:
    """Lip-sync service — MuseTalk integration.

    Runs MuseTalk either in-process (Python API), via the MuseTalk CLI as a
    subprocess, or against a remote HTTP API, depending on configuration.
    """

    def __init__(self):
        # Mode selection, taken from settings.
        self.use_local = settings.MUSETALK_LOCAL
        self.api_url = settings.MUSETALK_API_URL
        self.version = settings.MUSETALK_VERSION
        self.musetalk_dir = settings.MUSETALK_DIR

        # Model handles, lazily populated by _load_models().
        self._model_loaded = False
        self._vae = None
        self._unet = None
        self._pe = None
        self._whisper = None
        self._audio_processor = None
        self._face_parser = None
        self._device = None

        # Cached runtime capability probes (None = not probed yet).
        self._gpu_available: Optional[bool] = None
        self._weights_available: Optional[bool] = None
|
||||
|
||||
def _check_gpu(self) -> bool:
|
||||
"""检查 GPU 是否可用"""
|
||||
if self._gpu_available is not None:
|
||||
return self._gpu_available
|
||||
|
||||
try:
|
||||
import torch
|
||||
self._gpu_available = torch.cuda.is_available()
|
||||
if self._gpu_available:
|
||||
device_name = torch.cuda.get_device_name(0)
|
||||
logger.info(f"✅ GPU 可用: {device_name}")
|
||||
else:
|
||||
logger.warning("⚠️ GPU 不可用,将使用 Fallback 模式")
|
||||
except ImportError:
|
||||
self._gpu_available = False
|
||||
logger.warning("⚠️ PyTorch 未安装,将使用 Fallback 模式")
|
||||
|
||||
return self._gpu_available
|
||||
|
||||
def _check_weights(self) -> bool:
|
||||
"""检查模型权重是否存在"""
|
||||
if self._weights_available is not None:
|
||||
return self._weights_available
|
||||
|
||||
# 检查关键权重文件
|
||||
required_dirs = [
|
||||
self.musetalk_dir / "models" / "musetalkV15",
|
||||
self.musetalk_dir / "models" / "whisper",
|
||||
]
|
||||
|
||||
self._weights_available = all(d.exists() for d in required_dirs)
|
||||
|
||||
if self._weights_available:
|
||||
logger.info("✅ MuseTalk 权重文件已就绪")
|
||||
else:
|
||||
missing = [str(d) for d in required_dirs if not d.exists()]
|
||||
logger.warning(f"⚠️ 缺少权重文件: {missing}")
|
||||
|
||||
return self._weights_available
|
||||
|
||||
    def _load_models(self):
        """Lazily load the MuseTalk models onto the GPU (Python-API mode).

        Returns True when all models are ready, False when loading is not
        possible (no GPU, missing weights, or an import/initialization
        error). Safe to call repeatedly; loading happens once.
        """
        if self._model_loaded:
            return True

        if not self._check_gpu() or not self._check_weights():
            return False

        logger.info("🔄 加载 MuseTalk 模型到 GPU...")

        try:
            # Make the MuseTalk checkout importable as a package.
            if str(self.musetalk_dir) not in sys.path:
                sys.path.insert(0, str(self.musetalk_dir))
                logger.debug(f"Added to sys.path: {self.musetalk_dir}")

            import torch
            from omegaconf import OmegaConf
            from transformers import WhisperModel

            # MuseTalk modules — resolvable only after the sys.path insert above.
            from musetalk.utils.utils import load_all_model
            from musetalk.utils.audio_processor import AudioProcessor
            from musetalk.utils.face_parsing import FaceParsing

            # With CUDA_VISIBLE_DEVICES pinned at module import time, the
            # selected physical GPU is always visible here as cuda:0.
            self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

            # Weight locations inside the MuseTalk checkout.
            unet_model_path = str(self.musetalk_dir / "models" / "musetalkV15" / "unet.pth")
            unet_config = str(self.musetalk_dir / "models" / "musetalk" / "config.json")
            whisper_dir = str(self.musetalk_dir / "models" / "whisper")

            self._vae, self._unet, self._pe = load_all_model(
                unet_model_path=unet_model_path,
                vae_type="sd-vae",
                unet_config=unet_config,
                device=self._device
            )

            # Optional half precision: faster and roughly halves VRAM use.
            if settings.MUSETALK_USE_FLOAT16:
                self._pe = self._pe.half()
                self._vae.vae = self._vae.vae.half()
                self._unet.model = self._unet.model.half()

            # Move everything to the target device.
            self._pe = self._pe.to(self._device)
            self._vae.vae = self._vae.vae.to(self._device)
            self._unet.model = self._unet.model.to(self._device)

            # Whisper encoder, matched to the UNet's dtype, inference-only.
            weight_dtype = self._unet.model.dtype
            self._whisper = WhisperModel.from_pretrained(whisper_dir)
            self._whisper = self._whisper.to(device=self._device, dtype=weight_dtype).eval()
            self._whisper.requires_grad_(False)

            # Audio feature extractor.
            self._audio_processor = AudioProcessor(feature_extractor_path=whisper_dir)

            # Face parser — the v15 variant accepts extra tuning parameters.
            if self.version == "v15":
                self._face_parser = FaceParsing(
                    left_cheek_width=90,
                    right_cheek_width=90
                )
            else:
                self._face_parser = FaceParsing()

            self._model_loaded = True
            logger.info("✅ MuseTalk 模型加载完成")
            return True

        except Exception as e:
            logger.error(f"❌ MuseTalk 模型加载失败: {e}")
            import traceback
            logger.debug(traceback.format_exc())
            return False
|
||||
|
||||
async def generate(
|
||||
self,
|
||||
video_path: str,
|
||||
audio_path: str,
|
||||
output_path: str,
|
||||
fps: int = 25
|
||||
) -> str:
|
||||
"""生成唇形同步视频"""
|
||||
logger.info(f"🎬 唇形同步任务: {Path(video_path).name} + {Path(audio_path).name}")
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 决定使用哪种模式
|
||||
if self.use_local:
|
||||
if self._load_models():
|
||||
return await self._local_generate_api(video_path, audio_path, output_path, fps)
|
||||
else:
|
||||
logger.warning("⚠️ 本地推理失败,尝试 subprocess 方式")
|
||||
return await self._local_generate_subprocess(video_path, audio_path, output_path, fps)
|
||||
else:
|
||||
return await self._remote_generate(video_path, audio_path, output_path, fps)
|
||||
|
||||
    async def _local_generate_api(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int
    ) -> str:
        """Run MuseTalk inference in-process (Python API).

        Pipeline: extract frames -> whisper audio features -> face landmarks
        -> VAE latents -> batched UNet inference -> blend faces back ->
        encode video -> mux audio. Requires _load_models() to have succeeded.
        """
        import torch
        import cv2
        import copy
        import glob
        import pickle
        import numpy as np
        from tqdm import tqdm

        from musetalk.utils.utils import get_file_type, get_video_fps, datagen
        from musetalk.utils.preprocessing import get_landmark_and_bbox, read_imgs, coord_placeholder
        from musetalk.utils.blending import get_image

        logger.info("🔄 开始 MuseTalk 推理 (Python API)...")

        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)
            result_img_dir = tmpdir / "frames"
            result_img_dir.mkdir()

            # 1. Split the source video into PNG frames (or accept a still image).
            logger.info("📹 提取视频帧...")
            if get_file_type(video_path) == "video":
                frames_dir = tmpdir / "input_frames"
                frames_dir.mkdir()
                cmd = f'ffmpeg -v fatal -i "{video_path}" -start_number 0 "{frames_dir}/%08d.png"'
                subprocess.run(cmd, shell=True, check=True)
                input_img_list = sorted(glob.glob(str(frames_dir / "*.png")))
                video_fps = get_video_fps(video_path)
            else:
                # Single still image: animate it at the requested fps.
                input_img_list = [video_path]
                video_fps = fps

            # 2. Whisper audio features, chunked one-per-output-frame.
            logger.info("🎵 提取音频特征...")
            whisper_input_features, librosa_length = self._audio_processor.get_audio_feature(audio_path)
            weight_dtype = self._unet.model.dtype
            whisper_chunks = self._audio_processor.get_whisper_chunk(
                whisper_input_features,
                self._device,
                weight_dtype,
                self._whisper,
                librosa_length,
                fps=video_fps,
                audio_padding_length_left=2,
                audio_padding_length_right=2,
            )

            # 3. Face landmarks / bounding boxes per frame.
            logger.info("🧑 检测人脸关键点...")
            coord_list, frame_list = get_landmark_and_bbox(input_img_list, bbox_shift=0)

            # 4. Encode each detected face crop into VAE latent space.
            logger.info("🔢 编码图像潜在表示...")
            input_latent_list = []
            for bbox, frame in zip(coord_list, frame_list):
                if bbox == coord_placeholder:
                    # No face detected in this frame; skip it.
                    continue
                x1, y1, x2, y2 = bbox
                if self.version == "v15":
                    # v15 extends the crop slightly below the jaw.
                    y2 = min(y2 + 10, frame.shape[0])
                crop_frame = frame[y1:y2, x1:x2]
                crop_frame = cv2.resize(crop_frame, (256, 256), interpolation=cv2.INTER_LANCZOS4)
                latents = self._vae.get_latents_for_unet(crop_frame)
                input_latent_list.append(latents)

            # Ping-pong the frame/coord/latent lists so audio longer than the
            # clip loops back and forth smoothly.
            frame_list_cycle = frame_list + frame_list[::-1]
            coord_list_cycle = coord_list + coord_list[::-1]
            input_latent_list_cycle = input_latent_list + input_latent_list[::-1]

            # 5. Batched UNet inference driven by the audio chunks.
            logger.info("🤖 执行 MuseTalk 推理...")
            timesteps = torch.tensor([0], device=self._device)
            batch_size = settings.MUSETALK_BATCH_SIZE
            video_num = len(whisper_chunks)

            gen = datagen(
                whisper_chunks=whisper_chunks,
                vae_encode_latents=input_latent_list_cycle,
                batch_size=batch_size,
                delay_frame=0,
                device=self._device,
            )

            res_frame_list = []
            total = int(np.ceil(float(video_num) / batch_size))

            with torch.no_grad():
                for i, (whisper_batch, latent_batch) in enumerate(tqdm(gen, total=total, desc="推理")):
                    audio_feature_batch = self._pe(whisper_batch)
                    latent_batch = latent_batch.to(dtype=self._unet.model.dtype)
                    pred_latents = self._unet.model(
                        latent_batch, timesteps, encoder_hidden_states=audio_feature_batch
                    ).sample
                    recon = self._vae.decode_latents(pred_latents)
                    for res_frame in recon:
                        res_frame_list.append(res_frame)

            # 6. Blend each generated face back into its original frame.
            logger.info("🖼️ 合成结果帧...")
            for i, res_frame in enumerate(tqdm(res_frame_list, desc="合成")):
                bbox = coord_list_cycle[i % len(coord_list_cycle)]
                ori_frame = copy.deepcopy(frame_list_cycle[i % len(frame_list_cycle)])
                x1, y1, x2, y2 = bbox

                if self.version == "v15":
                    y2 = min(y2 + 10, ori_frame.shape[0])

                try:
                    res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
                except:
                    # Degenerate bbox (e.g. no face in this frame); drop it.
                    continue

                if self.version == "v15":
                    combine_frame = get_image(
                        ori_frame, res_frame, [x1, y1, x2, y2],
                        mode="jaw", fp=self._face_parser
                    )
                else:
                    combine_frame = get_image(ori_frame, res_frame, [x1, y1, x2, y2], fp=self._face_parser)

                cv2.imwrite(str(result_img_dir / f"{i:08d}.png"), combine_frame)

            # 7. Encode the blended frames into a silent H.264 video.
            logger.info("🎬 合成最终视频...")
            temp_video = tmpdir / "temp_video.mp4"
            cmd_video = f'ffmpeg -y -v warning -r {video_fps} -f image2 -i "{result_img_dir}/%08d.png" -vcodec libx264 -vf format=yuv420p -crf 18 "{temp_video}"'
            subprocess.run(cmd_video, shell=True, check=True)

            # 8. Mux the narration audio onto the video (written outside tmpdir).
            cmd_audio = f'ffmpeg -y -v warning -i "{audio_path}" -i "{temp_video}" -c:v copy -c:a aac -shortest "{output_path}"'
            subprocess.run(cmd_audio, shell=True, check=True)

        logger.info(f"✅ 唇形同步完成: {output_path}")
        return output_path
|
||||
|
||||
    async def _local_generate_subprocess(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int
    ) -> str:
        """Run MuseTalk via its CLI (``python -m scripts.inference``).

        Used when in-process loading failed. Falls back to copying the input
        video verbatim whenever inference cannot run or produces no output,
        so the overall pipeline never hard-fails here.
        """
        logger.info("🔄 使用 subprocess 调用 MuseTalk...")

        # Without weights the CLI cannot work either — fall back immediately.
        if not self._check_weights():
            logger.warning("⚠️ 权重不存在,使用 Fallback 模式")
            shutil.copy(video_path, output_path)
            return output_path

        with tempfile.TemporaryDirectory() as tmpdir:
            # Write a one-task inference config for the CLI.
            # NOTE(review): paths are interpolated into quoted YAML scalars;
            # a path containing '"' would break the config — confirm upload
            # paths are always sanitized upstream.
            config_path = Path(tmpdir) / "inference_config.yaml"
            config_content = f"""
task1:
  video_path: "{video_path}"
  audio_path: "{audio_path}"
  result_name: "output.mp4"
"""
            config_path.write_text(config_content)

            result_dir = Path(tmpdir) / "results"
            result_dir.mkdir()

            # Invoke the CLI from inside the MuseTalk checkout so its
            # relative imports and model paths resolve.
            cmd = [
                sys.executable, "-m", "scripts.inference",
                "--version", self.version,
                "--inference_config", str(config_path),
                "--result_dir", str(result_dir),
                "--gpu_id", "0",  # CUDA_VISIBLE_DEVICES already pins the physical GPU
            ]

            if settings.MUSETALK_USE_FLOAT16:
                cmd.append("--use_float16")

            result = subprocess.run(
                cmd,
                cwd=str(self.musetalk_dir),
                capture_output=True,
                text=True,
                env={**os.environ, "CUDA_VISIBLE_DEVICES": str(settings.MUSETALK_GPU_ID)}
            )

            if result.returncode != 0:
                logger.error(f"MuseTalk CLI 失败: {result.stderr}")
                # Fallback: pass the original footage through unchanged.
                shutil.copy(video_path, output_path)
                return output_path

            # The CLI writes its result somewhere under result_dir.
            output_files = list(result_dir.rglob("*.mp4"))
            if output_files:
                shutil.copy(output_files[0], output_path)
                logger.info(f"✅ 唇形同步完成: {output_path}")
            else:
                logger.warning("⚠️ 未找到输出文件,使用 Fallback")
                shutil.copy(video_path, output_path)

        return output_path
|
||||
|
||||
    async def _remote_generate(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int
    ) -> str:
        """Delegate lip-sync to a remote MuseTalk HTTP service.

        POSTs the video and audio to ``{api_url}/lipsync`` and writes the
        returned body to *output_path*. On any failure the input video is
        copied through unchanged (best-effort fallback).
        """
        logger.info(f"📡 调用远程 API: {self.api_url}")

        try:
            # Inference is slow — allow up to 5 minutes per request.
            async with httpx.AsyncClient(timeout=300.0) as client:
                with open(video_path, "rb") as vf, open(audio_path, "rb") as af:
                    files = {
                        "video": (Path(video_path).name, vf, "video/mp4"),
                        "audio": (Path(audio_path).name, af, "audio/mpeg"),
                    }
                    data = {"fps": fps}

                    response = await client.post(
                        f"{self.api_url}/lipsync",
                        files=files,
                        data=data
                    )

                if response.status_code == 200:
                    # The response body is the finished video file.
                    with open(output_path, "wb") as f:
                        f.write(response.content)
                    logger.info(f"✅ 远程推理完成: {output_path}")
                    return output_path
                else:
                    raise RuntimeError(f"API 错误: {response.status_code} - {response.text}")

        except Exception as e:
            logger.error(f"远程 API 调用失败: {e}")
            # Fallback: pass the original footage through unchanged.
            shutil.copy(video_path, output_path)
            return output_path
|
||||
|
||||
async def check_health(self) -> bool:
|
||||
"""健康检查"""
|
||||
if self.use_local:
|
||||
gpu_ok = self._check_gpu()
|
||||
weights_ok = self._check_weights()
|
||||
return gpu_ok and weights_ok
|
||||
else:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
response = await client.get(f"{self.api_url}/health")
|
||||
return response.status_code == 200
|
||||
except:
|
||||
return False
|
||||
71
backend/app/services/publish_service.py
Normal file
71
backend/app/services/publish_service.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
发布服务 (Playwright)
|
||||
"""
|
||||
from playwright.async_api import async_playwright
|
||||
from pathlib import Path
|
||||
import json
|
||||
import asyncio
|
||||
from loguru import logger
|
||||
from app.core.config import settings
|
||||
|
||||
class PublishService:
    """Automates posting videos to Chinese social platforms via Playwright.

    Login is interactive (a visible browser window); session cookies are
    persisted per platform as JSON files under ``<BASE_DIR>/cookies/``.
    """

    # Supported platforms: id -> display name + creator-studio URL.
    PLATFORMS = {
        "douyin": {"name": "抖音", "url": "https://creator.douyin.com/"},
        "xiaohongshu": {"name": "小红书", "url": "https://creator.xiaohongshu.com/"},
        "weixin": {"name": "微信视频号", "url": "https://channels.weixin.qq.com/"},
        "kuaishou": {"name": "快手", "url": "https://cp.kuaishou.com/"},
        "bilibili": {"name": "B站", "url": "https://member.bilibili.com/platform/upload/video/frame"},
    }

    def __init__(self):
        # Cookie jar directory, one JSON file per platform.
        self.cookies_dir = settings.BASE_DIR / "cookies"
        self.cookies_dir.mkdir(exist_ok=True)

    def get_accounts(self):
        """Return login status for every supported platform.

        A platform counts as logged in when its cookie file exists;
        cookie expiry is not checked here.
        """
        accounts = []
        for pid, pinfo in self.PLATFORMS.items():
            cookie_file = self.cookies_dir / f"{pid}_cookies.json"
            accounts.append({
                "platform": pid,
                "name": pinfo["name"],
                "logged_in": cookie_file.exists(),
                "enabled": True
            })
        return accounts

    async def login(self, platform: str):
        """Interactive login: open the platform page, wait, then save cookies.

        Raises:
            ValueError: if *platform* is not in PLATFORMS.
        """
        if platform not in self.PLATFORMS:
            raise ValueError("Unsupported platform")

        pinfo = self.PLATFORMS[platform]
        logger.info(f"Logging in to {platform}...")

        async with async_playwright() as p:
            # Headful browser so the user can complete the login by hand.
            browser = await p.chromium.launch(headless=False)
            context = await browser.new_context()
            page = await context.new_page()

            await page.goto(pinfo["url"])
            logger.info("Please login manually in the browser window...")

            # NOTE(review): this waits a fixed 45 s for the user to finish —
            # there is no detection of a successful login; consider polling
            # the URL/title instead of a blind timeout.
            try:
                await page.wait_for_timeout(45000)  # Give user 45s to login
                cookies = await context.cookies()
                cookie_path = self.cookies_dir / f"{platform}_cookies.json"
                with open(cookie_path, "w") as f:
                    json.dump(cookies, f)
                return {"success": True, "message": f"Login {platform} successful"}
            except Exception as e:
                return {"success": False, "message": str(e)}
            finally:
                await browser.close()

    async def publish(self, video_path: str, platform: str, title: str, **kwargs):
        """Publish a video (mock implementation).

        Real automation needs per-platform upload selectors; for now this
        sleeps briefly and reports success so the API flow can be exercised.
        """
        await asyncio.sleep(2)
        return {"success": True, "message": f"Published to {platform} (Mock)", "url": ""}
|
||||
33
backend/app/services/tts_service.py
Normal file
33
backend/app/services/tts_service.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""
|
||||
TTS 服务 (EdgeTTS)
|
||||
"""
|
||||
import edge_tts
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
class TTSService:
    """Text-to-speech service backed by Microsoft EdgeTTS."""

    # Supported zh-CN neural voices, keyed by EdgeTTS voice id.
    VOICES = {
        "zh-CN-YunxiNeural": "云希 (男, 轻松)",
        "zh-CN-YunjianNeural": "云健 (男, 体育)",
        "zh-CN-YunyangNeural": "云扬 (男, 专业)",
        "zh-CN-XiaoxiaoNeural": "晓晓 (女, 活泼)",
        "zh-CN-XiaoyiNeural": "晓伊 (女, 卡通)",
    }

    async def generate_audio(self, text: str, voice: str, output_path: str) -> str:
        """Synthesize *text* with *voice* and write the audio to *output_path*.

        Returns the output path on success; re-raises on any EdgeTTS failure.
        """
        logger.info(f"TTS Generating: {text[:20]}... ({voice})")
        # Make sure the destination directory exists before EdgeTTS writes.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

        try:
            synthesizer = edge_tts.Communicate(text, voice)
            await synthesizer.save(output_path)
            # Subtitle generation (vtt -> srt conversion) intentionally omitted.
            return output_path
        except Exception as exc:
            logger.error(f"TTS Failed: {exc}")
            raise

    async def list_voices(self):
        """Return the supported voices as ``[{"id": ..., "name": ...}, ...]``."""
        voices = []
        for voice_id, label in self.VOICES.items():
            voices.append({"id": voice_id, "name": label})
        return voices
|
||||
95
backend/app/services/video_service.py
Normal file
95
backend/app/services/video_service.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
视频合成服务
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import json
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
from typing import Optional
|
||||
|
||||
class VideoService:
    """Compose videos by muxing a (possibly looped) material clip with a
    generated voice track, driving the ffmpeg/ffprobe CLIs synchronously
    (compatible with FastAPI BackgroundTasks)."""

    def __init__(self):
        pass

    def _run_ffmpeg(self, cmd: list) -> bool:
        """Run an ffmpeg command given as an argv list.

        Returns True on exit code 0; False on a non-zero exit or any
        exception (missing binary, decode error, ...).
        """
        logger.debug(f"FFmpeg CMD: {' '.join(cmd)}")
        try:
            # Pass the argv list directly with shell=False: this removes
            # the hand-rolled quoting (which broke on paths containing
            # quotes) and the shell-injection surface of the previous
            # string-based invocation.
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                encoding='utf-8',
            )
            if result.returncode != 0:
                logger.error(f"FFmpeg Error: {result.stderr}")
                return False
            return True
        except Exception as e:
            logger.error(f"FFmpeg Exception: {e}")
            return False

    def _get_duration(self, file_path: str) -> float:
        """Return the media duration in seconds via ffprobe, or 0.0 on failure."""
        # argv list instead of a quoted shell string (same rationale as
        # _run_ffmpeg); the path is passed verbatim as one argument.
        cmd = [
            "ffprobe", "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            file_path,
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
            return float(result.stdout.strip())
        except Exception:
            # Missing binary, unreadable file, or empty/garbled output.
            return 0.0

    async def compose(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        subtitle_path: Optional[str] = None
    ) -> str:
        """合成视频 — mux *video_path* with *audio_path* into *output_path*.

        If the audio is longer than the video, the video input is looped
        enough times to cover it; ``-shortest`` then trims the result to
        the audio length.

        Args:
            video_path: Source (material) video file.
            audio_path: Voice track to lay over the video.
            output_path: Destination file; parent directories are created.
            subtitle_path: Currently unused — subtitle burn-in is disabled
                pending font-configuration issues.

        Returns:
            The output path on success.

        Raises:
            RuntimeError: If the ffmpeg invocation fails.
        """
        # Ensure output dir exists before ffmpeg tries to write.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

        video_duration = self._get_duration(video_path)
        audio_duration = self._get_duration(audio_path)

        # Loop the video input if the narration outlasts it. Guard against
        # a zero duration (probe failure) to avoid division by zero.
        loop_count = 1
        if audio_duration > video_duration and video_duration > 0:
            loop_count = int(audio_duration / video_duration) + 1

        cmd = ["ffmpeg", "-y"]

        # Input video (-stream_loop must precede its -i).
        if loop_count > 1:
            cmd.extend(["-stream_loop", str(loop_count)])
        cmd.extend(["-i", video_path])

        # Input audio.
        cmd.extend(["-i", audio_path])

        # Subtitles intentionally skipped for now (font issues);
        # subtitle_path is accepted but ignored.

        # Re-encode video, encode audio as AAC, stop at the shorter stream.
        cmd.extend(["-c:v", "libx264", "-c:a", "aac", "-shortest"])
        # Video from input 0, audio from input 1.
        cmd.extend(["-map", "0:v", "-map", "1:a"])

        cmd.append(output_path)

        if self._run_ffmpeg(cmd):
            return output_path
        else:
            raise RuntimeError("FFmpeg composition failed")
|
||||
20
backend/requirements.txt
Normal file
20
backend/requirements.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
# ViGent Backend 依赖
|
||||
# MuseTalk 依赖请参考: models/MuseTalk/DEPLOY.md
|
||||
|
||||
fastapi>=0.109.0
|
||||
uvicorn[standard]>=0.27.0
|
||||
python-multipart>=0.0.6
|
||||
pydantic>=2.5.3
|
||||
pydantic-settings>=2.1.0
|
||||
celery>=5.3.6
|
||||
redis>=5.0.1
|
||||
edge-tts>=6.1.9
|
||||
ffmpeg-python>=0.2.0
|
||||
httpx>=0.26.0
|
||||
aiofiles>=23.2.1
|
||||
sqlalchemy>=2.0.25
|
||||
aiosqlite>=0.19.0
|
||||
python-dotenv>=1.0.0
|
||||
loguru>=0.7.2
|
||||
playwright>=1.40.0
|
||||
requests>=2.31.0
|
||||
41
frontend/.gitignore
vendored
Normal file
41
frontend/.gitignore
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.*
|
||||
.yarn/*
|
||||
!.yarn/patches
|
||||
!.yarn/plugins
|
||||
!.yarn/releases
|
||||
!.yarn/versions
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# env files (can opt-in for committing if needed)
|
||||
.env*
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
36
frontend/README.md
Normal file
36
frontend/README.md
Normal file
@@ -0,0 +1,36 @@
|
||||
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
|
||||
|
||||
## Getting Started
|
||||
|
||||
First, run the development server:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
# or
|
||||
yarn dev
|
||||
# or
|
||||
pnpm dev
|
||||
# or
|
||||
bun dev
|
||||
```
|
||||
|
||||
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
||||
|
||||
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
|
||||
|
||||
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
|
||||
|
||||
## Learn More
|
||||
|
||||
To learn more about Next.js, take a look at the following resources:
|
||||
|
||||
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
|
||||
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
|
||||
|
||||
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
|
||||
|
||||
## Deploy on Vercel
|
||||
|
||||
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
|
||||
|
||||
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
|
||||
18
frontend/eslint.config.mjs
Normal file
18
frontend/eslint.config.mjs
Normal file
@@ -0,0 +1,18 @@
|
||||
import { defineConfig, globalIgnores } from "eslint/config";
import nextVitals from "eslint-config-next/core-web-vitals";
import nextTs from "eslint-config-next/typescript";

// Paths eslint-config-next ignores by default, re-declared explicitly so
// the override below stays visible and editable in one place.
const defaultNextIgnores = [
  ".next/**",
  "out/**",
  "build/**",
  "next-env.d.ts",
];

const eslintConfig = defineConfig([
  ...nextVitals,
  ...nextTs,
  // Override default ignores of eslint-config-next.
  globalIgnores(defaultNextIgnores),
]);

export default eslintConfig;
|
||||
15
frontend/next.config.ts
Normal file
15
frontend/next.config.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import type { NextConfig } from "next";
|
||||
|
||||
const nextConfig: NextConfig = {
|
||||
// 允许跨域请求后端 API
|
||||
async rewrites() {
|
||||
return [
|
||||
{
|
||||
source: '/api/:path*',
|
||||
destination: 'http://127.0.0.1:8000/api/:path*',
|
||||
},
|
||||
];
|
||||
},
|
||||
};
|
||||
|
||||
export default nextConfig;
|
||||
6550
frontend/package-lock.json
generated
Normal file
6550
frontend/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
26
frontend/package.json
Normal file
26
frontend/package.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"name": "frontend",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
"start": "next start",
|
||||
"lint": "eslint"
|
||||
},
|
||||
"dependencies": {
|
||||
"next": "16.1.1",
|
||||
"react": "19.2.3",
|
||||
"react-dom": "19.2.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tailwindcss/postcss": "^4",
|
||||
"@types/node": "^20",
|
||||
"@types/react": "^19",
|
||||
"@types/react-dom": "^19",
|
||||
"eslint": "^9",
|
||||
"eslint-config-next": "16.1.1",
|
||||
"tailwindcss": "^4",
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
7
frontend/postcss.config.mjs
Normal file
7
frontend/postcss.config.mjs
Normal file
@@ -0,0 +1,7 @@
|
||||
/** PostCSS configuration: Tailwind CSS v4 is the sole plugin. */
const plugins = {
  "@tailwindcss/postcss": {},
};

export default { plugins };
|
||||
1
frontend/public/file.svg
Normal file
1
frontend/public/file.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>
|
||||
|
After Width: | Height: | Size: 391 B |
1
frontend/public/globe.svg
Normal file
1
frontend/public/globe.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>
|
||||
|
After Width: | Height: | Size: 1.0 KiB |
1
frontend/public/next.svg
Normal file
1
frontend/public/next.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>
|
||||
|
After Width: | Height: | Size: 1.3 KiB |
1
frontend/public/vercel.svg
Normal file
1
frontend/public/vercel.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>
|
||||
|
After Width: | Height: | Size: 128 B |
1
frontend/public/window.svg
Normal file
1
frontend/public/window.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>
|
||||
|
After Width: | Height: | Size: 385 B |
BIN
frontend/src/app/favicon.ico
Normal file
BIN
frontend/src/app/favicon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 25 KiB |
26
frontend/src/app/globals.css
Normal file
26
frontend/src/app/globals.css
Normal file
@@ -0,0 +1,26 @@
|
||||
@import "tailwindcss";
|
||||
|
||||
:root {
|
||||
--background: #ffffff;
|
||||
--foreground: #171717;
|
||||
}
|
||||
|
||||
@theme inline {
|
||||
--color-background: var(--background);
|
||||
--color-foreground: var(--foreground);
|
||||
--font-sans: var(--font-geist-sans);
|
||||
--font-mono: var(--font-geist-mono);
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme: dark) {
|
||||
:root {
|
||||
--background: #0a0a0a;
|
||||
--foreground: #ededed;
|
||||
}
|
||||
}
|
||||
|
||||
body {
|
||||
background: var(--background);
|
||||
color: var(--foreground);
|
||||
font-family: Arial, Helvetica, sans-serif;
|
||||
}
|
||||
34
frontend/src/app/layout.tsx
Normal file
34
frontend/src/app/layout.tsx
Normal file
@@ -0,0 +1,34 @@
|
||||
import type { Metadata } from "next";
|
||||
import { Geist, Geist_Mono } from "next/font/google";
|
||||
import "./globals.css";
|
||||
|
||||
const geistSans = Geist({
|
||||
variable: "--font-geist-sans",
|
||||
subsets: ["latin"],
|
||||
});
|
||||
|
||||
const geistMono = Geist_Mono({
|
||||
variable: "--font-geist-mono",
|
||||
subsets: ["latin"],
|
||||
});
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "Create Next App",
|
||||
description: "Generated by create next app",
|
||||
};
|
||||
|
||||
export default function RootLayout({
|
||||
children,
|
||||
}: Readonly<{
|
||||
children: React.ReactNode;
|
||||
}>) {
|
||||
return (
|
||||
<html lang="en">
|
||||
<body
|
||||
className={`${geistSans.variable} ${geistMono.variable} antialiased`}
|
||||
>
|
||||
{children}
|
||||
</body>
|
||||
</html>
|
||||
);
|
||||
}
|
||||
348
frontend/src/app/page.tsx
Normal file
348
frontend/src/app/page.tsx
Normal file
@@ -0,0 +1,348 @@
|
||||
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
|
||||
const API_BASE = "http://127.0.0.1:8000";
|
||||
|
||||
// 类型定义
|
||||
interface Material {
|
||||
id: string;
|
||||
name: string;
|
||||
scene: string;
|
||||
size_mb: number;
|
||||
path: string;
|
||||
}
|
||||
|
||||
interface Task {
|
||||
task_id: string;
|
||||
status: string;
|
||||
progress: number;
|
||||
message: string;
|
||||
download_url?: string;
|
||||
}
|
||||
|
||||
export default function Home() {
|
||||
const [materials, setMaterials] = useState<Material[]>([]);
|
||||
const [selectedMaterial, setSelectedMaterial] = useState<string>("");
|
||||
const [text, setText] = useState<string>(
|
||||
"大家好,欢迎来到我的频道,今天给大家分享一些有趣的内容。"
|
||||
);
|
||||
const [voice, setVoice] = useState<string>("zh-CN-YunxiNeural");
|
||||
const [isGenerating, setIsGenerating] = useState(false);
|
||||
const [currentTask, setCurrentTask] = useState<Task | null>(null);
|
||||
const [generatedVideo, setGeneratedVideo] = useState<string | null>(null);
|
||||
const [fetchError, setFetchError] = useState<string | null>(null);
|
||||
const [debugData, setDebugData] = useState<string>("");
|
||||
|
||||
// 可选音色
|
||||
const voices = [
|
||||
{ id: "zh-CN-YunxiNeural", name: "云溪 (男声-年轻)" },
|
||||
{ id: "zh-CN-YunjianNeural", name: "云健 (男声-新闻)" },
|
||||
{ id: "zh-CN-YunyangNeural", name: "云扬 (男声-专业)" },
|
||||
{ id: "zh-CN-XiaoxiaoNeural", name: "晓晓 (女声-活泼)" },
|
||||
{ id: "zh-CN-XiaoyiNeural", name: "晓伊 (女声-温柔)" },
|
||||
];
|
||||
|
||||
// 加载素材列表
|
||||
useEffect(() => {
|
||||
fetchMaterials();
|
||||
}, []);
|
||||
|
||||
const fetchMaterials = async () => {
|
||||
try {
|
||||
setFetchError(null);
|
||||
setDebugData("Loading...");
|
||||
|
||||
// Add timestamp to prevent caching
|
||||
const url = `${API_BASE}/api/materials/?t=${new Date().getTime()}`;
|
||||
const res = await fetch(url);
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`HTTP ${res.status} ${res.statusText}`);
|
||||
}
|
||||
|
||||
const text = await res.text(); // Get raw text first
|
||||
setDebugData(text.substring(0, 200) + (text.length > 200 ? "..." : "")); // Show preview
|
||||
|
||||
const data = JSON.parse(text);
|
||||
setMaterials(data.materials || []);
|
||||
|
||||
if (data.materials?.length > 0) {
|
||||
if (!selectedMaterial) {
|
||||
setSelectedMaterial(data.materials[0].id);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("获取素材失败:", error);
|
||||
setFetchError(String(error));
|
||||
setDebugData(`Error: ${String(error)}`);
|
||||
}
|
||||
};
|
||||
|
||||
// 生成视频
|
||||
const handleGenerate = async () => {
|
||||
if (!selectedMaterial || !text.trim()) {
|
||||
alert("请选择素材并输入文案");
|
||||
return;
|
||||
}
|
||||
|
||||
setIsGenerating(true);
|
||||
setGeneratedVideo(null);
|
||||
|
||||
try {
|
||||
// 查找选中的素材对象以获取路径
|
||||
const materialObj = materials.find(m => m.id === selectedMaterial);
|
||||
if (!materialObj) {
|
||||
alert("素材数据异常");
|
||||
return;
|
||||
}
|
||||
|
||||
// 创建生成任务
|
||||
const res = await fetch(`${API_BASE}/api/videos/generate`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
material_path: materialObj.path,
|
||||
text: text,
|
||||
voice: voice,
|
||||
add_subtitle: true,
|
||||
}),
|
||||
});
|
||||
|
||||
const data = await res.json();
|
||||
const taskId = data.task_id;
|
||||
|
||||
// 轮询任务状态
|
||||
const pollTask = async () => {
|
||||
const taskRes = await fetch(`${API_BASE}/api/videos/tasks/${taskId}`);
|
||||
const taskData: Task = await taskRes.json();
|
||||
setCurrentTask(taskData);
|
||||
|
||||
if (taskData.status === "completed") {
|
||||
setGeneratedVideo(`${API_BASE}${taskData.download_url}`);
|
||||
setIsGenerating(false);
|
||||
} else if (taskData.status === "failed") {
|
||||
alert("视频生成失败: " + taskData.message);
|
||||
setIsGenerating(false);
|
||||
} else {
|
||||
setTimeout(pollTask, 1000);
|
||||
}
|
||||
};
|
||||
|
||||
pollTask();
|
||||
} catch (error) {
|
||||
console.error("生成失败:", error);
|
||||
setIsGenerating(false);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="min-h-screen bg-gradient-to-br from-slate-900 via-purple-900 to-slate-900">
|
||||
{/* Header */}
|
||||
<header className="border-b border-white/10 bg-black/20 backdrop-blur-sm">
|
||||
<div className="max-w-6xl mx-auto px-6 py-4 flex items-center justify-between">
|
||||
<h1 className="text-2xl font-bold text-white flex items-center gap-3">
|
||||
<span className="text-3xl">🎬</span>
|
||||
ViGent
|
||||
</h1>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<main className="max-w-6xl mx-auto px-6 py-8">
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
|
||||
{/* 左侧: 输入区域 */}
|
||||
<div className="space-y-6">
|
||||
{/* 素材选择 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<div className="flex justify-between items-center mb-4">
|
||||
<h2 className="text-lg font-semibold text-white flex items-center gap-2">
|
||||
📹 选择素材视频
|
||||
</h2>
|
||||
<button
|
||||
onClick={fetchMaterials}
|
||||
className="px-3 py-1 text-xs bg-white/10 hover:bg-white/20 rounded text-gray-300"
|
||||
>
|
||||
🔄 刷新
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{fetchError ? (
|
||||
<div className="p-4 bg-red-500/20 text-red-200 rounded-xl text-sm mb-4">
|
||||
获取素材失败: {fetchError}
|
||||
<br />
|
||||
API: {API_BASE}/api/materials/
|
||||
</div>
|
||||
) : materials.length === 0 ? (
|
||||
<div className="text-center py-8 text-gray-400">
|
||||
<p>暂无素材视频</p>
|
||||
<p className="text-sm mt-2">
|
||||
请将视频放入 backend/uploads/materials/ 目录
|
||||
</p>
|
||||
<div className="mt-4 p-4 bg-black/40 rounded text-left text-xs font-mono text-gray-500 overflow-auto whitespace-pre-wrap break-all">
|
||||
<p className="font-bold text-purple-400">Debug Info:</p>
|
||||
<p>Time: {new Date().toLocaleTimeString()}</p>
|
||||
<p>Items: {materials.length}</p>
|
||||
<p className="mt-2 text-gray-400 border-t border-gray-700 pt-2">Raw Response:</p>
|
||||
<p>{debugData}</p>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="grid grid-cols-2 gap-3">
|
||||
{materials.map((m) => (
|
||||
<button
|
||||
key={m.id}
|
||||
onClick={() => setSelectedMaterial(m.id)}
|
||||
className={`p-4 rounded-xl border-2 transition-all text-left ${selectedMaterial === m.id
|
||||
? "border-purple-500 bg-purple-500/20"
|
||||
: "border-white/10 bg-white/5 hover:border-white/30"
|
||||
}`}
|
||||
>
|
||||
<div className="text-white font-medium truncate">
|
||||
{m.scene || m.name}
|
||||
</div>
|
||||
<div className="text-gray-400 text-sm mt-1">
|
||||
{m.size_mb.toFixed(1)} MB
|
||||
</div>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* 文案输入 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4 flex items-center gap-2">
|
||||
✍️ 输入口播文案
|
||||
</h2>
|
||||
<textarea
|
||||
value={text}
|
||||
onChange={(e) => setText(e.target.value)}
|
||||
placeholder="请输入你想说的话..."
|
||||
className="w-full h-40 bg-black/30 border border-white/10 rounded-xl p-4 text-white placeholder-gray-500 resize-none focus:outline-none focus:border-purple-500 transition-colors"
|
||||
/>
|
||||
<div className="flex justify-between mt-2 text-sm text-gray-400">
|
||||
<span>{text.length} 字</span>
|
||||
<span>预计时长: ~{Math.ceil(text.length / 4)} 秒</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 音色选择 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4 flex items-center gap-2">
|
||||
🎙️ 选择配音音色
|
||||
</h2>
|
||||
<div className="grid grid-cols-2 gap-3">
|
||||
{voices.map((v) => (
|
||||
<button
|
||||
key={v.id}
|
||||
onClick={() => setVoice(v.id)}
|
||||
className={`p-3 rounded-xl border-2 transition-all text-left ${voice === v.id
|
||||
? "border-purple-500 bg-purple-500/20"
|
||||
: "border-white/10 bg-white/5 hover:border-white/30"
|
||||
}`}
|
||||
>
|
||||
<span className="text-white text-sm">{v.name}</span>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 生成按钮 */}
|
||||
<button
|
||||
onClick={handleGenerate}
|
||||
disabled={isGenerating || !selectedMaterial}
|
||||
className={`w-full py-4 rounded-xl font-bold text-lg transition-all ${isGenerating || !selectedMaterial
|
||||
? "bg-gray-600 cursor-not-allowed text-gray-400"
|
||||
: "bg-gradient-to-r from-purple-600 to-pink-600 hover:from-purple-700 hover:to-pink-700 text-white shadow-lg hover:shadow-purple-500/25"
|
||||
}`}
|
||||
>
|
||||
{isGenerating ? (
|
||||
<span className="flex items-center justify-center gap-3">
|
||||
<svg className="animate-spin h-5 w-5" viewBox="0 0 24 24">
|
||||
<circle
|
||||
className="opacity-25"
|
||||
cx="12"
|
||||
cy="12"
|
||||
r="10"
|
||||
stroke="currentColor"
|
||||
strokeWidth="4"
|
||||
fill="none"
|
||||
/>
|
||||
<path
|
||||
className="opacity-75"
|
||||
fill="currentColor"
|
||||
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
|
||||
/>
|
||||
</svg>
|
||||
生成中... {currentTask?.progress || 0}%
|
||||
</span>
|
||||
) : (
|
||||
"🚀 生成视频"
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* 右侧: 预览区域 */}
|
||||
<div className="space-y-6">
|
||||
{/* 进度显示 */}
|
||||
{currentTask && isGenerating && (
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4">
|
||||
⏳ 生成进度
|
||||
</h2>
|
||||
<div className="space-y-3">
|
||||
<div className="h-3 bg-black/30 rounded-full overflow-hidden">
|
||||
<div
|
||||
className="h-full bg-gradient-to-r from-purple-500 to-pink-500 transition-all duration-300"
|
||||
style={{ width: `${currentTask.progress}%` }}
|
||||
/>
|
||||
</div>
|
||||
<p className="text-gray-300">{currentTask.message}</p>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* 视频预览 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4 flex items-center gap-2">
|
||||
🎥 视频预览
|
||||
</h2>
|
||||
<div className="aspect-video bg-black/50 rounded-xl overflow-hidden flex items-center justify-center">
|
||||
{generatedVideo ? (
|
||||
<video
|
||||
src={generatedVideo}
|
||||
controls
|
||||
className="w-full h-full object-contain"
|
||||
/>
|
||||
) : (
|
||||
<div className="text-gray-500 text-center">
|
||||
<div className="text-5xl mb-4">📹</div>
|
||||
<p>生成的视频将在这里预览</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{generatedVideo && (
|
||||
<a
|
||||
href={generatedVideo}
|
||||
download
|
||||
className="mt-4 w-full py-3 rounded-xl bg-green-600 hover:bg-green-700 text-white font-medium flex items-center justify-center gap-2 transition-colors"
|
||||
>
|
||||
⬇️ 下载视频
|
||||
</a>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
{/* Footer */}
|
||||
<footer className="border-t border-white/10 mt-12">
|
||||
<div className="max-w-6xl mx-auto px-6 py-4 text-center text-gray-500 text-sm">
|
||||
ViGent - 基于 MuseTalk + EdgeTTS
|
||||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
335
frontend/src/app/publish/page.tsx
Normal file
335
frontend/src/app/publish/page.tsx
Normal file
@@ -0,0 +1,335 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import Link from "next/link";
|
||||
|
||||
const API_BASE = "http://127.0.0.1:8000";
|
||||
|
||||
interface Account {
|
||||
platform: string;
|
||||
name: string;
|
||||
logged_in: boolean;
|
||||
enabled: boolean;
|
||||
}
|
||||
|
||||
interface Video {
|
||||
name: string;
|
||||
path: string;
|
||||
}
|
||||
|
||||
export default function PublishPage() {
|
||||
const [accounts, setAccounts] = useState<Account[]>([]);
|
||||
const [videos, setVideos] = useState<Video[]>([]);
|
||||
const [selectedVideo, setSelectedVideo] = useState<string>("");
|
||||
const [selectedPlatforms, setSelectedPlatforms] = useState<string[]>([]);
|
||||
const [title, setTitle] = useState<string>("");
|
||||
const [tags, setTags] = useState<string>("");
|
||||
const [isPublishing, setIsPublishing] = useState(false);
|
||||
const [publishResults, setPublishResults] = useState<any[]>([]);
|
||||
|
||||
// 加载账号和视频列表
|
||||
useEffect(() => {
|
||||
fetchAccounts();
|
||||
fetchVideos();
|
||||
}, []);
|
||||
|
||||
const fetchAccounts = async () => {
|
||||
try {
|
||||
const res = await fetch(`${API_BASE}/api/publish/accounts`);
|
||||
const data = await res.json();
|
||||
setAccounts(data.accounts || []);
|
||||
} catch (error) {
|
||||
console.error("获取账号失败:", error);
|
||||
}
|
||||
};
|
||||
|
||||
const fetchVideos = async () => {
|
||||
try {
|
||||
// 获取已生成的视频列表 (从 outputs 目录)
|
||||
const res = await fetch(`${API_BASE}/api/videos/tasks`);
|
||||
const data = await res.json();
|
||||
|
||||
const completedVideos = data.tasks
|
||||
?.filter((t: any) => t.status === "completed")
|
||||
.map((t: any) => ({
|
||||
name: `${t.task_id}_output.mp4`,
|
||||
path: `outputs/${t.task_id}_output.mp4`,
|
||||
})) || [];
|
||||
|
||||
setVideos(completedVideos);
|
||||
if (completedVideos.length > 0) {
|
||||
setSelectedVideo(completedVideos[0].path);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("获取视频失败:", error);
|
||||
}
|
||||
};
|
||||
|
||||
const togglePlatform = (platform: string) => {
|
||||
if (selectedPlatforms.includes(platform)) {
|
||||
setSelectedPlatforms(selectedPlatforms.filter((p) => p !== platform));
|
||||
} else {
|
||||
setSelectedPlatforms([...selectedPlatforms, platform]);
|
||||
}
|
||||
};
|
||||
|
||||
const handlePublish = async () => {
|
||||
if (!selectedVideo || !title || selectedPlatforms.length === 0) {
|
||||
alert("请选择视频、填写标题并选择至少一个平台");
|
||||
return;
|
||||
}
|
||||
|
||||
setIsPublishing(true);
|
||||
setPublishResults([]);
|
||||
|
||||
const tagList = tags.split(/[,,\s]+/).filter((t) => t.trim());
|
||||
|
||||
for (const platform of selectedPlatforms) {
|
||||
try {
|
||||
const res = await fetch(`${API_BASE}/api/publish/`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
video_path: selectedVideo,
|
||||
platform,
|
||||
title,
|
||||
tags: tagList,
|
||||
description: "",
|
||||
}),
|
||||
});
|
||||
|
||||
const result = await res.json();
|
||||
setPublishResults((prev) => [...prev, result]);
|
||||
} catch (error) {
|
||||
setPublishResults((prev) => [
|
||||
...prev,
|
||||
{ platform, success: false, message: String(error) },
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
setIsPublishing(false);
|
||||
};
|
||||
|
||||
const handleLogin = async (platform: string) => {
|
||||
alert(
|
||||
`登录功能需要在服务端执行。\n\n请在终端运行:\ncurl -X POST http://localhost:8000/api/publish/login/${platform}`
|
||||
);
|
||||
};
|
||||
|
||||
const platformIcons: Record<string, string> = {
|
||||
douyin: "🎵",
|
||||
xiaohongshu: "📕",
|
||||
weixin: "💬",
|
||||
kuaishou: "⚡",
|
||||
bilibili: "📺",
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="min-h-screen bg-gradient-to-br from-slate-900 via-purple-900 to-slate-900">
|
||||
{/* Header */}
|
||||
<header className="border-b border-white/10 bg-black/20 backdrop-blur-sm">
|
||||
<div className="max-w-6xl mx-auto px-6 py-4 flex items-center justify-between">
|
||||
<Link href="/" className="text-2xl font-bold text-white flex items-center gap-3 hover:opacity-80">
|
||||
<span className="text-3xl">🎬</span>
|
||||
TalkingHead Agent
|
||||
</Link>
|
||||
<nav className="flex gap-4">
|
||||
<Link
|
||||
href="/"
|
||||
className="px-4 py-2 text-gray-400 hover:text-white transition-colors"
|
||||
>
|
||||
视频生成
|
||||
</Link>
|
||||
<Link
|
||||
href="/publish"
|
||||
className="px-4 py-2 text-white bg-purple-600 rounded-lg"
|
||||
>
|
||||
发布管理
|
||||
</Link>
|
||||
</nav>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<main className="max-w-6xl mx-auto px-6 py-8">
|
||||
<h1 className="text-3xl font-bold text-white mb-8">📤 社交媒体发布</h1>
|
||||
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
|
||||
{/* 左侧: 账号管理 */}
|
||||
<div className="space-y-6">
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4 flex items-center gap-2">
|
||||
👤 平台账号
|
||||
</h2>
|
||||
|
||||
<div className="space-y-3">
|
||||
{accounts.map((account) => (
|
||||
<div
|
||||
key={account.platform}
|
||||
className="flex items-center justify-between p-4 bg-black/30 rounded-xl"
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="text-2xl">
|
||||
{platformIcons[account.platform]}
|
||||
</span>
|
||||
<div>
|
||||
<div className="text-white font-medium">
|
||||
{account.name}
|
||||
</div>
|
||||
<div
|
||||
className={`text-sm ${account.logged_in
|
||||
? "text-green-400"
|
||||
: "text-gray-500"
|
||||
}`}
|
||||
>
|
||||
{account.logged_in ? "✓ 已登录" : "未登录"}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => handleLogin(account.platform)}
|
||||
className={`px-4 py-2 rounded-lg text-sm font-medium transition-colors ${account.logged_in
|
||||
? "bg-gray-600 text-gray-300"
|
||||
: "bg-purple-600 hover:bg-purple-700 text-white"
|
||||
}`}
|
||||
>
|
||||
{account.logged_in ? "重新登录" : "登录"}
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 右侧: 发布表单 */}
|
||||
<div className="space-y-6">
|
||||
{/* 选择视频 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4">
|
||||
🎥 选择要发布的视频
|
||||
</h2>
|
||||
|
||||
{videos.length === 0 ? (
|
||||
<p className="text-gray-400">
|
||||
暂无已生成的视频,请先
|
||||
<Link href="/" className="text-purple-400 hover:underline">
|
||||
生成视频
|
||||
</Link>
|
||||
</p>
|
||||
) : (
|
||||
<select
|
||||
value={selectedVideo}
|
||||
onChange={(e) => setSelectedVideo(e.target.value)}
|
||||
className="w-full p-3 bg-black/30 border border-white/10 rounded-xl text-white"
|
||||
>
|
||||
{videos.map((v) => (
|
||||
<option key={v.path} value={v.path}>
|
||||
{v.name}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* 填写信息 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4">✍️ 发布信息</h2>
|
||||
|
||||
<div className="space-y-4">
|
||||
<div>
|
||||
<label className="block text-gray-400 text-sm mb-2">
|
||||
标题
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
value={title}
|
||||
onChange={(e) => setTitle(e.target.value)}
|
||||
placeholder="输入视频标题..."
|
||||
className="w-full p-3 bg-black/30 border border-white/10 rounded-xl text-white placeholder-gray-500"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="block text-gray-400 text-sm mb-2">
|
||||
标签 (用逗号分隔)
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
value={tags}
|
||||
onChange={(e) => setTags(e.target.value)}
|
||||
placeholder="AI, 数字人, 口播..."
|
||||
className="w-full p-3 bg-black/30 border border-white/10 rounded-xl text-white placeholder-gray-500"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 选择平台 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4">📱 选择发布平台</h2>
|
||||
|
||||
<div className="grid grid-cols-3 gap-3">
|
||||
{accounts
|
||||
.filter((a) => a.logged_in)
|
||||
.map((account) => (
|
||||
<button
|
||||
key={account.platform}
|
||||
onClick={() => togglePlatform(account.platform)}
|
||||
className={`p-3 rounded-xl border-2 transition-all ${selectedPlatforms.includes(account.platform)
|
||||
? "border-purple-500 bg-purple-500/20"
|
||||
: "border-white/10 bg-white/5 hover:border-white/30"
|
||||
}`}
|
||||
>
|
||||
<span className="text-2xl block mb-1">
|
||||
{platformIcons[account.platform]}
|
||||
</span>
|
||||
<span className="text-white text-sm">{account.name}</span>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{accounts.filter((a) => a.logged_in).length === 0 && (
|
||||
<p className="text-gray-400 text-center py-4">
|
||||
请先登录至少一个平台账号
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* 发布按钮 */}
|
||||
<button
|
||||
onClick={handlePublish}
|
||||
disabled={isPublishing || selectedPlatforms.length === 0}
|
||||
className={`w-full py-4 rounded-xl font-bold text-lg transition-all ${isPublishing || selectedPlatforms.length === 0
|
||||
? "bg-gray-600 cursor-not-allowed text-gray-400"
|
||||
: "bg-gradient-to-r from-green-600 to-teal-600 hover:from-green-700 hover:to-teal-700 text-white"
|
||||
}`}
|
||||
>
|
||||
{isPublishing ? "发布中..." : "🚀 一键发布"}
|
||||
</button>
|
||||
|
||||
{/* 发布结果 */}
|
||||
{publishResults.length > 0 && (
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10">
|
||||
<h2 className="text-lg font-semibold text-white mb-4">
|
||||
发布结果
|
||||
</h2>
|
||||
<div className="space-y-2">
|
||||
{publishResults.map((result, i) => (
|
||||
<div
|
||||
key={i}
|
||||
className={`p-3 rounded-lg ${result.success ? "bg-green-500/20" : "bg-red-500/20"
|
||||
}`}
|
||||
>
|
||||
<span className="text-white">
|
||||
{platformIcons[result.platform]} {result.message}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
34
frontend/tsconfig.json
Normal file
34
frontend/tsconfig.json
Normal file
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2017",
|
||||
"lib": ["dom", "dom.iterable", "esnext"],
|
||||
"allowJs": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
"noEmit": true,
|
||||
"esModuleInterop": true,
|
||||
"module": "esnext",
|
||||
"moduleResolution": "bundler",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"jsx": "react-jsx",
|
||||
"incremental": true,
|
||||
"plugins": [
|
||||
{
|
||||
"name": "next"
|
||||
}
|
||||
],
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"include": [
|
||||
"next-env.d.ts",
|
||||
"**/*.ts",
|
||||
"**/*.tsx",
|
||||
".next/types/**/*.ts",
|
||||
".next/dev/types/**/*.ts",
|
||||
"**/*.mts"
|
||||
],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
186
models/MuseTalk/DEPLOY.md
Normal file
186
models/MuseTalk/DEPLOY.md
Normal file
@@ -0,0 +1,186 @@
|
||||
# MuseTalk 部署指南
|
||||
|
||||
## 硬件要求
|
||||
|
||||
| 配置 | 最低要求 | 推荐配置 |
|
||||
|------|----------|----------|
|
||||
| GPU | 8GB VRAM (如 RTX 3060) | 24GB VRAM (如 RTX 3090) |
|
||||
| 内存 | 32GB | 64GB |
|
||||
| CUDA | 11.7+ | 12.0+ |
|
||||
|
||||
---
|
||||
|
||||
## 📦 安装步骤
|
||||
|
||||
### 1. 克隆 MuseTalk 仓库
|
||||
|
||||
```bash
|
||||
# 进入 ViGent 项目的 models 目录
|
||||
cd /home/rongye/ProgramFiles/ViGent/models
|
||||
|
||||
# 克隆 MuseTalk 仓库
|
||||
git clone https://github.com/TMElyralab/MuseTalk.git MuseTalk_repo
|
||||
|
||||
# 保留我们的自定义文件
|
||||
cp MuseTalk/DEPLOY.md MuseTalk_repo/
|
||||
cp MuseTalk/musetalk_api.py MuseTalk_repo/
|
||||
|
||||
# 替换目录
|
||||
rm -rf MuseTalk
|
||||
mv MuseTalk_repo MuseTalk
|
||||
```
|
||||
|
||||
### 2. 创建虚拟环境
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/models/MuseTalk
|
||||
conda create -n musetalk python=3.10 -y
|
||||
conda activate musetalk
|
||||
```
|
||||
|
||||
### 3. 安装 PyTorch (CUDA 12.1)
|
||||
|
||||
```bash
|
||||
# CUDA 12.1 (适配服务器 CUDA 12.8)
|
||||
pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121
|
||||
```
|
||||
|
||||
### 4. 安装 MuseTalk 依赖
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 安装 mmlab 系列 (MuseTalk 必需)
|
||||
pip install --no-cache-dir -U openmim
|
||||
mim install mmengine
|
||||
mim install "mmcv>=2.0.1"
|
||||
mim install "mmdet>=3.1.0"
|
||||
mim install "mmpose>=1.1.0"
|
||||
```
|
||||
|
||||
### 5. 下载模型权重 ⬇️
|
||||
|
||||
> **权重文件较大(约 5GB),请确保网络稳定**
|
||||
|
||||
#### 方式一:从 Hugging Face 下载 (推荐)
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/models/MuseTalk
|
||||
|
||||
# 安装 huggingface-cli
|
||||
pip install huggingface_hub
|
||||
|
||||
# 下载 MuseTalk 权重 (v1.5)
|
||||
huggingface-cli download TMElyralab/MuseTalk \
|
||||
--local-dir ./models/musetalk \
|
||||
--include "*.pth" "*.json"
|
||||
|
||||
# 下载 MuseTalk V15 权重
|
||||
huggingface-cli download TMElyralab/MuseTalk \
|
||||
--local-dir ./models/musetalkV15 \
|
||||
--include "unet.pth"
|
||||
|
||||
# 下载 SD-VAE 模型 (Stable Diffusion VAE)
|
||||
huggingface-cli download stabilityai/sd-vae-ft-mse \
|
||||
--local-dir ./models/sd-vae-ft-mse
|
||||
|
||||
# 下载 Whisper 模型 (音频特征提取)
|
||||
# MuseTalk 使用 whisper-tiny
|
||||
huggingface-cli download openai/whisper-tiny \
|
||||
--local-dir ./models/whisper
|
||||
```
|
||||
|
||||
#### 方式二:手动下载
|
||||
|
||||
从以下链接下载并放到对应目录:
|
||||
|
||||
| 模型 | 下载链接 | 存放路径 |
|
||||
|------|----------|----------|
|
||||
| MuseTalk | [Hugging Face](https://huggingface.co/TMElyralab/MuseTalk) | `models/MuseTalk/models/musetalk/` |
|
||||
| MuseTalk V15 | 同上 | `models/MuseTalk/models/musetalkV15/` |
|
||||
| SD-VAE | [Hugging Face](https://huggingface.co/stabilityai/sd-vae-ft-mse) | `models/MuseTalk/models/sd-vae-ft-mse/` |
|
||||
| Whisper | [Hugging Face](https://huggingface.co/openai/whisper-tiny) | `models/MuseTalk/models/whisper/` |
|
||||
| DWPose | 按官方 README | `models/MuseTalk/models/dwpose/` |
|
||||
| Face Parse | 按官方 README | `models/MuseTalk/models/face-parse-bisent/` |
|
||||
|
||||
### 6. 验证安装
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/models/MuseTalk
|
||||
conda activate musetalk
|
||||
|
||||
# 测试推理 (使用 GPU1)
|
||||
CUDA_VISIBLE_DEVICES=1 python -m scripts.inference \
|
||||
--version v15 \
|
||||
--inference_config configs/inference/test.yaml \
|
||||
--result_dir ./results \
|
||||
--use_float16
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📂 目录结构
|
||||
|
||||
安装完成后目录结构:
|
||||
|
||||
```
|
||||
models/MuseTalk/
|
||||
├── configs/
|
||||
│ └── inference/
|
||||
├── models/ # ⬅️ 权重文件目录
|
||||
│ ├── musetalk/ # MuseTalk 基础权重
|
||||
│ │ ├── config.json
|
||||
│ │ └── pytorch_model.bin
|
||||
│ ├── musetalkV15/ # V1.5 版本 UNet
|
||||
│ │ └── unet.pth
|
||||
│ ├── sd-vae-ft-mse/ # Stable Diffusion VAE
|
||||
│ │ └── diffusion_pytorch_model.bin
|
||||
│ ├── whisper/ # Whisper 模型
|
||||
│ ├── dwpose/ # 姿态检测
|
||||
│ └── face-parse-bisent/ # 人脸解析
|
||||
├── musetalk/ # MuseTalk 源码
|
||||
├── scripts/
|
||||
│ └── inference.py
|
||||
├── DEPLOY.md # 本文档
|
||||
└── musetalk_api.py # API 服务
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 ViGent 集成配置
|
||||
|
||||
### 环境变量配置
|
||||
|
||||
在 `/home/rongye/ProgramFiles/ViGent/backend/.env` 中设置:
|
||||
|
||||
```bash
|
||||
# MuseTalk 配置
|
||||
MUSETALK_LOCAL=true
|
||||
MUSETALK_GPU_ID=1
|
||||
MUSETALK_VERSION=v15
|
||||
MUSETALK_USE_FLOAT16=true
|
||||
MUSETALK_BATCH_SIZE=8
|
||||
```
|
||||
|
||||
### 启动后端服务
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/backend
|
||||
source venv/bin/activate
|
||||
|
||||
# 设置 GPU 并启动
|
||||
CUDA_VISIBLE_DEVICES=1 uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚨 常见问题
|
||||
|
||||
### Q1: CUDA out of memory
|
||||
**解决**:减小 `MUSETALK_BATCH_SIZE` 或启用 `MUSETALK_USE_FLOAT16=true`
|
||||
|
||||
### Q2: mmcv 安装失败
|
||||
**解决**:确保 CUDA 版本匹配,使用 `mim install mmcv==2.0.1`
|
||||
|
||||
### Q3: Whisper 加载失败
|
||||
**解决**:检查 `models/whisper/` 目录是否包含完整模型文件
|
||||
157
models/MuseTalk/musetalk_api.py
Normal file
157
models/MuseTalk/musetalk_api.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""
|
||||
MuseTalk API 服务
|
||||
|
||||
这个脚本将 MuseTalk 封装为 FastAPI 服务,
|
||||
可以独立部署在 GPU 服务器上。
|
||||
|
||||
用法:
|
||||
python musetalk_api.py --port 8001
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import tempfile
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
import uvicorn
|
||||
|
||||
# Make the bundled MuseTalk sources importable regardless of the CWD the
# script is launched from.
MUSETALK_DIR = Path(__file__).parent
sys.path.insert(0, str(MUSETALK_DIR))

app = FastAPI(
    title="MuseTalk API",
    description="唇形同步推理服务",
    version="0.1.0",
)

# NOTE(review): CORS is wide open (all origins/methods/headers). Acceptable
# for a LAN-only inference box; tighten before any public exposure.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Lazily-initialized global model handle; populated by get_model().
_model = None
|
||||
|
||||
|
||||
def get_model():
    """Lazily load the MuseTalk model on first use.

    Returns:
        The model instance, or ``None`` while the real MuseTalk
        integration is still stubbed out (see TODO below).
    """
    global _model
    if _model is None:
        # Bug fix: because the actual loader is still a TODO, `_model`
        # stays None forever, so the original printed the "loading" banner
        # on EVERY call. Track the announcement on the function itself so
        # it happens exactly once per process.
        if not getattr(get_model, "_announced", False):
            get_model._announced = True
            print("🔄 加载 MuseTalk 模型...")
            # TODO: wire up the real MuseTalk inference API, e.g.:
            # from musetalk.inference import MuseTalkInference
            # _model = MuseTalkInference()
            print("✅ MuseTalk 模型加载完成")
    return _model
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
return {"name": "MuseTalk API", "status": "ok"}
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
"""健康检查"""
|
||||
return {"status": "healthy", "gpu": True}
|
||||
|
||||
|
||||
@app.post("/lipsync")
|
||||
async def lipsync(
|
||||
video: UploadFile = File(..., description="输入视频文件"),
|
||||
audio: UploadFile = File(..., description="音频文件"),
|
||||
fps: int = Form(25, description="输出帧率")
|
||||
):
|
||||
"""
|
||||
唇形同步推理
|
||||
|
||||
Args:
|
||||
video: 输入视频 (静态人物)
|
||||
audio: 驱动音频
|
||||
fps: 输出帧率
|
||||
|
||||
Returns:
|
||||
生成的视频文件
|
||||
"""
|
||||
# 创建临时目录
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
# 保存上传的文件
|
||||
video_path = tmpdir / "input_video.mp4"
|
||||
audio_path = tmpdir / "input_audio.wav"
|
||||
output_path = tmpdir / "output.mp4"
|
||||
|
||||
with open(video_path, "wb") as f:
|
||||
shutil.copyfileobj(video.file, f)
|
||||
with open(audio_path, "wb") as f:
|
||||
shutil.copyfileobj(audio.file, f)
|
||||
|
||||
try:
|
||||
# 执行唇形同步
|
||||
model = get_model()
|
||||
|
||||
# TODO: 调用实际的 MuseTalk 推理
|
||||
# result = model.inference(
|
||||
# source_video=str(video_path),
|
||||
# driving_audio=str(audio_path),
|
||||
# output_path=str(output_path),
|
||||
# fps=fps
|
||||
# )
|
||||
|
||||
# 临时: 使用 subprocess 调用 MuseTalk CLI
|
||||
import subprocess
|
||||
cmd = [
|
||||
sys.executable, "-m", "scripts.inference",
|
||||
"--video_path", str(video_path),
|
||||
"--audio_path", str(audio_path),
|
||||
"--output_path", str(output_path),
|
||||
]
|
||||
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
cwd=str(MUSETALK_DIR),
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(f"MuseTalk 推理失败: {result.stderr}")
|
||||
|
||||
if not output_path.exists():
|
||||
raise RuntimeError("输出文件不存在")
|
||||
|
||||
# 返回生成的视频
|
||||
# 需要先复制到持久化位置
|
||||
final_output = Path("outputs") / f"lipsync_{video.filename}"
|
||||
final_output.parent.mkdir(exist_ok=True)
|
||||
shutil.copy(output_path, final_output)
|
||||
|
||||
return FileResponse(
|
||||
final_output,
|
||||
media_type="video/mp4",
|
||||
filename=f"lipsync_{video.filename}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--port", type=int, default=8001)
|
||||
parser.add_argument("--host", type=str, default="0.0.0.0")
|
||||
args = parser.parse_args()
|
||||
|
||||
print(f"🚀 MuseTalk API 启动在 http://{args.host}:{args.port}")
|
||||
uvicorn.run(app, host=args.host, port=args.port)
|
||||
Reference in New Issue
Block a user