Init: 初版代码
This commit is contained in:
336
.gitignore
vendored
336
.gitignore
vendored
@@ -1,314 +1,46 @@
|
||||
# ---> Python
|
||||
# Byte-compiled / optimized / DLL files
|
||||
# ============ 环境配置 ============
|
||||
.env
|
||||
*.local
|
||||
|
||||
# ============ Python ============
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# UV
|
||||
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
#uv.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
||||
.pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
.venv/
|
||||
*.egg-info/
|
||||
.eggs/
|
||||
dist/
|
||||
build/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
# Ruff stuff:
|
||||
.ruff_cache/
|
||||
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# ---> Node
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
# ============ Node.js ============
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
.next/
|
||||
out/
|
||||
.turbo/
|
||||
|
||||
# Snowpack dependency directory (https://snowpack.dev/)
|
||||
web_modules/
|
||||
# ============ IDE ============
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
# ============ 系统文件 ============
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
desktop.ini
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
# ============ 项目输出 ============
|
||||
backend/outputs/
|
||||
backend/uploads/
|
||||
backend/cookies/
|
||||
*_cookies.json
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional stylelint cache
|
||||
.stylelintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variable files
|
||||
.env
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.local
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
out
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||
# public
|
||||
|
||||
# vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# vuepress v2.x temp and cache directory
|
||||
.temp
|
||||
.cache
|
||||
|
||||
# vitepress build output
|
||||
**/.vitepress/dist
|
||||
|
||||
# vitepress cache directory
|
||||
**/.vitepress/cache
|
||||
|
||||
# Docusaurus cache and generated files
|
||||
.docusaurus
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# yarn v2
|
||||
.yarn/cache
|
||||
.yarn/unplugged
|
||||
.yarn/build-state.yml
|
||||
.yarn/install-state.gz
|
||||
.pnp.*
|
||||
# ============ MuseTalk ============
|
||||
models/MuseTalk/models/
|
||||
models/MuseTalk/results/
|
||||
|
||||
# ============ 日志 ============
|
||||
*.log
|
||||
logs/
|
||||
|
||||
263
Docs/DEPLOY_MANUAL.md
Normal file
263
Docs/DEPLOY_MANUAL.md
Normal file
@@ -0,0 +1,263 @@
|
||||
# ViGent 手动部署指南
|
||||
|
||||
## 服务器信息
|
||||
|
||||
| 配置 | 规格 |
|
||||
|------|------|
|
||||
| 服务器 | Dell PowerEdge R730 |
|
||||
| CPU | 2× Intel Xeon E5-2680 v4 (56 线程) |
|
||||
| 内存 | 192GB DDR4 |
|
||||
| GPU 0 | NVIDIA RTX 3090 24GB |
|
||||
| GPU 1 | NVIDIA RTX 3090 24GB (用于 MuseTalk) |
|
||||
| 部署路径 | `/home/rongye/ProgramFiles/ViGent` |
|
||||
|
||||
---
|
||||
|
||||
## 步骤 1: 环境检查
|
||||
|
||||
```bash
|
||||
# 检查 GPU
|
||||
nvidia-smi
|
||||
|
||||
# 检查 Python 版本 (需要 3.10+)
|
||||
python3 --version
|
||||
|
||||
# 检查 Node.js 版本 (需要 18+)
|
||||
node --version
|
||||
|
||||
# 检查 FFmpeg
|
||||
ffmpeg -version
|
||||
```
|
||||
|
||||
如果缺少 FFmpeg:
|
||||
```bash
|
||||
sudo apt update
|
||||
sudo apt install ffmpeg
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 2: 创建目录结构
|
||||
|
||||
```bash
|
||||
mkdir -p /home/rongye/ProgramFiles/ViGent
|
||||
cd /home/rongye/ProgramFiles/ViGent
|
||||
```
|
||||
|
||||
将项目文件复制到该目录。
|
||||
|
||||
---
|
||||
|
||||
## 步骤 3: 安装后端依赖
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/backend
|
||||
|
||||
# 创建虚拟环境
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
|
||||
# 安装 PyTorch (CUDA 12.1)
|
||||
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
|
||||
|
||||
# 安装其他依赖
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 安装 Playwright 浏览器 (社交发布用)
|
||||
playwright install chromium
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 4: 安装 MMPose (唇形检测)
|
||||
|
||||
```bash
|
||||
source /home/rongye/ProgramFiles/ViGent/backend/venv/bin/activate
|
||||
|
||||
pip install -U openmim
|
||||
mim install mmengine
|
||||
mim install "mmcv>=2.0.1"
|
||||
mim install "mmdet>=3.1.0"
|
||||
mim install "mmpose>=1.1.0"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 5: 安装 MuseTalk
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/models
|
||||
|
||||
# 克隆仓库
|
||||
git clone https://github.com/TMElyralab/MuseTalk.git
|
||||
cd MuseTalk
|
||||
|
||||
# 激活虚拟环境
|
||||
source /home/rongye/ProgramFiles/ViGent/backend/venv/bin/activate
|
||||
|
||||
# 安装依赖
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 6: 下载 MuseTalk 模型权重
|
||||
|
||||
从 HuggingFace 下载模型:
|
||||
- 地址: https://huggingface.co/TMElyralab/MuseTalk
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/models/MuseTalk
|
||||
|
||||
# 使用 huggingface-cli 下载 (需要安装 huggingface_hub)
|
||||
pip install huggingface_hub
|
||||
huggingface-cli download TMElyralab/MuseTalk --local-dir ./models
|
||||
```
|
||||
|
||||
或手动下载后放到:
|
||||
```
|
||||
/home/rongye/ProgramFiles/ViGent/models/MuseTalk/models/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 7: 配置环境变量
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/backend
|
||||
|
||||
# 复制配置模板
|
||||
cp .env.example .env
|
||||
|
||||
# 编辑配置
|
||||
nano .env
|
||||
```
|
||||
|
||||
修改以下配置:
|
||||
```ini
|
||||
# GPU 配置
|
||||
MUSETALK_GPU_ID=1
|
||||
MUSETALK_LOCAL=true
|
||||
|
||||
# 其他配置按需修改
|
||||
DEBUG=false
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 8: 安装前端依赖
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/frontend
|
||||
|
||||
# 安装依赖
|
||||
npm install
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 9: 测试运行
|
||||
|
||||
### 启动后端
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/backend
|
||||
source venv/bin/activate
|
||||
uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
### 启动前端 (新开终端)
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/frontend
|
||||
npm run dev -- --host 0.0.0.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 步骤 10: 验证
|
||||
|
||||
1. 访问 http://服务器IP:3000 查看前端
|
||||
2. 访问 http://服务器IP:8000/docs 查看 API 文档
|
||||
3. 上传测试视频,生成口播视频
|
||||
|
||||
---
|
||||
|
||||
## 使用 systemd 管理服务 (可选)
|
||||
|
||||
### 后端服务
|
||||
|
||||
创建 `/etc/systemd/system/vigent-backend.service`:
|
||||
```ini
|
||||
[Unit]
|
||||
Description=ViGent Backend API
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=rongye
|
||||
WorkingDirectory=/home/rongye/ProgramFiles/ViGent/backend
|
||||
Environment="PATH=/home/rongye/ProgramFiles/ViGent/backend/venv/bin"
|
||||
ExecStart=/home/rongye/ProgramFiles/ViGent/backend/venv/bin/uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
Restart=always
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
### 前端服务
|
||||
|
||||
创建 `/etc/systemd/system/vigent-frontend.service`:
|
||||
```ini
|
||||
[Unit]
|
||||
Description=ViGent Frontend
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=rongye
|
||||
WorkingDirectory=/home/rongye/ProgramFiles/ViGent/frontend
|
||||
ExecStart=/usr/bin/npm run start
|
||||
Restart=always
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
### 启用服务
|
||||
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable vigent-backend vigent-frontend
|
||||
sudo systemctl start vigent-backend vigent-frontend
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 故障排除
|
||||
|
||||
### GPU 不可用
|
||||
|
||||
```bash
|
||||
# 检查 CUDA
|
||||
nvidia-smi
|
||||
python3 -c "import torch; print(torch.cuda.is_available())"
|
||||
```
|
||||
|
||||
### 端口被占用
|
||||
|
||||
```bash
|
||||
# 查看端口占用
|
||||
sudo lsof -i :8000
|
||||
sudo lsof -i :3000
|
||||
```
|
||||
|
||||
### 查看日志
|
||||
|
||||
```bash
|
||||
# 后端日志
|
||||
journalctl -u vigent-backend -f
|
||||
|
||||
# 前端日志
|
||||
journalctl -u vigent-frontend -f
|
||||
```
|
||||
171
Docs/DevLogs/Day1.md
Normal file
171
Docs/DevLogs/Day1.md
Normal file
@@ -0,0 +1,171 @@
|
||||
# Day 1 - ViGent 数字人口播系统开发
|
||||
|
||||
**日期**:2026-01-13
|
||||
**开发环境**:Windows 11 (本地开发) / Ubuntu 24.04 (服务器部署)
|
||||
**目标平台**:Dell PowerEdge R730 (2× RTX 3090 24GB)
|
||||
|
||||
---
|
||||
|
||||
## 🎯 今日目标
|
||||
|
||||
搭建数字人口播视频生成系统的完整框架,包括:
|
||||
1. 后端 API (FastAPI)
|
||||
2. 前端 UI (Next.js)
|
||||
3. 视频生成流程 (TTS + FFmpeg)
|
||||
4. 社交媒体发布功能
|
||||
|
||||
---
|
||||
|
||||
## 📦 项目初始化
|
||||
|
||||
### 后端项目结构
|
||||
```
|
||||
backend/
|
||||
├── app/
|
||||
│ ├── api/ # API 路由
|
||||
│ │ ├── materials.py # 素材管理
|
||||
│ │ ├── videos.py # 视频生成
|
||||
│ │ └── publish.py # 社交发布
|
||||
│ ├── services/ # 核心服务
|
||||
│ │ ├── tts_service.py # EdgeTTS 配音
|
||||
│ │ ├── video_service.py # FFmpeg 视频合成
|
||||
│ │ ├── lipsync_service.py # MuseTalk 唇形同步
|
||||
│ │ └── publish_service.py # Playwright 自动发布
|
||||
│ └── core/
|
||||
│ └── config.py # 配置管理
|
||||
├── requirements.txt
|
||||
└── .env.example
|
||||
```
|
||||
|
||||
### 前端项目
|
||||
- 使用 Next.js 14 + TypeScript + Tailwind CSS
|
||||
- 主页面: 视频生成界面
|
||||
- 发布页面: `/publish` 多平台发布管理
|
||||
|
||||
---
|
||||
|
||||
## 🔧 TTS + 视频合成
|
||||
|
||||
### EdgeTTS 集成
|
||||
- 使用 `edge-tts` 库实现免费中文语音合成
|
||||
- 支持多种音色: 云溪、云健、云扬、晓晓、晓伊
|
||||
|
||||
### FFmpeg 视频合成
|
||||
```python
|
||||
# 核心命令
|
||||
ffmpeg -i video.mp4 -i audio.mp3 -c:v copy -c:a aac -map 0:v -map 1:a output.mp4
|
||||
```
|
||||
|
||||
### 🐛 Bug 修复: asyncio subprocess 问题
|
||||
|
||||
**问题**:在 FastAPI BackgroundTasks 中调用 `asyncio.create_subprocess_exec` 导致 `NotImplementedError`
|
||||
|
||||
**原因**:疑为 Windows 下默认事件循环(SelectorEventLoop)不支持 asyncio 子进程所致(BackgroundTasks 的同步任务线程中也没有运行中的事件循环)—— 待在 Linux 服务器上复验
|
||||
|
||||
**修复**:将 `_run_ffmpeg` 和 `_get_duration` 改为使用同步 `subprocess.run`
|
||||
|
||||
```python
|
||||
# 修复前
|
||||
result = await asyncio.create_subprocess_exec(...)
|
||||
|
||||
# 修复后
|
||||
result = subprocess.run(cmd, shell=True, capture_output=True, text=True, encoding='utf-8')
|
||||
```
|
||||
|
||||
**状态**:✅ 已修复,视频生成成功
|
||||
|
||||
---
|
||||
|
||||
## 🎬 MuseTalk 唇形同步集成
|
||||
|
||||
### 架构设计
|
||||
- GPU0: 其他服务
|
||||
- GPU1: MuseTalk 唇形同步
|
||||
|
||||
### 代码实现
|
||||
- `lipsync_service.py` 支持本地模式和远程 API 模式
|
||||
- 通过 `CUDA_VISIBLE_DEVICES=1` 指定使用 GPU1
|
||||
- 如未配置,自动跳过唇形同步
|
||||
|
||||
---
|
||||
|
||||
## 📱 社交媒体发布
|
||||
|
||||
### 支持平台
|
||||
| 平台 | 状态 |
|
||||
|------|------|
|
||||
| 抖音 | ✅ 框架完成 |
|
||||
| 小红书 | ✅ 框架完成 |
|
||||
| 微信视频号 | ✅ 框架完成 |
|
||||
| 快手 | ✅ 框架完成 |
|
||||
| B站 | ✅ 框架完成 |
|
||||
|
||||
### 技术方案
|
||||
- 使用 Playwright 进行浏览器自动化
|
||||
- Cookie 管理实现免登录发布
|
||||
- 前端提供账号管理和一键发布 UI
|
||||
|
||||
---
|
||||
|
||||
## 📚 文档产出
|
||||
|
||||
| 文件 | 说明 |
|
||||
|------|------|
|
||||
| `README.md` | 项目说明 |
|
||||
| `DEPLOY_MANUAL.md` | 手动部署指南 |
|
||||
| `deploy.sh` | 一键部署脚本 |
|
||||
| `.env.example` | 环境配置模板 |
|
||||
|
||||
---
|
||||
|
||||
## ✅ 今日完成
|
||||
|
||||
1. ✅ FastAPI 后端框架搭建
|
||||
2. ✅ EdgeTTS 语音合成服务
|
||||
3. ✅ FFmpeg 视频合成服务
|
||||
4. ✅ MuseTalk 唇形同步集成 (代码层面)
|
||||
5. ✅ Next.js 前端 UI (视频生成 + 发布管理)
|
||||
6. ✅ Playwright 社交媒体发布服务
|
||||
7. ✅ 端到端视频生成测试通过
|
||||
8. ✅ 服务器部署文档编写
|
||||
|
||||
---
|
||||
|
||||
|
||||
### 下午调试记录 (Afternoon Debugging Session)
|
||||
|
||||
**1. 前端 "Undefined" 错误**
|
||||
- **现象**:视频生成失败,弹窗显示 "undefined"。
|
||||
- **原因**:
|
||||
1. 后端 `videos.py` 在异常捕获时未设置 `message` 字段,前端无法获取错误信息。
|
||||
2. 路径解析逻辑错误导致文件未找到。
|
||||
- **修复**:
|
||||
- 后端补充 `tasks[task_id]["message"]` 字段。
|
||||
- 修复 `pathlib.Path` 引用缺失。
|
||||
|
||||
**2. 路径解析问题**
|
||||
- **现象**:本地测试时无法找到素材文件。
|
||||
- **原因**:Windows 本地路径 (`d:\...`) 与相对路径混合使用,且 `BASE_DIR` 指向了错误的父级目录。
|
||||
- **修复**:
|
||||
- `materials.py` 所有返回路径改为相对路径 (`uploads/materials/xxx`)。
|
||||
- `videos.py` 增加智能路径解析:非绝对路径自动拼接 `BASE_DIR`。
|
||||
- `config.py` 调整 `BASE_DIR` 指向项目根目录。
|
||||
|
||||
**3. 语法错误修复**
|
||||
- **现象**:`page.tsx` 出现 `Parsing ecmascript source code failed`。
|
||||
- **原因**:调试代码逻辑错误地插入到了 JSX 渲染块中。
|
||||
- **修复**:完全重写 `page.tsx`,规范化代码结构,增加 "Raw Response" 调试面板。
|
||||
|
||||
**4. 本地 Fallback 逻辑验证**
|
||||
- **现象**:进度条从 5% 直接跳到 100%。
|
||||
- **原因**:本地 MuseTalk 未启用,系统触发 `fallback` 逻辑(仅复制文件)。
|
||||
- **验证**:符合预期行为,确保了无 GPU 环境下的流程连通性。
|
||||
|
||||
---
|
||||
|
||||
## 📋 明日计划
|
||||
|
||||
1. 在服务器上部署系统
|
||||
2. 下载 MuseTalk 模型权重
|
||||
3. 测试完整唇形同步流程
|
||||
4. 优化前端 UI 交互体验
|
||||
96
Docs/Doc_Rules.md
Normal file
96
Docs/Doc_Rules.md
Normal file
@@ -0,0 +1,96 @@
|
||||
# 📋 开发日志更新规则
|
||||
|
||||
> **本文件定义了 AI 助手更新开发文档的规范**
|
||||
|
||||
---
|
||||
|
||||
## ⚡ 核心原则
|
||||
|
||||
| 规则 | 说明 |
|
||||
|------|------|
|
||||
| **默认更新** | 只更新 `DayN.md` |
|
||||
| **按需更新** | `task_complete.md` 仅在用户**明确要求**时更新 |
|
||||
| **增量追加** | 禁止覆盖/新建。请使用 replace/edit 工具插入新内容。 |
|
||||
| **先读后写** | 更新前先查看文件当前内容 |
|
||||
|
||||
---
|
||||
|
||||
## 📁 文件结构
|
||||
|
||||
```
|
||||
ViGent/Docs/
|
||||
├── task_complete.md # 任务总览(仅按需更新)
|
||||
├── Doc_Rules.md # 本文件
|
||||
└── DevLogs/
|
||||
├── Day1.md # 开发日志
|
||||
└── ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📅 DayN.md 更新规则(日常更新)
|
||||
|
||||
### 新建判断
|
||||
- 检查最新 `DayN.md` 的日期
|
||||
- **今天** → 追加到现有文件
|
||||
- **之前** → 创建 `Day{N+1}.md`
|
||||
|
||||
### 追加格式
|
||||
```markdown
|
||||
---
|
||||
|
||||
## 🔧 [章节标题]
|
||||
|
||||
### 问题描述
|
||||
简要描述...
|
||||
|
||||
### 解决方案
|
||||
```code
|
||||
# 代码示例
|
||||
```
|
||||
|
||||
### 结果
|
||||
- ✅ 修复了 xxx
|
||||
```
|
||||
|
||||
### 快速修复格式
|
||||
```markdown
|
||||
## 🐛 [Bug 简述] (HH:MM)
|
||||
|
||||
**问题**:一句话描述
|
||||
**修复**:修改了 `文件名` 中的 xxx
|
||||
**状态**:✅ 已修复 / 🔄 待验证
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📝 task_complete.md 更新规则(仅按需)
|
||||
|
||||
> ⚠️ **仅当用户明确要求更新 `task_complete.md` 时才更新**
|
||||
|
||||
### 更新原则
|
||||
- **格式一致性**:直接参考 `task_complete.md` 现有格式追加内容。
|
||||
- **进度更新**:仅在阶段性里程碑时更新进度百分比。
|
||||
|
||||
---
|
||||
|
||||
## 🚀 新对话检查清单
|
||||
|
||||
1. 查看 `task_complete.md` → 了解整体进度
|
||||
2. 查看最新 `DayN.md` → 确认今天是第几天
|
||||
3. 根据日期决定追加或新建 Day 文件
|
||||
|
||||
---
|
||||
|
||||
## 🎯 项目组件
|
||||
|
||||
| 组件 | 位置 |
|
||||
|------|------|
|
||||
| 后端 (FastAPI) | `ViGent/backend/` |
|
||||
| 前端 (Next.js) | `ViGent/frontend/` |
|
||||
| AI 模型 (MuseTalk) | `ViGent/models/` |
|
||||
| 文档 | `ViGent/Docs/` |
|
||||
|
||||
---
|
||||
|
||||
**最后更新**:2026-01-13
|
||||
0
Docs/Logs.md
Normal file
0
Docs/Logs.md
Normal file
72
Docs/README.md
Normal file
72
Docs/README.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# ViGent - 数字人口播视频生成系统
|
||||
|
||||
基于 MuseTalk + EdgeTTS 的开源数字人口播视频生成系统
|
||||
|
||||
## 功能
|
||||
|
||||
- 📹 上传静态人物视频,生成口播视频(唇形同步)
|
||||
- 🎙️ TTS 配音 / 声音克隆
|
||||
- 💬 自动生成字幕
|
||||
- 📱 一键发布到多个社交平台
|
||||
|
||||
## 技术栈
|
||||
|
||||
| 模块 | 技术 |
|
||||
|------|------|
|
||||
| 前端 | Next.js 14 |
|
||||
| 后端 | FastAPI + Celery |
|
||||
| 唇形同步 | MuseTalk (GPU1) |
|
||||
| TTS | EdgeTTS |
|
||||
| 视频处理 | FFmpeg |
|
||||
| 自动发布 | Playwright |
|
||||
|
||||
## 项目结构
|
||||
|
||||
```
|
||||
/home/rongye/ProgramFiles/ViGent/
|
||||
├── backend/ # FastAPI 后端
|
||||
├── frontend/ # Next.js 前端
|
||||
├── models/ # AI 模型 (MuseTalk)
|
||||
└── deploy.sh # 一键部署脚本
|
||||
```
|
||||
|
||||
## 服务器部署 (Dell R730)
|
||||
|
||||
```bash
|
||||
# 进入部署目录
|
||||
cd /home/rongye/ProgramFiles/ViGent
|
||||
|
||||
# 一键部署
|
||||
chmod +x deploy.sh
|
||||
./deploy.sh
|
||||
```
|
||||
|
||||
## 启动服务
|
||||
|
||||
```bash
|
||||
# 后端 API (端口 8000)
|
||||
cd /home/rongye/ProgramFiles/ViGent/backend
|
||||
source venv/bin/activate
|
||||
uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
|
||||
# 前端 UI (端口 3000)
|
||||
cd /home/rongye/ProgramFiles/ViGent/frontend
|
||||
npm run dev
|
||||
```
|
||||
|
||||
## GPU 配置
|
||||
|
||||
| GPU | 用途 |
|
||||
|-----|------|
|
||||
| GPU 0 (RTX 3090 24GB) | 其他服务 |
|
||||
| GPU 1 (RTX 3090 24GB) | MuseTalk 唇形同步 |
|
||||
|
||||
## 访问地址
|
||||
|
||||
- 视频生成: http://服务器IP:3000
|
||||
- 发布管理: http://服务器IP:3000/publish
|
||||
- API 文档: http://服务器IP:8000/docs
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
305
Docs/implementation_plan.md
Normal file
305
Docs/implementation_plan.md
Normal file
@@ -0,0 +1,305 @@
|
||||
# 数字人口播视频生成系统 - 实现计划
|
||||
|
||||
## 项目目标
|
||||
|
||||
构建一个开源的数字人口播视频生成系统,功能包括:
|
||||
- 上传静态人物视频 → 生成口播视频(唇形同步)
|
||||
- TTS 配音或声音克隆
|
||||
- 字幕自动生成与渲染
|
||||
- 一键发布到多个社交平台
|
||||
|
||||
---
|
||||
|
||||
## 技术架构
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ 前端 (Next.js) │
|
||||
│ 素材管理 | 视频生成 | 发布管理 | 任务状态 │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
│ REST API
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ 后端 (FastAPI) │
|
||||
├─────────────────────────────────────────────────────────┤
|
||||
│ Celery 任务队列 (Redis) │
|
||||
│ ├── 视频生成任务 │
|
||||
│ ├── TTS 配音任务 │
|
||||
│ └── 自动发布任务 │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌──────────┐ ┌──────────┐ ┌──────────┐
|
||||
│ MuseTalk │ │ FFmpeg │ │Playwright│
|
||||
│ 唇形同步 │ │ 视频合成 │ │ 自动发布 │
|
||||
└──────────┘ └──────────┘ └──────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 技术选型
|
||||
|
||||
| 模块 | 技术选择 | 备选方案 |
|
||||
|------|----------|----------|
|
||||
| **前端框架** | Next.js 14 | Vue 3 + Vite |
|
||||
| **UI 组件库** | Tailwind + shadcn/ui | Ant Design |
|
||||
| **后端框架** | FastAPI | Flask |
|
||||
| **任务队列** | Celery + Redis | RQ / Dramatiq |
|
||||
| **唇形同步** | MuseTalk | Wav2Lip / SadTalker |
|
||||
| **TTS 配音** | EdgeTTS | CosyVoice |
|
||||
| **声音克隆** | GPT-SoVITS (可选) | - |
|
||||
| **视频处理** | FFmpeg | MoviePy |
|
||||
| **自动发布** | social-auto-upload | 自行实现 |
|
||||
| **数据库** | SQLite → PostgreSQL | MySQL |
|
||||
| **文件存储** | 本地 / MinIO | 阿里云 OSS |
|
||||
|
||||
---
|
||||
|
||||
## 分阶段实施计划
|
||||
|
||||
### 阶段一:核心功能验证 (MVP)
|
||||
|
||||
> **目标**:验证 MuseTalk + EdgeTTS 效果,跑通端到端流程
|
||||
|
||||
#### 1.1 环境搭建
|
||||
|
||||
```bash
|
||||
# 创建项目目录
|
||||
mkdir TalkingHeadAgent
|
||||
cd TalkingHeadAgent
|
||||
|
||||
# 克隆 MuseTalk
|
||||
git clone https://github.com/TMElyralab/MuseTalk.git
|
||||
|
||||
# 安装依赖
|
||||
cd MuseTalk
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 下载模型权重 (按官方文档)
|
||||
```
|
||||
|
||||
#### 1.2 集成 EdgeTTS
|
||||
|
||||
```python
|
||||
# tts_engine.py
|
||||
import edge_tts
|
||||
import asyncio
|
||||
|
||||
async def text_to_speech(text: str, voice: str = "zh-CN-YunxiNeural", output_path: str = "output.mp3"):
|
||||
communicate = edge_tts.Communicate(text, voice)
|
||||
await communicate.save(output_path)
|
||||
return output_path
|
||||
```
|
||||
|
||||
#### 1.3 端到端测试脚本
|
||||
|
||||
```python
|
||||
# test_pipeline.py
|
||||
"""
|
||||
1. 文案 → EdgeTTS → 音频
|
||||
2. 静态视频 + 音频 → MuseTalk → 口播视频
|
||||
3. 添加字幕 → FFmpeg → 最终视频
|
||||
"""
|
||||
```
|
||||
|
||||
#### 1.4 验证标准
|
||||
- [ ] MuseTalk 能正常推理
|
||||
- [ ] 唇形与音频同步率 > 90%
|
||||
- [ ] 单个视频生成时间 < 2 分钟
|
||||
|
||||
---
|
||||
|
||||
### 阶段二:后端 API 开发
|
||||
|
||||
> **目标**:将核心功能封装为 API,支持异步任务
|
||||
|
||||
#### 2.1 项目结构
|
||||
|
||||
```
|
||||
backend/
|
||||
├── app/
|
||||
│ ├── main.py # FastAPI 入口
|
||||
│ ├── api/
|
||||
│ │ ├── videos.py # 视频生成 API
|
||||
│ │ ├── materials.py # 素材管理 API
|
||||
│ │ └── publish.py # 发布管理 API
|
||||
│ ├── services/
|
||||
│ │ ├── tts_service.py # TTS 服务
|
||||
│ │ ├── lipsync_service.py # 唇形同步服务
|
||||
│ │ └── video_service.py # 视频合成服务
|
||||
│ ├── tasks/
|
||||
│ │ └── celery_tasks.py # Celery 异步任务
|
||||
│ ├── models/
|
||||
│ │ └── schemas.py # Pydantic 模型
|
||||
│ └── core/
|
||||
│ └── config.py # 配置管理
|
||||
├── requirements.txt
|
||||
└── docker-compose.yml # Redis + API
|
||||
```
|
||||
|
||||
#### 2.2 核心 API 设计
|
||||
|
||||
| 端点 | 方法 | 功能 |
|
||||
|------|------|------|
|
||||
| `/api/materials` | POST | 上传素材视频 |
|
||||
| `/api/materials` | GET | 获取素材列表 |
|
||||
| `/api/videos/generate` | POST | 创建视频生成任务 |
|
||||
| `/api/tasks/{id}` | GET | 查询任务状态 |
|
||||
| `/api/videos/{id}/download` | GET | 下载生成的视频 |
|
||||
| `/api/publish` | POST | 发布到社交平台 |
|
||||
|
||||
#### 2.3 Celery 任务定义
|
||||
|
||||
```python
|
||||
# tasks/celery_tasks.py
|
||||
@celery.task
|
||||
def generate_video_task(material_id: str, text: str, voice: str):
|
||||
# 1. TTS 生成音频
|
||||
# 2. MuseTalk 唇形同步
|
||||
# 3. FFmpeg 添加字幕
|
||||
# 4. 保存并返回视频 URL
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 阶段三:前端 Web UI
|
||||
|
||||
> **目标**:提供用户友好的操作界面
|
||||
|
||||
#### 3.1 页面设计
|
||||
|
||||
| 页面 | 功能 |
|
||||
|------|------|
|
||||
| **素材库** | 上传/管理多场景素材视频 |
|
||||
| **生成视频** | 输入文案、选择素材、生成预览 |
|
||||
| **任务中心** | 查看生成进度、下载视频 |
|
||||
| **发布管理** | 绑定平台、一键发布、定时发布 |
|
||||
|
||||
#### 3.2 技术实现
|
||||
|
||||
```bash
|
||||
# 创建 Next.js 项目
|
||||
npx create-next-app@latest frontend --typescript --tailwind --app
|
||||
|
||||
# 安装依赖
|
||||
cd frontend
|
||||
npm install @tanstack/react-query axios
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 阶段四:社交媒体发布
|
||||
|
||||
> **目标**:集成 social-auto-upload,支持多平台发布
|
||||
|
||||
#### 4.1 复用 social-auto-upload
|
||||
|
||||
```bash
|
||||
# 复制模块
|
||||
cp -r SuperIPAgent/social-auto-upload backend/social_upload
|
||||
```
|
||||
|
||||
#### 4.2 Cookie 管理
|
||||
|
||||
```python
|
||||
# 用户通过浏览器登录 → 保存 Cookie → 后续自动发布
|
||||
```
|
||||
|
||||
#### 4.3 支持平台
|
||||
- 抖音
|
||||
- 小红书
|
||||
- 微信视频号
|
||||
- 快手
|
||||
|
||||
---
|
||||
|
||||
### 阶段五:优化与扩展
|
||||
|
||||
| 功能 | 实现方式 |
|
||||
|------|----------|
|
||||
| **声音克隆** | 集成 GPT-SoVITS,用自己的声音 |
|
||||
| **批量生成** | 上传 Excel/CSV,批量生成视频 |
|
||||
| **字幕编辑器** | 可视化调整字幕样式、位置 |
|
||||
| **Docker 部署** | 一键部署到云服务器 |
|
||||
|
||||
---
|
||||
|
||||
## 项目目录结构 (最终)
|
||||
|
||||
```
|
||||
TalkingHeadAgent/
|
||||
├── frontend/ # Next.js 前端
|
||||
│ ├── app/
|
||||
│ ├── components/
|
||||
│ └── package.json
|
||||
├── backend/ # FastAPI 后端
|
||||
│ ├── app/
|
||||
│ ├── MuseTalk/ # 唇形同步模型
|
||||
│ ├── social_upload/ # 社交发布模块
|
||||
│ └── requirements.txt
|
||||
├── docker-compose.yml # 一键部署
|
||||
└── README.md
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 开发时间估算
|
||||
|
||||
| 阶段 | 预计时间 | 说明 |
|
||||
|------|----------|------|
|
||||
| 阶段一 | 2-3 天 | 环境搭建 + 效果验证 |
|
||||
| 阶段二 | 3-4 天 | 后端 API 开发 |
|
||||
| 阶段三 | 3-4 天 | 前端 UI 开发 |
|
||||
| 阶段四 | 2 天 | 社交发布集成 |
|
||||
| 阶段五 | 按需 | 持续优化 |
|
||||
|
||||
**总计**:约 10-13 天可完成 MVP
|
||||
|
||||
---
|
||||
|
||||
## 验证计划
|
||||
|
||||
### 阶段一验证
|
||||
1. 运行 `test_pipeline.py` 脚本
|
||||
2. 检查生成视频的唇形同步效果
|
||||
3. 确认音画同步
|
||||
|
||||
### 阶段二验证
|
||||
1. 使用 Postman/curl 测试所有 API 端点
|
||||
2. 验证任务队列正常工作
|
||||
3. 检查视频生成完整流程
|
||||
|
||||
### 阶段三验证
|
||||
1. 在浏览器中完成完整操作流程
|
||||
2. 验证上传、生成、下载功能
|
||||
3. 检查响应式布局
|
||||
|
||||
### 阶段四验证
|
||||
1. 发布一个测试视频到抖音
|
||||
2. 验证定时发布功能
|
||||
3. 检查发布状态同步
|
||||
|
||||
---
|
||||
|
||||
## 硬件要求
|
||||
|
||||
| 配置 | 最低要求 | 推荐配置 |
|
||||
|------|----------|----------|
|
||||
| **GPU** | NVIDIA GTX 1060 6GB | RTX 3060 12GB+ |
|
||||
| **内存** | 16GB | 32GB |
|
||||
| **存储** | 100GB SSD | 500GB SSD |
|
||||
| **CUDA** | 11.7+ | 12.0+ |
|
||||
|
||||
---
|
||||
|
||||
## 下一步行动
|
||||
|
||||
1. **确认你的 GPU 配置** - MuseTalk 需要 NVIDIA GPU
|
||||
2. **选择开发起点** - 从阶段一开始验证效果
|
||||
3. **确定项目位置** - 在哪个目录创建项目
|
||||
|
||||
---
|
||||
|
||||
> [!IMPORTANT]
|
||||
> 请确认以上计划是否符合你的需求,有任何需要调整的地方请告诉我。
|
||||
119
Docs/task_complete.md
Normal file
119
Docs/task_complete.md
Normal file
@@ -0,0 +1,119 @@
|
||||
# ViGent 数字人口播系统 - 开发任务清单
|
||||
|
||||
**项目**:ViGent 数字人口播视频生成系统
|
||||
**服务器**:Dell R730 (2× RTX 3090 24GB)
|
||||
**更新时间**:2026-01-13
|
||||
**整体进度**:80%(核心功能验证通过,待服务器部署)
|
||||
|
||||
## 📖 快速导航
|
||||
|
||||
| 章节 | 说明 |
|
||||
|------|------|
|
||||
| [已完成任务](#-已完成任务) | Day 1 完成的功能 |
|
||||
| [后续规划](#️-后续规划) | 待办项目 |
|
||||
| [进度统计](#-进度统计) | 各模块完成度 |
|
||||
| [里程碑](#-里程碑) | 关键节点 |
|
||||
| [时间线](#-时间线) | 开发历程 |
|
||||
|
||||
**相关文档**:
|
||||
- [Day 日志](file:///d:/CodingProjects/Antigravity/ViGent/Docs/DevLogs/) (Day1-)
|
||||
- [部署指南](file:///d:/CodingProjects/Antigravity/ViGent/Docs/DEPLOY_MANUAL.md)
|
||||
|
||||
---
|
||||
|
||||
## ✅ 已完成任务
|
||||
|
||||
### 阶段一:核心功能验证
|
||||
- [x] EdgeTTS 配音集成
|
||||
- [x] FFmpeg 视频合成
|
||||
- [x] MuseTalk 唇形同步 (代码集成)
|
||||
- [x] 端到端流程验证
|
||||
|
||||
### 阶段二:后端 API 开发
|
||||
- [x] FastAPI 项目搭建
|
||||
- [x] 视频生成 API
|
||||
- [x] 素材管理 API
|
||||
- [x] 文件存储管理
|
||||
|
||||
### 阶段三:前端 Web UI
|
||||
- [x] Next.js 项目初始化
|
||||
- [x] 视频生成页面
|
||||
- [x] 发布管理页面
|
||||
- [x] 任务状态展示
|
||||
|
||||
### 阶段四:社交媒体发布
|
||||
- [x] Playwright 自动化框架
|
||||
- [x] Cookie 管理功能
|
||||
- [x] 多平台发布 UI
|
||||
- [ ] 定时发布功能
|
||||
|
||||
### 阶段五:部署与文档
|
||||
- [x] 手动部署指南 (DEPLOY_MANUAL.md)
|
||||
- [x] 一键部署脚本 (deploy.sh)
|
||||
- [x] 环境配置模板 (.env.example)
|
||||
- [x] 项目文档 (README.md)
|
||||
|
||||
---
|
||||
|
||||
## 🛤️ 后续规划
|
||||
|
||||
### 🔴 优先待办
|
||||
- [ ] 服务器环境部署
|
||||
- [ ] MuseTalk 模型权重下载
|
||||
- [ ] 唇形同步完整测试
|
||||
- [ ] 生产环境验证
|
||||
|
||||
### 🟠 功能完善
|
||||
- [ ] 定时发布功能
|
||||
- [ ] 批量视频生成
|
||||
- [ ] 字幕样式编辑器
|
||||
|
||||
### 🔵 长期探索
|
||||
- [ ] 声音克隆 (GPT-SoVITS)
|
||||
- [ ] Docker 容器化
|
||||
- [ ] Celery 分布式任务队列
|
||||
|
||||
---
|
||||
|
||||
## 📊 进度统计
|
||||
|
||||
### 总体进度
|
||||
```
|
||||
████████████████░░░░ 80%
|
||||
```
|
||||
|
||||
### 各模块进度
|
||||
|
||||
| 模块 | 进度 | 状态 |
|
||||
|------|------|------|
|
||||
| 后端 API | 100% | ✅ 完成 |
|
||||
| 前端 UI | 100% | ✅ 完成 |
|
||||
| TTS 配音 | 100% | ✅ 完成 |
|
||||
| 视频合成 | 100% | ✅ 完成 |
|
||||
| 唇形同步 | 80% | ✅ 本地 Fallback 验证通过,待服务器部署 |
|
||||
| 社交发布 | 80% | 🔄 框架完成,待测试 |
|
||||
| 服务器部署 | 0% | ⏳ 待开始 |
|
||||
|
||||
---
|
||||
|
||||
## 🎯 里程碑
|
||||
|
||||
### Milestone 1: 项目框架搭建 ✅
|
||||
**完成时间**: Day 1
|
||||
**成果**:
|
||||
- FastAPI 后端 + Next.js 前端
|
||||
- EdgeTTS + FFmpeg 集成
|
||||
- 视频生成端到端验证
|
||||
|
||||
---
|
||||
|
||||
## 📅 时间线
|
||||
|
||||
```
|
||||
Day 1: 项目初始化 + 核心功能 ✅ 完成
|
||||
- 后端 API 框架
|
||||
- 前端 UI
|
||||
- TTS + 视频合成
|
||||
- 社交发布框架
|
||||
- 部署文档
|
||||
```
|
||||
138
README.md
138
README.md
@@ -1,2 +1,138 @@
|
||||
# ViGent
|
||||
# ViGent - 数字人口播视频生成系统
|
||||
|
||||
基于 **MuseTalk + EdgeTTS** 的开源数字人口播视频生成系统。
|
||||
|
||||
> 📹 上传静态人物视频 → 🎙️ 输入口播文案 → 🎬 自动生成唇形同步视频
|
||||
|
||||
---
|
||||
|
||||
## ✨ 功能特性
|
||||
|
||||
- 🎬 **唇形同步** - MuseTalk v1.5 驱动,AI 生成自然口型
|
||||
- 🎙️ **TTS 配音** - EdgeTTS 多音色支持(云希、晓晓等)
|
||||
- 📱 **一键发布** - Playwright 自动发布到抖音、小红书、B站等
|
||||
- 🖥️ **Web UI** - Next.js 现代化界面
|
||||
|
||||
## 🛠️ 技术栈
|
||||
|
||||
| 模块 | 技术 |
|
||||
|------|------|
|
||||
| 前端 | Next.js 14 + TypeScript + TailwindCSS |
|
||||
| 后端 | FastAPI + Python 3.10 |
|
||||
| 唇形同步 | MuseTalk v1.5 (GPU) |
|
||||
| TTS | EdgeTTS |
|
||||
| 视频处理 | FFmpeg |
|
||||
| 自动发布 | Playwright |
|
||||
|
||||
---
|
||||
|
||||
## 📂 项目结构
|
||||
|
||||
```
|
||||
ViGent/
|
||||
├── backend/ # FastAPI 后端
|
||||
│ ├── app/
|
||||
│ │ ├── api/ # API 路由
|
||||
│ │ ├── services/ # 核心服务 (TTS, LipSync, Video)
|
||||
│ │ └── core/ # 配置
|
||||
│ ├── requirements.txt
|
||||
│ └── .env.example
|
||||
├── frontend/ # Next.js 前端
|
||||
│ └── src/app/
|
||||
├── models/ # AI 模型
|
||||
│ └── MuseTalk/ # 唇形同步模型
|
||||
│ └── DEPLOY.md # MuseTalk 部署指南
|
||||
└── Docs/ # 文档
|
||||
├── task_complete.md
|
||||
└── DevLogs/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚀 快速开始
|
||||
|
||||
### 1. 克隆项目
|
||||
|
||||
```bash
|
||||
git clone <仓库地址> /home/rongye/ProgramFiles/ViGent
|
||||
cd /home/rongye/ProgramFiles/ViGent
|
||||
```
|
||||
|
||||
### 2. 安装后端
|
||||
|
||||
```bash
|
||||
cd backend
|
||||
python -m venv venv
|
||||
source venv/bin/activate # Windows: venv\Scripts\activate
|
||||
pip install -r requirements.txt
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
### 3. 安装前端
|
||||
|
||||
```bash
|
||||
cd frontend
|
||||
npm install
|
||||
```
|
||||
|
||||
### 4. 安装 MuseTalk (服务器)
|
||||
|
||||
详见 [models/MuseTalk/DEPLOY.md](models/MuseTalk/DEPLOY.md)
|
||||
|
||||
```bash
|
||||
cd models/MuseTalk
|
||||
# 按照 DEPLOY.md 步骤安装
|
||||
```
|
||||
|
||||
### 5. 启动服务
|
||||
|
||||
```bash
|
||||
# 终端 1: 后端 (端口 8000)
|
||||
cd backend && source venv/bin/activate
|
||||
uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
|
||||
# 终端 2: 前端 (端口 3000)
|
||||
cd frontend
|
||||
npm run dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ 服务器配置
|
||||
|
||||
**目标服务器**: Dell PowerEdge R730
|
||||
|
||||
| 配置 | 规格 |
|
||||
|------|------|
|
||||
| CPU | 2× Intel Xeon E5-2680 v4 (56 线程) |
|
||||
| 内存 | 192GB DDR4 |
|
||||
| GPU | 2× NVIDIA RTX 3090 24GB |
|
||||
| 存储 | 4.47TB |
|
||||
|
||||
**GPU 分配**:
|
||||
- GPU 0: 其他服务
|
||||
- GPU 1: MuseTalk 唇形同步
|
||||
|
||||
---
|
||||
|
||||
## 🌐 访问地址
|
||||
|
||||
| 服务 | 地址 |
|
||||
|------|------|
|
||||
| 视频生成 | http://服务器IP:3000 |
|
||||
| 发布管理 | http://服务器IP:3000/publish |
|
||||
| API 文档 | http://服务器IP:8000/docs |
|
||||
|
||||
---
|
||||
|
||||
## 📖 文档
|
||||
|
||||
- [MuseTalk 部署指南](models/MuseTalk/DEPLOY.md)
|
||||
- [开发日志](Docs/DevLogs/)
|
||||
- [任务进度](Docs/task_complete.md)
|
||||
|
||||
---
|
||||
|
||||
## 📄 License
|
||||
|
||||
MIT
|
||||
|
||||
39
backend/.env.example
Normal file
39
backend/.env.example
Normal file
@@ -0,0 +1,39 @@
|
||||
# ViGent 环境配置示例
|
||||
# 复制此文件为 .env 并填入实际值
|
||||
|
||||
# 调试模式
|
||||
DEBUG=true
|
||||
|
||||
# Redis 配置 (Celery 任务队列)
|
||||
REDIS_URL=redis://localhost:6379/0
|
||||
|
||||
# =============== TTS 配置 ===============
|
||||
# 默认 TTS 音色
|
||||
DEFAULT_TTS_VOICE=zh-CN-YunxiNeural
|
||||
|
||||
# =============== MuseTalk 配置 ===============
|
||||
# GPU 选择 (0=第一块GPU, 1=第二块GPU)
|
||||
MUSETALK_GPU_ID=1
|
||||
|
||||
# 使用本地模式 (true) 或远程 API (false)
|
||||
MUSETALK_LOCAL=true
|
||||
|
||||
# 远程 API 地址 (仅 MUSETALK_LOCAL=false 时使用)
|
||||
# MUSETALK_API_URL=http://localhost:8001
|
||||
|
||||
# 模型版本 (v1 或 v15,推荐 v15)
|
||||
MUSETALK_VERSION=v15
|
||||
|
||||
# 推理批次大小 (根据 GPU 显存调整,RTX 3090 可用 8-16)
|
||||
MUSETALK_BATCH_SIZE=8
|
||||
|
||||
# 使用半精度加速 (推荐开启,减少显存占用)
|
||||
MUSETALK_USE_FLOAT16=true
|
||||
|
||||
# =============== 上传配置 ===============
|
||||
# 最大上传文件大小 (MB)
|
||||
MAX_UPLOAD_SIZE_MB=500
|
||||
|
||||
# =============== FFmpeg 配置 ===============
|
||||
# FFmpeg 路径 (如果不在系统 PATH 中)
|
||||
# FFMPEG_PATH=/usr/bin/ffmpeg
|
||||
0
backend/app/__init__.py
Normal file
0
backend/app/__init__.py
Normal file
0
backend/app/api/__init__.py
Normal file
0
backend/app/api/__init__.py
Normal file
53
backend/app/api/materials.py
Normal file
53
backend/app/api/materials.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException
|
||||
from app.core.config import settings
|
||||
import shutil
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@router.post("/")
async def upload_material(file: UploadFile = File(...)):
    """Upload a video material file.

    Accepts .mp4/.mov/.avi uploads, stores them under
    ``uploads/materials/<uuid><ext>`` and returns the stored metadata.

    Raises:
        HTTPException(400): if the filename is missing or the extension
            is not a supported video format.
    """
    # file.filename may be None for malformed multipart requests; guard
    # before calling .lower() to avoid an AttributeError -> opaque 500.
    if not file.filename or not file.filename.lower().endswith(('.mp4', '.mov', '.avi')):
        raise HTTPException(400, "Invalid format")

    file_id = str(uuid.uuid4())
    ext = Path(file.filename).suffix
    save_path = settings.UPLOAD_DIR / "materials" / f"{file_id}{ext}"
    # Ensure the target directory exists even if app startup did not create
    # it; otherwise open() raises FileNotFoundError -> 500.
    save_path.parent.mkdir(parents=True, exist_ok=True)

    # Stream the upload to disk in chunks (no full read into memory).
    with open(save_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    # Report the stored size in megabytes.
    size_mb = save_path.stat().st_size / (1024 * 1024)

    return {
        "id": file_id,
        "name": file.filename,
        "path": f"uploads/materials/{file_id}{ext}",
        "size_mb": size_mb,
        "type": "video"
    }
|
||||
|
||||
@router.get("/")
async def list_materials():
    """List uploaded material files, newest first."""
    materials_dir = settings.UPLOAD_DIR / "materials"
    entries = []
    if materials_dir.exists():
        for path in materials_dir.glob("*"):
            try:
                info = path.stat()
            except Exception:
                # File vanished between glob() and stat(); skip it.
                continue
            entries.append({
                "id": path.stem,
                "name": path.name,
                "path": f"uploads/materials/{path.name}",
                "size_mb": info.st_size / (1024 * 1024),
                "type": "video",
                "created_at": info.st_ctime
            })
    # Newest uploads first.
    entries.sort(key=lambda item: item.get("created_at", 0), reverse=True)
    return {"materials": entries}
|
||||
59
backend/app/api/publish.py
Normal file
59
backend/app/api/publish.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""
|
||||
发布管理 API
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
from app.services.publish_service import PublishService
|
||||
|
||||
router = APIRouter()
|
||||
publish_service = PublishService()
|
||||
|
||||
class PublishRequest(BaseModel):
    """Request body for POST /api/publish."""
    video_path: str                          # path of the rendered video to upload
    platform: str                            # platform id, one of PublishService.PLATFORMS
    title: str                               # post title
    tags: List[str] = []                     # hashtags / topic tags
    description: str = ""                    # post body text
    publish_time: Optional[datetime] = None  # None = publish immediately


class PublishResponse(BaseModel):
    """Result of a publish attempt."""
    success: bool
    message: str
    platform: str
    url: Optional[str] = None                # URL of the published post, if known
|
||||
|
||||
@router.post("/", response_model=PublishResponse)
async def publish_video(request: PublishRequest, background_tasks: BackgroundTasks):
    """Publish a rendered video to a single social platform."""
    try:
        outcome = await publish_service.publish(
            video_path=request.video_path,
            platform=request.platform,
            title=request.title,
            tags=request.tags,
            description=request.description,
            publish_time=request.publish_time
        )
        return PublishResponse(
            success=outcome.get("success", False),
            message=outcome.get("message", ""),
            platform=request.platform,
            url=outcome.get("url")
        )
    except Exception as e:
        # Surface the failure as an HTTP 500 with the error text.
        logger.error(f"发布失败: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@router.get("/platforms")
async def list_platforms():
    """Enumerate supported platforms with their metadata."""
    platforms = []
    for pid, pinfo in publish_service.PLATFORMS.items():
        entry = {"id": pid}
        entry.update(pinfo)
        platforms.append(entry)
    return {"platforms": platforms}


@router.get("/accounts")
async def list_accounts():
    """Return login status for every platform account."""
    return {"accounts": publish_service.get_accounts()}


@router.post("/login/{platform}")
async def login_platform(platform: str):
    """Open an interactive browser login for *platform* and persist its cookies."""
    return await publish_service.login(platform)
|
||||
85
backend/app/api/videos.py
Normal file
85
backend/app/api/videos.py
Normal file
@@ -0,0 +1,85 @@
|
||||
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
import traceback
|
||||
from app.services.tts_service import TTSService
|
||||
from app.services.video_service import VideoService
|
||||
from app.services.lipsync_service import LipSyncService
|
||||
from app.core.config import settings
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
class GenerateRequest(BaseModel):
    """Request body for POST /api/videos/generate."""
    text: str                         # narration script to synthesize
    voice: str = "zh-CN-YunxiNeural"  # EdgeTTS voice id
    material_path: str                # source video, absolute or repo-relative


# In-memory task store. NOTE(review): lost on restart and not shared across
# worker processes — adequate only for a single-process dev server.
tasks = {}
|
||||
|
||||
async def _process_video_generation(task_id: str, req: GenerateRequest):
    """Background pipeline: TTS -> lip sync -> final composition.

    Progress and results are reported through the module-level ``tasks``
    dict; this function never raises — failures are recorded on the task.
    """
    # Local import kept local on purpose: only this function needs it and we
    # avoid touching the module import block. Hoisted out of the fallback
    # branch so it is not re-imported conditionally.
    import shutil

    task = tasks[task_id]
    try:
        # Resolve relative material paths against the repository root.
        input_material_path = Path(req.material_path)
        if not input_material_path.is_absolute():
            input_material_path = settings.BASE_DIR.parent / req.material_path

        # Fail fast with a clear message instead of a confusing error from
        # ffmpeg/MuseTalk halfway through the pipeline.
        if not input_material_path.exists():
            raise FileNotFoundError(f"Material not found: {input_material_path}")

        task["status"] = "processing"
        task["progress"] = 5
        task["message"] = "Initializing generation..."

        # 1. TTS — synthesize the narration audio.
        task["message"] = "Generating Audio (TTS)..."
        tts = TTSService()
        audio_path = settings.OUTPUT_DIR / f"{task_id}_audio.mp3"
        await tts.generate_audio(req.text, req.voice, str(audio_path))

        task["progress"] = 30

        # 2. Lip sync — only when MuseTalk is actually usable; otherwise the
        # original footage is passed through unchanged.
        task["message"] = "Synthesizing Video (MuseTalk)..."
        lipsync = LipSyncService()
        lipsync_video_path = settings.OUTPUT_DIR / f"{task_id}_lipsync.mp4"

        if await lipsync.check_health():
            await lipsync.generate(str(input_material_path), str(audio_path), str(lipsync_video_path))
        else:
            shutil.copy(str(input_material_path), lipsync_video_path)

        task["progress"] = 80

        # 3. Composition — mux the (possibly lip-synced) video with the audio.
        task["message"] = "Final compositing..."
        video = VideoService()
        final_output = settings.OUTPUT_DIR / f"{task_id}_output.mp4"
        await video.compose(str(lipsync_video_path), str(audio_path), str(final_output))

        task["status"] = "completed"
        task["progress"] = 100
        task["message"] = "Generation Complete!"
        task["output"] = str(final_output)
        task["download_url"] = f"/outputs/{final_output.name}"

    except Exception as e:
        # Record the failure on the task so the UI can display it.
        task["status"] = "failed"
        task["message"] = f"Error: {str(e)}"
        task["error"] = traceback.format_exc()
|
||||
|
||||
@router.post("/generate")
async def generate_video(req: GenerateRequest, background_tasks: BackgroundTasks):
    """Queue a generation job and return its task id immediately."""
    task_id = str(uuid.uuid4())
    tasks[task_id] = {"status": "pending", "task_id": task_id}
    background_tasks.add_task(_process_video_generation, task_id, req)
    return {"task_id": task_id}


@router.get("/tasks/{task_id}")
async def get_task(task_id: str):
    """Look up one task; unknown ids yield status 'not_found' (HTTP 200)."""
    not_found = {"status": "not_found"}
    return tasks.get(task_id, not_found)


@router.get("/tasks")
async def list_tasks():
    """Return every known task record."""
    return {"tasks": [*tasks.values()]}
|
||||
0
backend/app/core/__init__.py
Normal file
0
backend/app/core/__init__.py
Normal file
36
backend/app/core/config.py
Normal file
36
backend/app/core/config.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from pydantic_settings import BaseSettings
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
class Settings(BaseSettings):
    """Application settings, overridable via environment variables / .env."""

    # Base paths. BASE_DIR = backend/app; uploads/ and outputs/ live next to
    # the app package under backend/.
    BASE_DIR: Path = Path(__file__).resolve().parent.parent
    UPLOAD_DIR: Path = BASE_DIR.parent / "uploads"
    OUTPUT_DIR: Path = BASE_DIR.parent / "outputs"

    # Database / cache (Celery task queue backend)
    REDIS_URL: str = "redis://localhost:6379/0"
    DEBUG: bool = True

    # TTS / upload configuration
    DEFAULT_TTS_VOICE: str = "zh-CN-YunxiNeural"
    MAX_UPLOAD_SIZE_MB: int = 500

    # MuseTalk configuration
    MUSETALK_GPU_ID: int = 1  # GPU id (GPU1 by default)
    MUSETALK_LOCAL: bool = True  # local inference; False = call remote API
    MUSETALK_API_URL: str = "http://localhost:8001"  # remote API base URL
    MUSETALK_VERSION: Literal["v1", "v15"] = "v15"  # model version
    MUSETALK_BATCH_SIZE: int = 8  # inference batch size
    MUSETALK_USE_FLOAT16: bool = True  # half precision: faster, less VRAM

    @property
    def MUSETALK_DIR(self) -> Path:
        """Path to the MuseTalk checkout (computed, not configurable)."""
        return self.BASE_DIR.parent.parent / "models" / "MuseTalk"

    class Config:
        env_file = ".env"
        extra = "ignore"  # ignore unknown environment variables


settings = Settings()
|
||||
32
backend/app/main.py
Normal file
32
backend/app/main.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from app.core import config
from app.api import materials, videos, publish

settings = config.settings

app = FastAPI(title="ViGent TalkingHead Agent")

# NOTE(review): wildcard origins combined with allow_credentials=True is wide
# open; tighten allow_origins before exposing this beyond a trusted LAN.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Create working directories up front so request handlers can assume they exist.
settings.UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
settings.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
(settings.UPLOAD_DIR / "materials").mkdir(exist_ok=True)

# Rendered videos are served directly as static files.
app.mount("/outputs", StaticFiles(directory=str(settings.OUTPUT_DIR)), name="outputs")

app.include_router(materials.router, prefix="/api/materials", tags=["Materials"])
app.include_router(videos.router, prefix="/api/videos", tags=["Videos"])
app.include_router(publish.router, prefix="/api/publish", tags=["Publish"])

@app.get("/health")
def health():
    """Liveness probe."""
    return {"status": "ok"}
|
||||
0
backend/app/services/__init__.py
Normal file
0
backend/app/services/__init__.py
Normal file
448
backend/app/services/lipsync_service.py
Normal file
448
backend/app/services/lipsync_service.py
Normal file
@@ -0,0 +1,448 @@
|
||||
"""
|
||||
唇形同步服务
|
||||
支持本地 MuseTalk 推理 (Python API) 或远程 MuseTalk API
|
||||
配置为使用 GPU1 (CUDA:1)
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import httpx
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
from typing import Optional, Any
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
# 设置 MuseTalk 使用 GPU1 (在导入 torch 之前设置)
|
||||
os.environ.setdefault("CUDA_VISIBLE_DEVICES", str(settings.MUSETALK_GPU_ID))
|
||||
|
||||
|
||||
class LipSyncService:
    """Lip-sync service — MuseTalk integration.

    Runs MuseTalk either in-process (Python API), via the MuseTalk CLI as a
    subprocess, or against a remote HTTP API, depending on configuration.
    """

    def __init__(self):
        # Mode selection, taken from settings.
        self.use_local = settings.MUSETALK_LOCAL
        self.api_url = settings.MUSETALK_API_URL
        self.version = settings.MUSETALK_VERSION
        self.musetalk_dir = settings.MUSETALK_DIR

        # Model handles, lazily populated by _load_models().
        self._model_loaded = False
        self._vae = None
        self._unet = None
        self._pe = None
        self._whisper = None
        self._audio_processor = None
        self._face_parser = None
        self._device = None

        # Cached runtime capability probes (None = not probed yet).
        self._gpu_available: Optional[bool] = None
        self._weights_available: Optional[bool] = None
|
||||
|
||||
def _check_gpu(self) -> bool:
|
||||
"""检查 GPU 是否可用"""
|
||||
if self._gpu_available is not None:
|
||||
return self._gpu_available
|
||||
|
||||
try:
|
||||
import torch
|
||||
self._gpu_available = torch.cuda.is_available()
|
||||
if self._gpu_available:
|
||||
device_name = torch.cuda.get_device_name(0)
|
||||
logger.info(f"✅ GPU 可用: {device_name}")
|
||||
else:
|
||||
logger.warning("⚠️ GPU 不可用,将使用 Fallback 模式")
|
||||
except ImportError:
|
||||
self._gpu_available = False
|
||||
logger.warning("⚠️ PyTorch 未安装,将使用 Fallback 模式")
|
||||
|
||||
return self._gpu_available
|
||||
|
||||
def _check_weights(self) -> bool:
|
||||
"""检查模型权重是否存在"""
|
||||
if self._weights_available is not None:
|
||||
return self._weights_available
|
||||
|
||||
# 检查关键权重文件
|
||||
required_dirs = [
|
||||
self.musetalk_dir / "models" / "musetalkV15",
|
||||
self.musetalk_dir / "models" / "whisper",
|
||||
]
|
||||
|
||||
self._weights_available = all(d.exists() for d in required_dirs)
|
||||
|
||||
if self._weights_available:
|
||||
logger.info("✅ MuseTalk 权重文件已就绪")
|
||||
else:
|
||||
missing = [str(d) for d in required_dirs if not d.exists()]
|
||||
logger.warning(f"⚠️ 缺少权重文件: {missing}")
|
||||
|
||||
return self._weights_available
|
||||
|
||||
    def _load_models(self):
        """Lazily load the MuseTalk models onto the GPU (Python-API mode).

        Returns True when all models are ready, False when loading is not
        possible (no GPU, missing weights, or an import/initialization
        error). Safe to call repeatedly; loading happens once.
        """
        if self._model_loaded:
            return True

        if not self._check_gpu() or not self._check_weights():
            return False

        logger.info("🔄 加载 MuseTalk 模型到 GPU...")

        try:
            # Make the MuseTalk checkout importable as a package.
            if str(self.musetalk_dir) not in sys.path:
                sys.path.insert(0, str(self.musetalk_dir))
                logger.debug(f"Added to sys.path: {self.musetalk_dir}")

            import torch
            from omegaconf import OmegaConf
            from transformers import WhisperModel

            # MuseTalk modules — resolvable only after the sys.path insert above.
            from musetalk.utils.utils import load_all_model
            from musetalk.utils.audio_processor import AudioProcessor
            from musetalk.utils.face_parsing import FaceParsing

            # With CUDA_VISIBLE_DEVICES pinned at module import time, the
            # selected physical GPU is always visible here as cuda:0.
            self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

            # Weight locations inside the MuseTalk checkout.
            unet_model_path = str(self.musetalk_dir / "models" / "musetalkV15" / "unet.pth")
            unet_config = str(self.musetalk_dir / "models" / "musetalk" / "config.json")
            whisper_dir = str(self.musetalk_dir / "models" / "whisper")

            self._vae, self._unet, self._pe = load_all_model(
                unet_model_path=unet_model_path,
                vae_type="sd-vae",
                unet_config=unet_config,
                device=self._device
            )

            # Optional half precision: faster and roughly halves VRAM use.
            if settings.MUSETALK_USE_FLOAT16:
                self._pe = self._pe.half()
                self._vae.vae = self._vae.vae.half()
                self._unet.model = self._unet.model.half()

            # Move everything to the target device.
            self._pe = self._pe.to(self._device)
            self._vae.vae = self._vae.vae.to(self._device)
            self._unet.model = self._unet.model.to(self._device)

            # Whisper encoder, matched to the UNet's dtype, inference-only.
            weight_dtype = self._unet.model.dtype
            self._whisper = WhisperModel.from_pretrained(whisper_dir)
            self._whisper = self._whisper.to(device=self._device, dtype=weight_dtype).eval()
            self._whisper.requires_grad_(False)

            # Audio feature extractor.
            self._audio_processor = AudioProcessor(feature_extractor_path=whisper_dir)

            # Face parser — the v15 variant accepts extra tuning parameters.
            if self.version == "v15":
                self._face_parser = FaceParsing(
                    left_cheek_width=90,
                    right_cheek_width=90
                )
            else:
                self._face_parser = FaceParsing()

            self._model_loaded = True
            logger.info("✅ MuseTalk 模型加载完成")
            return True

        except Exception as e:
            logger.error(f"❌ MuseTalk 模型加载失败: {e}")
            import traceback
            logger.debug(traceback.format_exc())
            return False
|
||||
|
||||
async def generate(
|
||||
self,
|
||||
video_path: str,
|
||||
audio_path: str,
|
||||
output_path: str,
|
||||
fps: int = 25
|
||||
) -> str:
|
||||
"""生成唇形同步视频"""
|
||||
logger.info(f"🎬 唇形同步任务: {Path(video_path).name} + {Path(audio_path).name}")
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 决定使用哪种模式
|
||||
if self.use_local:
|
||||
if self._load_models():
|
||||
return await self._local_generate_api(video_path, audio_path, output_path, fps)
|
||||
else:
|
||||
logger.warning("⚠️ 本地推理失败,尝试 subprocess 方式")
|
||||
return await self._local_generate_subprocess(video_path, audio_path, output_path, fps)
|
||||
else:
|
||||
return await self._remote_generate(video_path, audio_path, output_path, fps)
|
||||
|
||||
    async def _local_generate_api(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int
    ) -> str:
        """Run MuseTalk inference in-process (Python API).

        Pipeline: extract frames -> whisper audio features -> face landmarks
        -> VAE latents -> batched UNet inference -> blend faces back ->
        encode video -> mux audio. Requires _load_models() to have succeeded.
        """
        import torch
        import cv2
        import copy
        import glob
        import pickle
        import numpy as np
        from tqdm import tqdm

        from musetalk.utils.utils import get_file_type, get_video_fps, datagen
        from musetalk.utils.preprocessing import get_landmark_and_bbox, read_imgs, coord_placeholder
        from musetalk.utils.blending import get_image

        logger.info("🔄 开始 MuseTalk 推理 (Python API)...")

        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)
            result_img_dir = tmpdir / "frames"
            result_img_dir.mkdir()

            # 1. Split the source video into PNG frames (or accept a still image).
            logger.info("📹 提取视频帧...")
            if get_file_type(video_path) == "video":
                frames_dir = tmpdir / "input_frames"
                frames_dir.mkdir()
                cmd = f'ffmpeg -v fatal -i "{video_path}" -start_number 0 "{frames_dir}/%08d.png"'
                subprocess.run(cmd, shell=True, check=True)
                input_img_list = sorted(glob.glob(str(frames_dir / "*.png")))
                video_fps = get_video_fps(video_path)
            else:
                # Single still image: animate it at the requested fps.
                input_img_list = [video_path]
                video_fps = fps

            # 2. Whisper audio features, chunked one-per-output-frame.
            logger.info("🎵 提取音频特征...")
            whisper_input_features, librosa_length = self._audio_processor.get_audio_feature(audio_path)
            weight_dtype = self._unet.model.dtype
            whisper_chunks = self._audio_processor.get_whisper_chunk(
                whisper_input_features,
                self._device,
                weight_dtype,
                self._whisper,
                librosa_length,
                fps=video_fps,
                audio_padding_length_left=2,
                audio_padding_length_right=2,
            )

            # 3. Face landmarks / bounding boxes per frame.
            logger.info("🧑 检测人脸关键点...")
            coord_list, frame_list = get_landmark_and_bbox(input_img_list, bbox_shift=0)

            # 4. Encode each detected face crop into VAE latent space.
            logger.info("🔢 编码图像潜在表示...")
            input_latent_list = []
            for bbox, frame in zip(coord_list, frame_list):
                if bbox == coord_placeholder:
                    # No face detected in this frame; skip it.
                    continue
                x1, y1, x2, y2 = bbox
                if self.version == "v15":
                    # v15 extends the crop slightly below the jaw.
                    y2 = min(y2 + 10, frame.shape[0])
                crop_frame = frame[y1:y2, x1:x2]
                crop_frame = cv2.resize(crop_frame, (256, 256), interpolation=cv2.INTER_LANCZOS4)
                latents = self._vae.get_latents_for_unet(crop_frame)
                input_latent_list.append(latents)

            # Ping-pong the frame/coord/latent lists so audio longer than the
            # clip loops back and forth smoothly.
            frame_list_cycle = frame_list + frame_list[::-1]
            coord_list_cycle = coord_list + coord_list[::-1]
            input_latent_list_cycle = input_latent_list + input_latent_list[::-1]

            # 5. Batched UNet inference driven by the audio chunks.
            logger.info("🤖 执行 MuseTalk 推理...")
            timesteps = torch.tensor([0], device=self._device)
            batch_size = settings.MUSETALK_BATCH_SIZE
            video_num = len(whisper_chunks)

            gen = datagen(
                whisper_chunks=whisper_chunks,
                vae_encode_latents=input_latent_list_cycle,
                batch_size=batch_size,
                delay_frame=0,
                device=self._device,
            )

            res_frame_list = []
            total = int(np.ceil(float(video_num) / batch_size))

            with torch.no_grad():
                for i, (whisper_batch, latent_batch) in enumerate(tqdm(gen, total=total, desc="推理")):
                    audio_feature_batch = self._pe(whisper_batch)
                    latent_batch = latent_batch.to(dtype=self._unet.model.dtype)
                    pred_latents = self._unet.model(
                        latent_batch, timesteps, encoder_hidden_states=audio_feature_batch
                    ).sample
                    recon = self._vae.decode_latents(pred_latents)
                    for res_frame in recon:
                        res_frame_list.append(res_frame)

            # 6. Blend each generated face back into its original frame.
            logger.info("🖼️ 合成结果帧...")
            for i, res_frame in enumerate(tqdm(res_frame_list, desc="合成")):
                bbox = coord_list_cycle[i % len(coord_list_cycle)]
                ori_frame = copy.deepcopy(frame_list_cycle[i % len(frame_list_cycle)])
                x1, y1, x2, y2 = bbox

                if self.version == "v15":
                    y2 = min(y2 + 10, ori_frame.shape[0])

                try:
                    res_frame = cv2.resize(res_frame.astype(np.uint8), (x2 - x1, y2 - y1))
                except:
                    # Degenerate bbox (e.g. no face in this frame); drop it.
                    continue

                if self.version == "v15":
                    combine_frame = get_image(
                        ori_frame, res_frame, [x1, y1, x2, y2],
                        mode="jaw", fp=self._face_parser
                    )
                else:
                    combine_frame = get_image(ori_frame, res_frame, [x1, y1, x2, y2], fp=self._face_parser)

                cv2.imwrite(str(result_img_dir / f"{i:08d}.png"), combine_frame)

            # 7. Encode the blended frames into a silent H.264 video.
            logger.info("🎬 合成最终视频...")
            temp_video = tmpdir / "temp_video.mp4"
            cmd_video = f'ffmpeg -y -v warning -r {video_fps} -f image2 -i "{result_img_dir}/%08d.png" -vcodec libx264 -vf format=yuv420p -crf 18 "{temp_video}"'
            subprocess.run(cmd_video, shell=True, check=True)

            # 8. Mux the narration audio onto the video (written outside tmpdir).
            cmd_audio = f'ffmpeg -y -v warning -i "{audio_path}" -i "{temp_video}" -c:v copy -c:a aac -shortest "{output_path}"'
            subprocess.run(cmd_audio, shell=True, check=True)

        logger.info(f"✅ 唇形同步完成: {output_path}")
        return output_path
|
||||
|
||||
    async def _local_generate_subprocess(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int
    ) -> str:
        """Run MuseTalk via its CLI (``python -m scripts.inference``).

        Used when in-process loading failed. Falls back to copying the input
        video verbatim whenever inference cannot run or produces no output,
        so the overall pipeline never hard-fails here.
        """
        logger.info("🔄 使用 subprocess 调用 MuseTalk...")

        # Without weights the CLI cannot work either — fall back immediately.
        if not self._check_weights():
            logger.warning("⚠️ 权重不存在,使用 Fallback 模式")
            shutil.copy(video_path, output_path)
            return output_path

        with tempfile.TemporaryDirectory() as tmpdir:
            # Write a one-task inference config for the CLI.
            # NOTE(review): paths are interpolated into quoted YAML scalars;
            # a path containing '"' would break the config — confirm upload
            # paths are always sanitized upstream.
            config_path = Path(tmpdir) / "inference_config.yaml"
            config_content = f"""
task1:
  video_path: "{video_path}"
  audio_path: "{audio_path}"
  result_name: "output.mp4"
"""
            config_path.write_text(config_content)

            result_dir = Path(tmpdir) / "results"
            result_dir.mkdir()

            # Invoke the CLI from inside the MuseTalk checkout so its
            # relative imports and model paths resolve.
            cmd = [
                sys.executable, "-m", "scripts.inference",
                "--version", self.version,
                "--inference_config", str(config_path),
                "--result_dir", str(result_dir),
                "--gpu_id", "0",  # CUDA_VISIBLE_DEVICES already pins the physical GPU
            ]

            if settings.MUSETALK_USE_FLOAT16:
                cmd.append("--use_float16")

            result = subprocess.run(
                cmd,
                cwd=str(self.musetalk_dir),
                capture_output=True,
                text=True,
                env={**os.environ, "CUDA_VISIBLE_DEVICES": str(settings.MUSETALK_GPU_ID)}
            )

            if result.returncode != 0:
                logger.error(f"MuseTalk CLI 失败: {result.stderr}")
                # Fallback: pass the original footage through unchanged.
                shutil.copy(video_path, output_path)
                return output_path

            # The CLI writes its result somewhere under result_dir.
            output_files = list(result_dir.rglob("*.mp4"))
            if output_files:
                shutil.copy(output_files[0], output_path)
                logger.info(f"✅ 唇形同步完成: {output_path}")
            else:
                logger.warning("⚠️ 未找到输出文件,使用 Fallback")
                shutil.copy(video_path, output_path)

        return output_path
|
||||
|
||||
    async def _remote_generate(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int
    ) -> str:
        """Delegate lip-sync to a remote MuseTalk HTTP service.

        POSTs the video and audio to ``{api_url}/lipsync`` and writes the
        returned body to *output_path*. On any failure the input video is
        copied through unchanged (best-effort fallback).
        """
        logger.info(f"📡 调用远程 API: {self.api_url}")

        try:
            # Inference is slow — allow up to 5 minutes per request.
            async with httpx.AsyncClient(timeout=300.0) as client:
                with open(video_path, "rb") as vf, open(audio_path, "rb") as af:
                    files = {
                        "video": (Path(video_path).name, vf, "video/mp4"),
                        "audio": (Path(audio_path).name, af, "audio/mpeg"),
                    }
                    data = {"fps": fps}

                    response = await client.post(
                        f"{self.api_url}/lipsync",
                        files=files,
                        data=data
                    )

                if response.status_code == 200:
                    # The response body is the finished video file.
                    with open(output_path, "wb") as f:
                        f.write(response.content)
                    logger.info(f"✅ 远程推理完成: {output_path}")
                    return output_path
                else:
                    raise RuntimeError(f"API 错误: {response.status_code} - {response.text}")

        except Exception as e:
            logger.error(f"远程 API 调用失败: {e}")
            # Fallback: pass the original footage through unchanged.
            shutil.copy(video_path, output_path)
            return output_path
|
||||
|
||||
async def check_health(self) -> bool:
|
||||
"""健康检查"""
|
||||
if self.use_local:
|
||||
gpu_ok = self._check_gpu()
|
||||
weights_ok = self._check_weights()
|
||||
return gpu_ok and weights_ok
|
||||
else:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
response = await client.get(f"{self.api_url}/health")
|
||||
return response.status_code == 200
|
||||
except:
|
||||
return False
|
||||
71
backend/app/services/publish_service.py
Normal file
71
backend/app/services/publish_service.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
发布服务 (Playwright)
|
||||
"""
|
||||
from playwright.async_api import async_playwright
|
||||
from pathlib import Path
|
||||
import json
|
||||
import asyncio
|
||||
from loguru import logger
|
||||
from app.core.config import settings
|
||||
|
||||
class PublishService:
    """Automates posting videos to Chinese social platforms via Playwright.

    Login is interactive (a visible browser window); session cookies are
    persisted per platform as JSON files under ``<BASE_DIR>/cookies/``.
    """

    # Supported platforms: id -> display name + creator-studio URL.
    PLATFORMS = {
        "douyin": {"name": "抖音", "url": "https://creator.douyin.com/"},
        "xiaohongshu": {"name": "小红书", "url": "https://creator.xiaohongshu.com/"},
        "weixin": {"name": "微信视频号", "url": "https://channels.weixin.qq.com/"},
        "kuaishou": {"name": "快手", "url": "https://cp.kuaishou.com/"},
        "bilibili": {"name": "B站", "url": "https://member.bilibili.com/platform/upload/video/frame"},
    }

    def __init__(self):
        # Cookie jar directory, one JSON file per platform.
        self.cookies_dir = settings.BASE_DIR / "cookies"
        self.cookies_dir.mkdir(exist_ok=True)

    def get_accounts(self):
        """Return login status for every supported platform.

        A platform counts as logged in when its cookie file exists;
        cookie expiry is not checked here.
        """
        accounts = []
        for pid, pinfo in self.PLATFORMS.items():
            cookie_file = self.cookies_dir / f"{pid}_cookies.json"
            accounts.append({
                "platform": pid,
                "name": pinfo["name"],
                "logged_in": cookie_file.exists(),
                "enabled": True
            })
        return accounts

    async def login(self, platform: str):
        """Interactive login: open the platform page, wait, then save cookies.

        Raises:
            ValueError: if *platform* is not in PLATFORMS.
        """
        if platform not in self.PLATFORMS:
            raise ValueError("Unsupported platform")

        pinfo = self.PLATFORMS[platform]
        logger.info(f"Logging in to {platform}...")

        async with async_playwright() as p:
            # Headful browser so the user can complete the login by hand.
            browser = await p.chromium.launch(headless=False)
            context = await browser.new_context()
            page = await context.new_page()

            await page.goto(pinfo["url"])
            logger.info("Please login manually in the browser window...")

            # NOTE(review): this waits a fixed 45 s for the user to finish —
            # there is no detection of a successful login; consider polling
            # the URL/title instead of a blind timeout.
            try:
                await page.wait_for_timeout(45000)  # Give user 45s to login
                cookies = await context.cookies()
                cookie_path = self.cookies_dir / f"{platform}_cookies.json"
                with open(cookie_path, "w") as f:
                    json.dump(cookies, f)
                return {"success": True, "message": f"Login {platform} successful"}
            except Exception as e:
                return {"success": False, "message": str(e)}
            finally:
                await browser.close()

    async def publish(self, video_path: str, platform: str, title: str, **kwargs):
        """Publish a video (mock implementation).

        Real automation needs per-platform upload selectors; for now this
        sleeps briefly and reports success so the API flow can be exercised.
        """
        await asyncio.sleep(2)
        return {"success": True, "message": f"Published to {platform} (Mock)", "url": ""}
|
||||
33
backend/app/services/tts_service.py
Normal file
33
backend/app/services/tts_service.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""
|
||||
TTS 服务 (EdgeTTS)
|
||||
"""
|
||||
import edge_tts
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
class TTSService:
    """Text-to-speech service backed by Microsoft EdgeTTS."""

    # Supported zh-CN neural voices, keyed by EdgeTTS voice id.
    VOICES = {
        "zh-CN-YunxiNeural": "云希 (男, 轻松)",
        "zh-CN-YunjianNeural": "云健 (男, 体育)",
        "zh-CN-YunyangNeural": "云扬 (男, 专业)",
        "zh-CN-XiaoxiaoNeural": "晓晓 (女, 活泼)",
        "zh-CN-XiaoyiNeural": "晓伊 (女, 卡通)",
    }

    async def generate_audio(self, text: str, voice: str, output_path: str) -> str:
        """Synthesize *text* with *voice* and write the audio to *output_path*.

        Returns the output path on success; re-raises on any EdgeTTS failure.
        """
        logger.info(f"TTS Generating: {text[:20]}... ({voice})")
        # Make sure the destination directory exists before EdgeTTS writes.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

        try:
            synthesizer = edge_tts.Communicate(text, voice)
            await synthesizer.save(output_path)
            # Subtitle generation (vtt -> srt conversion) intentionally omitted.
            return output_path
        except Exception as exc:
            logger.error(f"TTS Failed: {exc}")
            raise

    async def list_voices(self):
        """Return the supported voices as ``[{"id": ..., "name": ...}, ...]``."""
        voices = []
        for voice_id, label in self.VOICES.items():
            voices.append({"id": voice_id, "name": label})
        return voices
|
||||
95
backend/app/services/video_service.py
Normal file
95
backend/app/services/video_service.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
视频合成服务
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import json
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
from typing import Optional
|
||||
|
||||
class VideoService:
    """Compose videos by muxing a (possibly looped) material clip with a
    generated voice track, driving the ffmpeg/ffprobe CLIs synchronously
    (compatible with FastAPI BackgroundTasks)."""

    def __init__(self):
        pass

    def _run_ffmpeg(self, cmd: list) -> bool:
        """Run an ffmpeg command given as an argv list.

        Returns True on exit code 0; False on a non-zero exit or any
        exception (missing binary, decode error, ...).
        """
        logger.debug(f"FFmpeg CMD: {' '.join(cmd)}")
        try:
            # Pass the argv list directly with shell=False: this removes
            # the hand-rolled quoting (which broke on paths containing
            # quotes) and the shell-injection surface of the previous
            # string-based invocation.
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                encoding='utf-8',
            )
            if result.returncode != 0:
                logger.error(f"FFmpeg Error: {result.stderr}")
                return False
            return True
        except Exception as e:
            logger.error(f"FFmpeg Exception: {e}")
            return False

    def _get_duration(self, file_path: str) -> float:
        """Return the media duration in seconds via ffprobe, or 0.0 on failure."""
        # argv list instead of a quoted shell string (same rationale as
        # _run_ffmpeg); the path is passed verbatim as one argument.
        cmd = [
            "ffprobe", "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            file_path,
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
            return float(result.stdout.strip())
        except Exception:
            # Missing binary, unreadable file, or empty/garbled output.
            return 0.0

    async def compose(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        subtitle_path: Optional[str] = None
    ) -> str:
        """合成视频 — mux *video_path* with *audio_path* into *output_path*.

        If the audio is longer than the video, the video input is looped
        enough times to cover it; ``-shortest`` then trims the result to
        the audio length.

        Args:
            video_path: Source (material) video file.
            audio_path: Voice track to lay over the video.
            output_path: Destination file; parent directories are created.
            subtitle_path: Currently unused — subtitle burn-in is disabled
                pending font-configuration issues.

        Returns:
            The output path on success.

        Raises:
            RuntimeError: If the ffmpeg invocation fails.
        """
        # Ensure output dir exists before ffmpeg tries to write.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

        video_duration = self._get_duration(video_path)
        audio_duration = self._get_duration(audio_path)

        # Loop the video input if the narration outlasts it. Guard against
        # a zero duration (probe failure) to avoid division by zero.
        loop_count = 1
        if audio_duration > video_duration and video_duration > 0:
            loop_count = int(audio_duration / video_duration) + 1

        cmd = ["ffmpeg", "-y"]

        # Input video (-stream_loop must precede its -i).
        if loop_count > 1:
            cmd.extend(["-stream_loop", str(loop_count)])
        cmd.extend(["-i", video_path])

        # Input audio.
        cmd.extend(["-i", audio_path])

        # Subtitles intentionally skipped for now (font issues);
        # subtitle_path is accepted but ignored.

        # Re-encode video, encode audio as AAC, stop at the shorter stream.
        cmd.extend(["-c:v", "libx264", "-c:a", "aac", "-shortest"])
        # Video from input 0, audio from input 1.
        cmd.extend(["-map", "0:v", "-map", "1:a"])

        cmd.append(output_path)

        if self._run_ffmpeg(cmd):
            return output_path
        else:
            raise RuntimeError("FFmpeg composition failed")
|
||||
20
backend/requirements.txt
Normal file
20
backend/requirements.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
# ViGent Backend 依赖
|
||||
# MuseTalk 依赖请参考: models/MuseTalk/DEPLOY.md
|
||||
|
||||
fastapi>=0.109.0
|
||||
uvicorn[standard]>=0.27.0
|
||||
python-multipart>=0.0.6
|
||||
pydantic>=2.5.3
|
||||
pydantic-settings>=2.1.0
|
||||
celery>=5.3.6
|
||||
redis>=5.0.1
|
||||
edge-tts>=6.1.9
|
||||
ffmpeg-python>=0.2.0
|
||||
httpx>=0.26.0
|
||||
aiofiles>=23.2.1
|
||||
sqlalchemy>=2.0.25
|
||||
aiosqlite>=0.19.0
|
||||
python-dotenv>=1.0.0
|
||||
loguru>=0.7.2
|
||||
playwright>=1.40.0
|
||||
requests>=2.31.0
|
||||
41
frontend/.gitignore
vendored
Normal file
41
frontend/.gitignore
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.*
|
||||
.yarn/*
|
||||
!.yarn/patches
|
||||
!.yarn/plugins
|
||||
!.yarn/releases
|
||||
!.yarn/versions
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# env files (can opt-in for committing if needed)
|
||||
.env*
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
36
frontend/README.md
Normal file
36
frontend/README.md
Normal file
@@ -0,0 +1,36 @@
|
||||
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
|
||||
|
||||
## Getting Started
|
||||
|
||||
First, run the development server:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
# or
|
||||
yarn dev
|
||||
# or
|
||||
pnpm dev
|
||||
# or
|
||||
bun dev
|
||||
```
|
||||
|
||||
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
||||
|
||||
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
|
||||
|
||||
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
|
||||
|
||||
## Learn More
|
||||
|
||||
To learn more about Next.js, take a look at the following resources:
|
||||
|
||||
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
|
||||
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
|
||||
|
||||
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
|
||||
|
||||
## Deploy on Vercel
|
||||
|
||||
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
|
||||
|
||||
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
|
||||
18
frontend/eslint.config.mjs
Normal file
18
frontend/eslint.config.mjs
Normal file
@@ -0,0 +1,18 @@
|
||||
import { defineConfig, globalIgnores } from "eslint/config";
import nextVitals from "eslint-config-next/core-web-vitals";
import nextTs from "eslint-config-next/typescript";

// Paths eslint-config-next ignores by default, re-declared explicitly so
// the override below stays visible and editable in one place.
const defaultNextIgnores = [
  ".next/**",
  "out/**",
  "build/**",
  "next-env.d.ts",
];

const eslintConfig = defineConfig([
  ...nextVitals,
  ...nextTs,
  // Override default ignores of eslint-config-next.
  globalIgnores(defaultNextIgnores),
]);

export default eslintConfig;
|
||||
15
frontend/next.config.ts
Normal file
15
frontend/next.config.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import type { NextConfig } from "next";
|
||||
|
||||
const nextConfig: NextConfig = {
|
||||
// 允许跨域请求后端 API
|
||||
async rewrites() {
|
||||
return [
|
||||
{
|
||||
source: '/api/:path*',
|
||||
destination: 'http://127.0.0.1:8000/api/:path*',
|
||||
},
|
||||
];
|
||||
},
|
||||
};
|
||||
|
||||
export default nextConfig;
|
||||
6550
frontend/package-lock.json
generated
Normal file
6550
frontend/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
26
frontend/package.json
Normal file
26
frontend/package.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"name": "frontend",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
"start": "next start",
|
||||
"lint": "eslint"
|
||||
},
|
||||
"dependencies": {
|
||||
"next": "16.1.1",
|
||||
"react": "19.2.3",
|
||||
"react-dom": "19.2.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tailwindcss/postcss": "^4",
|
||||
"@types/node": "^20",
|
||||
"@types/react": "^19",
|
||||
"@types/react-dom": "^19",
|
||||
"eslint": "^9",
|
||||
"eslint-config-next": "16.1.1",
|
||||
"tailwindcss": "^4",
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
7
frontend/postcss.config.mjs
Normal file
7
frontend/postcss.config.mjs
Normal file
@@ -0,0 +1,7 @@
|
||||
/** PostCSS configuration: Tailwind CSS v4 is the sole plugin. */
const plugins = {
  "@tailwindcss/postcss": {},
};

export default { plugins };
|
||||
1
frontend/public/file.svg
Normal file
1
frontend/public/file.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg fill="none" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg"><path d="M14.5 13.5V5.41a1 1 0 0 0-.3-.7L9.8.29A1 1 0 0 0 9.08 0H1.5v13.5A2.5 2.5 0 0 0 4 16h8a2.5 2.5 0 0 0 2.5-2.5m-1.5 0v-7H8v-5H3v12a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1M9.5 5V2.12L12.38 5zM5.13 5h-.62v1.25h2.12V5zm-.62 3h7.12v1.25H4.5zm.62 3h-.62v1.25h7.12V11z" clip-rule="evenodd" fill="#666" fill-rule="evenodd"/></svg>
|
||||
|
After Width: | Height: | Size: 391 B |
1
frontend/public/globe.svg
Normal file
1
frontend/public/globe.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><g clip-path="url(#a)"><path fill-rule="evenodd" clip-rule="evenodd" d="M10.27 14.1a6.5 6.5 0 0 0 3.67-3.45q-1.24.21-2.7.34-.31 1.83-.97 3.1M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16m.48-1.52a7 7 0 0 1-.96 0H7.5a4 4 0 0 1-.84-1.32q-.38-.89-.63-2.08a40 40 0 0 0 3.92 0q-.25 1.2-.63 2.08a4 4 0 0 1-.84 1.31zm2.94-4.76q1.66-.15 2.95-.43a7 7 0 0 0 0-2.58q-1.3-.27-2.95-.43a18 18 0 0 1 0 3.44m-1.27-3.54a17 17 0 0 1 0 3.64 39 39 0 0 1-4.3 0 17 17 0 0 1 0-3.64 39 39 0 0 1 4.3 0m1.1-1.17q1.45.13 2.69.34a6.5 6.5 0 0 0-3.67-3.44q.65 1.26.98 3.1M8.48 1.5l.01.02q.41.37.84 1.31.38.89.63 2.08a40 40 0 0 0-3.92 0q.25-1.2.63-2.08a4 4 0 0 1 .85-1.32 7 7 0 0 1 .96 0m-2.75.4a6.5 6.5 0 0 0-3.67 3.44 29 29 0 0 1 2.7-.34q.31-1.83.97-3.1M4.58 6.28q-1.66.16-2.95.43a7 7 0 0 0 0 2.58q1.3.27 2.95.43a18 18 0 0 1 0-3.44m.17 4.71q-1.45-.12-2.69-.34a6.5 6.5 0 0 0 3.67 3.44q-.65-1.27-.98-3.1" fill="#666"/></g><defs><clipPath id="a"><path fill="#fff" d="M0 0h16v16H0z"/></clipPath></defs></svg>
|
||||
|
After Width: | Height: | Size: 1.0 KiB |
1
frontend/public/next.svg
Normal file
1
frontend/public/next.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>
|
||||
|
After Width: | Height: | Size: 1.3 KiB |
1
frontend/public/vercel.svg
Normal file
1
frontend/public/vercel.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1155 1000"><path d="m577.3 0 577.4 1000H0z" fill="#fff"/></svg>
|
||||
|
After Width: | Height: | Size: 128 B |
1
frontend/public/window.svg
Normal file
1
frontend/public/window.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16"><path fill-rule="evenodd" clip-rule="evenodd" d="M1.5 2.5h13v10a1 1 0 0 1-1 1h-11a1 1 0 0 1-1-1zM0 1h16v11.5a2.5 2.5 0 0 1-2.5 2.5h-11A2.5 2.5 0 0 1 0 12.5zm3.75 4.5a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5M7 4.75a.75.75 0 1 1-1.5 0 .75.75 0 0 1 1.5 0m1.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5" fill="#666"/></svg>
|
||||
|
After Width: | Height: | Size: 385 B |
BIN
frontend/src/app/favicon.ico
Normal file
BIN
frontend/src/app/favicon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 25 KiB |
26
frontend/src/app/globals.css
Normal file
26
frontend/src/app/globals.css
Normal file
@@ -0,0 +1,26 @@
|
||||
@import "tailwindcss";
|
||||
|
||||
:root {
|
||||
--background: #ffffff;
|
||||
--foreground: #171717;
|
||||
}
|
||||
|
||||
@theme inline {
|
||||
--color-background: var(--background);
|
||||
--color-foreground: var(--foreground);
|
||||
--font-sans: var(--font-geist-sans);
|
||||
--font-mono: var(--font-geist-mono);
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme: dark) {
|
||||
:root {
|
||||
--background: #0a0a0a;
|
||||
--foreground: #ededed;
|
||||
}
|
||||
}
|
||||
|
||||
body {
|
||||
background: var(--background);
|
||||
color: var(--foreground);
|
||||
font-family: Arial, Helvetica, sans-serif;
|
||||
}
|
||||
34
frontend/src/app/layout.tsx
Normal file
34
frontend/src/app/layout.tsx
Normal file
@@ -0,0 +1,34 @@
|
||||
import type { Metadata } from "next";
|
||||
import { Geist, Geist_Mono } from "next/font/google";
|
||||
import "./globals.css";
|
||||
|
||||
const geistSans = Geist({
|
||||
variable: "--font-geist-sans",
|
||||
subsets: ["latin"],
|
||||
});
|
||||
|
||||
const geistMono = Geist_Mono({
|
||||
variable: "--font-geist-mono",
|
||||
subsets: ["latin"],
|
||||
});
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "Create Next App",
|
||||
description: "Generated by create next app",
|
||||
};
|
||||
|
||||
export default function RootLayout({
|
||||
children,
|
||||
}: Readonly<{
|
||||
children: React.ReactNode;
|
||||
}>) {
|
||||
return (
|
||||
<html lang="en">
|
||||
<body
|
||||
className={`${geistSans.variable} ${geistMono.variable} antialiased`}
|
||||
>
|
||||
{children}
|
||||
</body>
|
||||
</html>
|
||||
);
|
||||
}
|
||||
348
frontend/src/app/page.tsx
Normal file
348
frontend/src/app/page.tsx
Normal file
@@ -0,0 +1,348 @@
|
||||
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
|
||||
const API_BASE = "http://127.0.0.1:8000";
|
||||
|
||||
// 类型定义
|
||||
interface Material {
|
||||
id: string;
|
||||
name: string;
|
||||
scene: string;
|
||||
size_mb: number;
|
||||
path: string;
|
||||
}
|
||||
|
||||
interface Task {
|
||||
task_id: string;
|
||||
status: string;
|
||||
progress: number;
|
||||
message: string;
|
||||
download_url?: string;
|
||||
}
|
||||
|
||||
export default function Home() {
|
||||
const [materials, setMaterials] = useState<Material[]>([]);
|
||||
const [selectedMaterial, setSelectedMaterial] = useState<string>("");
|
||||
const [text, setText] = useState<string>(
|
||||
"大家好,欢迎来到我的频道,今天给大家分享一些有趣的内容。"
|
||||
);
|
||||
const [voice, setVoice] = useState<string>("zh-CN-YunxiNeural");
|
||||
const [isGenerating, setIsGenerating] = useState(false);
|
||||
const [currentTask, setCurrentTask] = useState<Task | null>(null);
|
||||
const [generatedVideo, setGeneratedVideo] = useState<string | null>(null);
|
||||
const [fetchError, setFetchError] = useState<string | null>(null);
|
||||
const [debugData, setDebugData] = useState<string>("");
|
||||
|
||||
// 可选音色
|
||||
const voices = [
|
||||
{ id: "zh-CN-YunxiNeural", name: "云溪 (男声-年轻)" },
|
||||
{ id: "zh-CN-YunjianNeural", name: "云健 (男声-新闻)" },
|
||||
{ id: "zh-CN-YunyangNeural", name: "云扬 (男声-专业)" },
|
||||
{ id: "zh-CN-XiaoxiaoNeural", name: "晓晓 (女声-活泼)" },
|
||||
{ id: "zh-CN-XiaoyiNeural", name: "晓伊 (女声-温柔)" },
|
||||
];
|
||||
|
||||
// 加载素材列表
|
||||
useEffect(() => {
|
||||
fetchMaterials();
|
||||
}, []);
|
||||
|
||||
const fetchMaterials = async () => {
|
||||
try {
|
||||
setFetchError(null);
|
||||
setDebugData("Loading...");
|
||||
|
||||
// Add timestamp to prevent caching
|
||||
const url = `${API_BASE}/api/materials/?t=${new Date().getTime()}`;
|
||||
const res = await fetch(url);
|
||||
|
||||
if (!res.ok) {
|
||||
throw new Error(`HTTP ${res.status} ${res.statusText}`);
|
||||
}
|
||||
|
||||
const text = await res.text(); // Get raw text first
|
||||
setDebugData(text.substring(0, 200) + (text.length > 200 ? "..." : "")); // Show preview
|
||||
|
||||
const data = JSON.parse(text);
|
||||
setMaterials(data.materials || []);
|
||||
|
||||
if (data.materials?.length > 0) {
|
||||
if (!selectedMaterial) {
|
||||
setSelectedMaterial(data.materials[0].id);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("获取素材失败:", error);
|
||||
setFetchError(String(error));
|
||||
setDebugData(`Error: ${String(error)}`);
|
||||
}
|
||||
};
|
||||
|
||||
// 生成视频
|
||||
const handleGenerate = async () => {
|
||||
if (!selectedMaterial || !text.trim()) {
|
||||
alert("请选择素材并输入文案");
|
||||
return;
|
||||
}
|
||||
|
||||
setIsGenerating(true);
|
||||
setGeneratedVideo(null);
|
||||
|
||||
try {
|
||||
// 查找选中的素材对象以获取路径
|
||||
const materialObj = materials.find(m => m.id === selectedMaterial);
|
||||
if (!materialObj) {
|
||||
alert("素材数据异常");
|
||||
return;
|
||||
}
|
||||
|
||||
// 创建生成任务
|
||||
const res = await fetch(`${API_BASE}/api/videos/generate`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
material_path: materialObj.path,
|
||||
text: text,
|
||||
voice: voice,
|
||||
add_subtitle: true,
|
||||
}),
|
||||
});
|
||||
|
||||
const data = await res.json();
|
||||
const taskId = data.task_id;
|
||||
|
||||
// 轮询任务状态
|
||||
const pollTask = async () => {
|
||||
const taskRes = await fetch(`${API_BASE}/api/videos/tasks/${taskId}`);
|
||||
const taskData: Task = await taskRes.json();
|
||||
setCurrentTask(taskData);
|
||||
|
||||
if (taskData.status === "completed") {
|
||||
setGeneratedVideo(`${API_BASE}${taskData.download_url}`);
|
||||
setIsGenerating(false);
|
||||
} else if (taskData.status === "failed") {
|
||||
alert("视频生成失败: " + taskData.message);
|
||||
setIsGenerating(false);
|
||||
} else {
|
||||
setTimeout(pollTask, 1000);
|
||||
}
|
||||
};
|
||||
|
||||
pollTask();
|
||||
} catch (error) {
|
||||
console.error("生成失败:", error);
|
||||
setIsGenerating(false);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="min-h-screen bg-gradient-to-br from-slate-900 via-purple-900 to-slate-900">
|
||||
{/* Header */}
|
||||
<header className="border-b border-white/10 bg-black/20 backdrop-blur-sm">
|
||||
<div className="max-w-6xl mx-auto px-6 py-4 flex items-center justify-between">
|
||||
<h1 className="text-2xl font-bold text-white flex items-center gap-3">
|
||||
<span className="text-3xl">🎬</span>
|
||||
ViGent
|
||||
</h1>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<main className="max-w-6xl mx-auto px-6 py-8">
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
|
||||
{/* 左侧: 输入区域 */}
|
||||
<div className="space-y-6">
|
||||
{/* 素材选择 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<div className="flex justify-between items-center mb-4">
|
||||
<h2 className="text-lg font-semibold text-white flex items-center gap-2">
|
||||
📹 选择素材视频
|
||||
</h2>
|
||||
<button
|
||||
onClick={fetchMaterials}
|
||||
className="px-3 py-1 text-xs bg-white/10 hover:bg-white/20 rounded text-gray-300"
|
||||
>
|
||||
🔄 刷新
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{fetchError ? (
|
||||
<div className="p-4 bg-red-500/20 text-red-200 rounded-xl text-sm mb-4">
|
||||
获取素材失败: {fetchError}
|
||||
<br />
|
||||
API: {API_BASE}/api/materials/
|
||||
</div>
|
||||
) : materials.length === 0 ? (
|
||||
<div className="text-center py-8 text-gray-400">
|
||||
<p>暂无素材视频</p>
|
||||
<p className="text-sm mt-2">
|
||||
请将视频放入 backend/uploads/materials/ 目录
|
||||
</p>
|
||||
<div className="mt-4 p-4 bg-black/40 rounded text-left text-xs font-mono text-gray-500 overflow-auto whitespace-pre-wrap break-all">
|
||||
<p className="font-bold text-purple-400">Debug Info:</p>
|
||||
<p>Time: {new Date().toLocaleTimeString()}</p>
|
||||
<p>Items: {materials.length}</p>
|
||||
<p className="mt-2 text-gray-400 border-t border-gray-700 pt-2">Raw Response:</p>
|
||||
<p>{debugData}</p>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="grid grid-cols-2 gap-3">
|
||||
{materials.map((m) => (
|
||||
<button
|
||||
key={m.id}
|
||||
onClick={() => setSelectedMaterial(m.id)}
|
||||
className={`p-4 rounded-xl border-2 transition-all text-left ${selectedMaterial === m.id
|
||||
? "border-purple-500 bg-purple-500/20"
|
||||
: "border-white/10 bg-white/5 hover:border-white/30"
|
||||
}`}
|
||||
>
|
||||
<div className="text-white font-medium truncate">
|
||||
{m.scene || m.name}
|
||||
</div>
|
||||
<div className="text-gray-400 text-sm mt-1">
|
||||
{m.size_mb.toFixed(1)} MB
|
||||
</div>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* 文案输入 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4 flex items-center gap-2">
|
||||
✍️ 输入口播文案
|
||||
</h2>
|
||||
<textarea
|
||||
value={text}
|
||||
onChange={(e) => setText(e.target.value)}
|
||||
placeholder="请输入你想说的话..."
|
||||
className="w-full h-40 bg-black/30 border border-white/10 rounded-xl p-4 text-white placeholder-gray-500 resize-none focus:outline-none focus:border-purple-500 transition-colors"
|
||||
/>
|
||||
<div className="flex justify-between mt-2 text-sm text-gray-400">
|
||||
<span>{text.length} 字</span>
|
||||
<span>预计时长: ~{Math.ceil(text.length / 4)} 秒</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 音色选择 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4 flex items-center gap-2">
|
||||
🎙️ 选择配音音色
|
||||
</h2>
|
||||
<div className="grid grid-cols-2 gap-3">
|
||||
{voices.map((v) => (
|
||||
<button
|
||||
key={v.id}
|
||||
onClick={() => setVoice(v.id)}
|
||||
className={`p-3 rounded-xl border-2 transition-all text-left ${voice === v.id
|
||||
? "border-purple-500 bg-purple-500/20"
|
||||
: "border-white/10 bg-white/5 hover:border-white/30"
|
||||
}`}
|
||||
>
|
||||
<span className="text-white text-sm">{v.name}</span>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 生成按钮 */}
|
||||
<button
|
||||
onClick={handleGenerate}
|
||||
disabled={isGenerating || !selectedMaterial}
|
||||
className={`w-full py-4 rounded-xl font-bold text-lg transition-all ${isGenerating || !selectedMaterial
|
||||
? "bg-gray-600 cursor-not-allowed text-gray-400"
|
||||
: "bg-gradient-to-r from-purple-600 to-pink-600 hover:from-purple-700 hover:to-pink-700 text-white shadow-lg hover:shadow-purple-500/25"
|
||||
}`}
|
||||
>
|
||||
{isGenerating ? (
|
||||
<span className="flex items-center justify-center gap-3">
|
||||
<svg className="animate-spin h-5 w-5" viewBox="0 0 24 24">
|
||||
<circle
|
||||
className="opacity-25"
|
||||
cx="12"
|
||||
cy="12"
|
||||
r="10"
|
||||
stroke="currentColor"
|
||||
strokeWidth="4"
|
||||
fill="none"
|
||||
/>
|
||||
<path
|
||||
className="opacity-75"
|
||||
fill="currentColor"
|
||||
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
|
||||
/>
|
||||
</svg>
|
||||
生成中... {currentTask?.progress || 0}%
|
||||
</span>
|
||||
) : (
|
||||
"🚀 生成视频"
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* 右侧: 预览区域 */}
|
||||
<div className="space-y-6">
|
||||
{/* 进度显示 */}
|
||||
{currentTask && isGenerating && (
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4">
|
||||
⏳ 生成进度
|
||||
</h2>
|
||||
<div className="space-y-3">
|
||||
<div className="h-3 bg-black/30 rounded-full overflow-hidden">
|
||||
<div
|
||||
className="h-full bg-gradient-to-r from-purple-500 to-pink-500 transition-all duration-300"
|
||||
style={{ width: `${currentTask.progress}%` }}
|
||||
/>
|
||||
</div>
|
||||
<p className="text-gray-300">{currentTask.message}</p>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* 视频预览 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4 flex items-center gap-2">
|
||||
🎥 视频预览
|
||||
</h2>
|
||||
<div className="aspect-video bg-black/50 rounded-xl overflow-hidden flex items-center justify-center">
|
||||
{generatedVideo ? (
|
||||
<video
|
||||
src={generatedVideo}
|
||||
controls
|
||||
className="w-full h-full object-contain"
|
||||
/>
|
||||
) : (
|
||||
<div className="text-gray-500 text-center">
|
||||
<div className="text-5xl mb-4">📹</div>
|
||||
<p>生成的视频将在这里预览</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{generatedVideo && (
|
||||
<a
|
||||
href={generatedVideo}
|
||||
download
|
||||
className="mt-4 w-full py-3 rounded-xl bg-green-600 hover:bg-green-700 text-white font-medium flex items-center justify-center gap-2 transition-colors"
|
||||
>
|
||||
⬇️ 下载视频
|
||||
</a>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
{/* Footer */}
|
||||
<footer className="border-t border-white/10 mt-12">
|
||||
<div className="max-w-6xl mx-auto px-6 py-4 text-center text-gray-500 text-sm">
|
||||
ViGent - 基于 MuseTalk + EdgeTTS
|
||||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
335
frontend/src/app/publish/page.tsx
Normal file
335
frontend/src/app/publish/page.tsx
Normal file
@@ -0,0 +1,335 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import Link from "next/link";
|
||||
|
||||
const API_BASE = "http://127.0.0.1:8000";
|
||||
|
||||
interface Account {
|
||||
platform: string;
|
||||
name: string;
|
||||
logged_in: boolean;
|
||||
enabled: boolean;
|
||||
}
|
||||
|
||||
interface Video {
|
||||
name: string;
|
||||
path: string;
|
||||
}
|
||||
|
||||
export default function PublishPage() {
|
||||
const [accounts, setAccounts] = useState<Account[]>([]);
|
||||
const [videos, setVideos] = useState<Video[]>([]);
|
||||
const [selectedVideo, setSelectedVideo] = useState<string>("");
|
||||
const [selectedPlatforms, setSelectedPlatforms] = useState<string[]>([]);
|
||||
const [title, setTitle] = useState<string>("");
|
||||
const [tags, setTags] = useState<string>("");
|
||||
const [isPublishing, setIsPublishing] = useState(false);
|
||||
const [publishResults, setPublishResults] = useState<any[]>([]);
|
||||
|
||||
// 加载账号和视频列表
|
||||
useEffect(() => {
|
||||
fetchAccounts();
|
||||
fetchVideos();
|
||||
}, []);
|
||||
|
||||
const fetchAccounts = async () => {
|
||||
try {
|
||||
const res = await fetch(`${API_BASE}/api/publish/accounts`);
|
||||
const data = await res.json();
|
||||
setAccounts(data.accounts || []);
|
||||
} catch (error) {
|
||||
console.error("获取账号失败:", error);
|
||||
}
|
||||
};
|
||||
|
||||
const fetchVideos = async () => {
|
||||
try {
|
||||
// 获取已生成的视频列表 (从 outputs 目录)
|
||||
const res = await fetch(`${API_BASE}/api/videos/tasks`);
|
||||
const data = await res.json();
|
||||
|
||||
const completedVideos = data.tasks
|
||||
?.filter((t: any) => t.status === "completed")
|
||||
.map((t: any) => ({
|
||||
name: `${t.task_id}_output.mp4`,
|
||||
path: `outputs/${t.task_id}_output.mp4`,
|
||||
})) || [];
|
||||
|
||||
setVideos(completedVideos);
|
||||
if (completedVideos.length > 0) {
|
||||
setSelectedVideo(completedVideos[0].path);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("获取视频失败:", error);
|
||||
}
|
||||
};
|
||||
|
||||
const togglePlatform = (platform: string) => {
|
||||
if (selectedPlatforms.includes(platform)) {
|
||||
setSelectedPlatforms(selectedPlatforms.filter((p) => p !== platform));
|
||||
} else {
|
||||
setSelectedPlatforms([...selectedPlatforms, platform]);
|
||||
}
|
||||
};
|
||||
|
||||
const handlePublish = async () => {
|
||||
if (!selectedVideo || !title || selectedPlatforms.length === 0) {
|
||||
alert("请选择视频、填写标题并选择至少一个平台");
|
||||
return;
|
||||
}
|
||||
|
||||
setIsPublishing(true);
|
||||
setPublishResults([]);
|
||||
|
||||
const tagList = tags.split(/[,,\s]+/).filter((t) => t.trim());
|
||||
|
||||
for (const platform of selectedPlatforms) {
|
||||
try {
|
||||
const res = await fetch(`${API_BASE}/api/publish/`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
video_path: selectedVideo,
|
||||
platform,
|
||||
title,
|
||||
tags: tagList,
|
||||
description: "",
|
||||
}),
|
||||
});
|
||||
|
||||
const result = await res.json();
|
||||
setPublishResults((prev) => [...prev, result]);
|
||||
} catch (error) {
|
||||
setPublishResults((prev) => [
|
||||
...prev,
|
||||
{ platform, success: false, message: String(error) },
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
setIsPublishing(false);
|
||||
};
|
||||
|
||||
const handleLogin = async (platform: string) => {
|
||||
alert(
|
||||
`登录功能需要在服务端执行。\n\n请在终端运行:\ncurl -X POST http://localhost:8000/api/publish/login/${platform}`
|
||||
);
|
||||
};
|
||||
|
||||
const platformIcons: Record<string, string> = {
|
||||
douyin: "🎵",
|
||||
xiaohongshu: "📕",
|
||||
weixin: "💬",
|
||||
kuaishou: "⚡",
|
||||
bilibili: "📺",
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="min-h-screen bg-gradient-to-br from-slate-900 via-purple-900 to-slate-900">
|
||||
{/* Header */}
|
||||
<header className="border-b border-white/10 bg-black/20 backdrop-blur-sm">
|
||||
<div className="max-w-6xl mx-auto px-6 py-4 flex items-center justify-between">
|
||||
<Link href="/" className="text-2xl font-bold text-white flex items-center gap-3 hover:opacity-80">
|
||||
<span className="text-3xl">🎬</span>
|
||||
TalkingHead Agent
|
||||
</Link>
|
||||
<nav className="flex gap-4">
|
||||
<Link
|
||||
href="/"
|
||||
className="px-4 py-2 text-gray-400 hover:text-white transition-colors"
|
||||
>
|
||||
视频生成
|
||||
</Link>
|
||||
<Link
|
||||
href="/publish"
|
||||
className="px-4 py-2 text-white bg-purple-600 rounded-lg"
|
||||
>
|
||||
发布管理
|
||||
</Link>
|
||||
</nav>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<main className="max-w-6xl mx-auto px-6 py-8">
|
||||
<h1 className="text-3xl font-bold text-white mb-8">📤 社交媒体发布</h1>
|
||||
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
|
||||
{/* 左侧: 账号管理 */}
|
||||
<div className="space-y-6">
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4 flex items-center gap-2">
|
||||
👤 平台账号
|
||||
</h2>
|
||||
|
||||
<div className="space-y-3">
|
||||
{accounts.map((account) => (
|
||||
<div
|
||||
key={account.platform}
|
||||
className="flex items-center justify-between p-4 bg-black/30 rounded-xl"
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="text-2xl">
|
||||
{platformIcons[account.platform]}
|
||||
</span>
|
||||
<div>
|
||||
<div className="text-white font-medium">
|
||||
{account.name}
|
||||
</div>
|
||||
<div
|
||||
className={`text-sm ${account.logged_in
|
||||
? "text-green-400"
|
||||
: "text-gray-500"
|
||||
}`}
|
||||
>
|
||||
{account.logged_in ? "✓ 已登录" : "未登录"}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => handleLogin(account.platform)}
|
||||
className={`px-4 py-2 rounded-lg text-sm font-medium transition-colors ${account.logged_in
|
||||
? "bg-gray-600 text-gray-300"
|
||||
: "bg-purple-600 hover:bg-purple-700 text-white"
|
||||
}`}
|
||||
>
|
||||
{account.logged_in ? "重新登录" : "登录"}
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 右侧: 发布表单 */}
|
||||
<div className="space-y-6">
|
||||
{/* 选择视频 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4">
|
||||
🎥 选择要发布的视频
|
||||
</h2>
|
||||
|
||||
{videos.length === 0 ? (
|
||||
<p className="text-gray-400">
|
||||
暂无已生成的视频,请先
|
||||
<Link href="/" className="text-purple-400 hover:underline">
|
||||
生成视频
|
||||
</Link>
|
||||
</p>
|
||||
) : (
|
||||
<select
|
||||
value={selectedVideo}
|
||||
onChange={(e) => setSelectedVideo(e.target.value)}
|
||||
className="w-full p-3 bg-black/30 border border-white/10 rounded-xl text-white"
|
||||
>
|
||||
{videos.map((v) => (
|
||||
<option key={v.path} value={v.path}>
|
||||
{v.name}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* 填写信息 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4">✍️ 发布信息</h2>
|
||||
|
||||
<div className="space-y-4">
|
||||
<div>
|
||||
<label className="block text-gray-400 text-sm mb-2">
|
||||
标题
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
value={title}
|
||||
onChange={(e) => setTitle(e.target.value)}
|
||||
placeholder="输入视频标题..."
|
||||
className="w-full p-3 bg-black/30 border border-white/10 rounded-xl text-white placeholder-gray-500"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label className="block text-gray-400 text-sm mb-2">
|
||||
标签 (用逗号分隔)
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
value={tags}
|
||||
onChange={(e) => setTags(e.target.value)}
|
||||
placeholder="AI, 数字人, 口播..."
|
||||
className="w-full p-3 bg-black/30 border border-white/10 rounded-xl text-white placeholder-gray-500"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 选择平台 */}
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10 backdrop-blur-sm">
|
||||
<h2 className="text-lg font-semibold text-white mb-4">📱 选择发布平台</h2>
|
||||
|
||||
<div className="grid grid-cols-3 gap-3">
|
||||
{accounts
|
||||
.filter((a) => a.logged_in)
|
||||
.map((account) => (
|
||||
<button
|
||||
key={account.platform}
|
||||
onClick={() => togglePlatform(account.platform)}
|
||||
className={`p-3 rounded-xl border-2 transition-all ${selectedPlatforms.includes(account.platform)
|
||||
? "border-purple-500 bg-purple-500/20"
|
||||
: "border-white/10 bg-white/5 hover:border-white/30"
|
||||
}`}
|
||||
>
|
||||
<span className="text-2xl block mb-1">
|
||||
{platformIcons[account.platform]}
|
||||
</span>
|
||||
<span className="text-white text-sm">{account.name}</span>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{accounts.filter((a) => a.logged_in).length === 0 && (
|
||||
<p className="text-gray-400 text-center py-4">
|
||||
请先登录至少一个平台账号
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* 发布按钮 */}
|
||||
<button
|
||||
onClick={handlePublish}
|
||||
disabled={isPublishing || selectedPlatforms.length === 0}
|
||||
className={`w-full py-4 rounded-xl font-bold text-lg transition-all ${isPublishing || selectedPlatforms.length === 0
|
||||
? "bg-gray-600 cursor-not-allowed text-gray-400"
|
||||
: "bg-gradient-to-r from-green-600 to-teal-600 hover:from-green-700 hover:to-teal-700 text-white"
|
||||
}`}
|
||||
>
|
||||
{isPublishing ? "发布中..." : "🚀 一键发布"}
|
||||
</button>
|
||||
|
||||
{/* 发布结果 */}
|
||||
{publishResults.length > 0 && (
|
||||
<div className="bg-white/5 rounded-2xl p-6 border border-white/10">
|
||||
<h2 className="text-lg font-semibold text-white mb-4">
|
||||
发布结果
|
||||
</h2>
|
||||
<div className="space-y-2">
|
||||
{publishResults.map((result, i) => (
|
||||
<div
|
||||
key={i}
|
||||
className={`p-3 rounded-lg ${result.success ? "bg-green-500/20" : "bg-red-500/20"
|
||||
}`}
|
||||
>
|
||||
<span className="text-white">
|
||||
{platformIcons[result.platform]} {result.message}
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
34
frontend/tsconfig.json
Normal file
34
frontend/tsconfig.json
Normal file
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2017",
|
||||
"lib": ["dom", "dom.iterable", "esnext"],
|
||||
"allowJs": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
"noEmit": true,
|
||||
"esModuleInterop": true,
|
||||
"module": "esnext",
|
||||
"moduleResolution": "bundler",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"jsx": "react-jsx",
|
||||
"incremental": true,
|
||||
"plugins": [
|
||||
{
|
||||
"name": "next"
|
||||
}
|
||||
],
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"include": [
|
||||
"next-env.d.ts",
|
||||
"**/*.ts",
|
||||
"**/*.tsx",
|
||||
".next/types/**/*.ts",
|
||||
".next/dev/types/**/*.ts",
|
||||
"**/*.mts"
|
||||
],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
186
models/MuseTalk/DEPLOY.md
Normal file
186
models/MuseTalk/DEPLOY.md
Normal file
@@ -0,0 +1,186 @@
|
||||
# MuseTalk 部署指南
|
||||
|
||||
## 硬件要求
|
||||
|
||||
| 配置 | 最低要求 | 推荐配置 |
|
||||
|------|----------|----------|
|
||||
| GPU | 8GB VRAM (如 RTX 3060) | 24GB VRAM (如 RTX 3090) |
|
||||
| 内存 | 32GB | 64GB |
|
||||
| CUDA | 11.7+ | 12.0+ |
|
||||
|
||||
---
|
||||
|
||||
## 📦 安装步骤
|
||||
|
||||
### 1. 克隆 MuseTalk 仓库
|
||||
|
||||
```bash
|
||||
# 进入 ViGent 项目的 models 目录
|
||||
cd /home/rongye/ProgramFiles/ViGent/models
|
||||
|
||||
# 克隆 MuseTalk 仓库
|
||||
git clone https://github.com/TMElyralab/MuseTalk.git MuseTalk_repo
|
||||
|
||||
# 保留我们的自定义文件
|
||||
cp MuseTalk/DEPLOY.md MuseTalk_repo/
|
||||
cp MuseTalk/musetalk_api.py MuseTalk_repo/
|
||||
|
||||
# 替换目录
|
||||
rm -rf MuseTalk
|
||||
mv MuseTalk_repo MuseTalk
|
||||
```
|
||||
|
||||
### 2. 创建虚拟环境
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/models/MuseTalk
|
||||
conda create -n musetalk python=3.10 -y
|
||||
conda activate musetalk
|
||||
```
|
||||
|
||||
### 3. 安装 PyTorch (CUDA 12.1)
|
||||
|
||||
```bash
|
||||
# CUDA 12.1 (适配服务器 CUDA 12.8)
|
||||
pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121
|
||||
```
|
||||
|
||||
### 4. 安装 MuseTalk 依赖
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 安装 mmlab 系列 (MuseTalk 必需)
|
||||
pip install --no-cache-dir -U openmim
|
||||
mim install mmengine
|
||||
mim install "mmcv>=2.0.1"
|
||||
mim install "mmdet>=3.1.0"
|
||||
mim install "mmpose>=1.1.0"
|
||||
```
|
||||
|
||||
### 5. 下载模型权重 ⬇️
|
||||
|
||||
> **权重文件较大(约 5GB),请确保网络稳定**
|
||||
|
||||
#### 方式一:从 Hugging Face 下载 (推荐)
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/models/MuseTalk
|
||||
|
||||
# 安装 huggingface-cli
|
||||
pip install huggingface_hub
|
||||
|
||||
# 下载 MuseTalk 权重 (v1.5)
|
||||
huggingface-cli download TMElyralab/MuseTalk \
|
||||
--local-dir ./models/musetalk \
|
||||
--include "*.pth" "*.json"
|
||||
|
||||
# 下载 MuseTalk V15 权重
|
||||
huggingface-cli download TMElyralab/MuseTalk \
|
||||
--local-dir ./models/musetalkV15 \
|
||||
--include "unet.pth"
|
||||
|
||||
# 下载 SD-VAE 模型 (Stable Diffusion VAE)
|
||||
huggingface-cli download stabilityai/sd-vae-ft-mse \
|
||||
--local-dir ./models/sd-vae-ft-mse
|
||||
|
||||
# 下载 Whisper 模型 (音频特征提取)
|
||||
# MuseTalk 使用 whisper-tiny
|
||||
huggingface-cli download openai/whisper-tiny \
|
||||
--local-dir ./models/whisper
|
||||
```
|
||||
|
||||
#### 方式二:手动下载
|
||||
|
||||
从以下链接下载并放到对应目录:
|
||||
|
||||
| 模型 | 下载链接 | 存放路径 |
|
||||
|------|----------|----------|
|
||||
| MuseTalk | [Hugging Face](https://huggingface.co/TMElyralab/MuseTalk) | `models/MuseTalk/models/musetalk/` |
|
||||
| MuseTalk V15 | 同上 | `models/MuseTalk/models/musetalkV15/` |
|
||||
| SD-VAE | [Hugging Face](https://huggingface.co/stabilityai/sd-vae-ft-mse) | `models/MuseTalk/models/sd-vae-ft-mse/` |
|
||||
| Whisper | [Hugging Face](https://huggingface.co/openai/whisper-tiny) | `models/MuseTalk/models/whisper/` |
|
||||
| DWPose | 按官方 README | `models/MuseTalk/models/dwpose/` |
|
||||
| Face Parse | 按官方 README | `models/MuseTalk/models/face-parse-bisent/` |
|
||||
|
||||
### 6. 验证安装
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/models/MuseTalk
|
||||
conda activate musetalk
|
||||
|
||||
# 测试推理 (使用 GPU1)
|
||||
CUDA_VISIBLE_DEVICES=1 python -m scripts.inference \
|
||||
--version v15 \
|
||||
--inference_config configs/inference/test.yaml \
|
||||
--result_dir ./results \
|
||||
--use_float16
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📂 目录结构
|
||||
|
||||
安装完成后目录结构:
|
||||
|
||||
```
|
||||
models/MuseTalk/
|
||||
├── configs/
|
||||
│ └── inference/
|
||||
├── models/ # ⬅️ 权重文件目录
|
||||
│ ├── musetalk/ # MuseTalk 基础权重
|
||||
│ │ ├── config.json
|
||||
│ │ └── pytorch_model.bin
|
||||
│ ├── musetalkV15/ # V1.5 版本 UNet
|
||||
│ │ └── unet.pth
|
||||
│ ├── sd-vae-ft-mse/ # Stable Diffusion VAE
|
||||
│ │ └── diffusion_pytorch_model.bin
|
||||
│ ├── whisper/ # Whisper 模型
|
||||
│ ├── dwpose/ # 姿态检测
|
||||
│ └── face-parse-bisent/ # 人脸解析
|
||||
├── musetalk/ # MuseTalk 源码
|
||||
├── scripts/
|
||||
│ └── inference.py
|
||||
├── DEPLOY.md # 本文档
|
||||
└── musetalk_api.py # API 服务
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 ViGent 集成配置
|
||||
|
||||
### 环境变量配置
|
||||
|
||||
在 `/home/rongye/ProgramFiles/ViGent/backend/.env` 中设置:
|
||||
|
||||
```bash
|
||||
# MuseTalk 配置
|
||||
MUSETALK_LOCAL=true
|
||||
MUSETALK_GPU_ID=1
|
||||
MUSETALK_VERSION=v15
|
||||
MUSETALK_USE_FLOAT16=true
|
||||
MUSETALK_BATCH_SIZE=8
|
||||
```
|
||||
|
||||
### 启动后端服务
|
||||
|
||||
```bash
|
||||
cd /home/rongye/ProgramFiles/ViGent/backend
|
||||
source venv/bin/activate
|
||||
|
||||
# 设置 GPU 并启动
|
||||
CUDA_VISIBLE_DEVICES=1 uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚨 常见问题
|
||||
|
||||
### Q1: CUDA out of memory
|
||||
**解决**:减小 `MUSETALK_BATCH_SIZE` 或启用 `MUSETALK_USE_FLOAT16=true`
|
||||
|
||||
### Q2: mmcv 安装失败
|
||||
**解决**:确保 CUDA 版本匹配,使用 `mim install mmcv==2.0.1`
|
||||
|
||||
### Q3: Whisper 加载失败
|
||||
**解决**:检查 `models/whisper/` 目录是否包含完整模型文件
|
||||
157
models/MuseTalk/musetalk_api.py
Normal file
157
models/MuseTalk/musetalk_api.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""
|
||||
MuseTalk API 服务
|
||||
|
||||
这个脚本将 MuseTalk 封装为 FastAPI 服务,
|
||||
可以独立部署在 GPU 服务器上。
|
||||
|
||||
用法:
|
||||
python musetalk_api.py --port 8001
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import tempfile
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
import uvicorn
|
||||
|
||||
# Make the bundled MuseTalk sources importable regardless of the CWD the
# script is launched from.
MUSETALK_DIR = Path(__file__).parent
sys.path.insert(0, str(MUSETALK_DIR))

app = FastAPI(
    title="MuseTalk API",
    description="唇形同步推理服务",
    version="0.1.0",
)

# NOTE(review): CORS is wide open (all origins/methods/headers). Acceptable
# for a LAN-only inference box; tighten before any public exposure.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Lazily-initialized global model handle; populated by get_model().
_model = None
|
||||
|
||||
|
||||
def get_model():
    """Lazily load the MuseTalk model on first use.

    Returns:
        The model instance, or ``None`` while the real MuseTalk
        integration is still stubbed out (see TODO below).
    """
    global _model
    if _model is None:
        # Bug fix: because the actual loader is still a TODO, `_model`
        # stays None forever, so the original printed the "loading" banner
        # on EVERY call. Track the announcement on the function itself so
        # it happens exactly once per process.
        if not getattr(get_model, "_announced", False):
            get_model._announced = True
            print("🔄 加载 MuseTalk 模型...")
            # TODO: wire up the real MuseTalk inference API, e.g.:
            # from musetalk.inference import MuseTalkInference
            # _model = MuseTalkInference()
            print("✅ MuseTalk 模型加载完成")
    return _model
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
return {"name": "MuseTalk API", "status": "ok"}
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
"""健康检查"""
|
||||
return {"status": "healthy", "gpu": True}
|
||||
|
||||
|
||||
@app.post("/lipsync")
|
||||
async def lipsync(
|
||||
video: UploadFile = File(..., description="输入视频文件"),
|
||||
audio: UploadFile = File(..., description="音频文件"),
|
||||
fps: int = Form(25, description="输出帧率")
|
||||
):
|
||||
"""
|
||||
唇形同步推理
|
||||
|
||||
Args:
|
||||
video: 输入视频 (静态人物)
|
||||
audio: 驱动音频
|
||||
fps: 输出帧率
|
||||
|
||||
Returns:
|
||||
生成的视频文件
|
||||
"""
|
||||
# 创建临时目录
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
tmpdir = Path(tmpdir)
|
||||
|
||||
# 保存上传的文件
|
||||
video_path = tmpdir / "input_video.mp4"
|
||||
audio_path = tmpdir / "input_audio.wav"
|
||||
output_path = tmpdir / "output.mp4"
|
||||
|
||||
with open(video_path, "wb") as f:
|
||||
shutil.copyfileobj(video.file, f)
|
||||
with open(audio_path, "wb") as f:
|
||||
shutil.copyfileobj(audio.file, f)
|
||||
|
||||
try:
|
||||
# 执行唇形同步
|
||||
model = get_model()
|
||||
|
||||
# TODO: 调用实际的 MuseTalk 推理
|
||||
# result = model.inference(
|
||||
# source_video=str(video_path),
|
||||
# driving_audio=str(audio_path),
|
||||
# output_path=str(output_path),
|
||||
# fps=fps
|
||||
# )
|
||||
|
||||
# 临时: 使用 subprocess 调用 MuseTalk CLI
|
||||
import subprocess
|
||||
cmd = [
|
||||
sys.executable, "-m", "scripts.inference",
|
||||
"--video_path", str(video_path),
|
||||
"--audio_path", str(audio_path),
|
||||
"--output_path", str(output_path),
|
||||
]
|
||||
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
cwd=str(MUSETALK_DIR),
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(f"MuseTalk 推理失败: {result.stderr}")
|
||||
|
||||
if not output_path.exists():
|
||||
raise RuntimeError("输出文件不存在")
|
||||
|
||||
# 返回生成的视频
|
||||
# 需要先复制到持久化位置
|
||||
final_output = Path("outputs") / f"lipsync_{video.filename}"
|
||||
final_output.parent.mkdir(exist_ok=True)
|
||||
shutil.copy(output_path, final_output)
|
||||
|
||||
return FileResponse(
|
||||
final_output,
|
||||
media_type="video/mp4",
|
||||
filename=f"lipsync_{video.filename}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--port", type=int, default=8001)
|
||||
parser.add_argument("--host", type=str, default="0.0.0.0")
|
||||
args = parser.parse_args()
|
||||
|
||||
print(f"🚀 MuseTalk API 启动在 http://{args.host}:{args.port}")
|
||||
uvicorn.run(app, host=args.host, port=args.port)
|
||||
Reference in New Issue
Block a user