This commit is contained in:
Kevin Wong
2026-02-03 17:12:30 +08:00
parent cb10da52fc
commit 6c6fbae13a
12 changed files with 1225 additions and 131 deletions

22
backend/app/api/assets.py Normal file
View File

@@ -0,0 +1,22 @@
from fastapi import APIRouter, Depends
from app.core.deps import get_current_user
from app.services.assets_service import list_styles, list_bgm
# Router that collects the asset catalogue endpoints declared below
# (subtitle styles, title styles and background music).
router = APIRouter()
@router.get("/subtitle-styles")
async def list_subtitle_styles(current_user: dict = Depends(get_current_user)):
    """List the available subtitle styles.

    ``current_user`` is resolved through the ``get_current_user`` dependency
    and is not read by the handler itself.

    Returns:
        A dict with a single ``"styles"`` key holding the subtitle style list.
    """
    subtitle_styles = list_styles("subtitle")
    return {"styles": subtitle_styles}
@router.get("/title-styles")
async def list_title_styles(current_user: dict = Depends(get_current_user)):
    """List the available title styles.

    ``current_user`` is resolved through the ``get_current_user`` dependency
    and is not read by the handler itself.

    Returns:
        A dict with a single ``"styles"`` key holding the title style list.
    """
    title_styles = list_styles("title")
    return {"styles": title_styles}
@router.get("/bgm")
async def list_bgm_items(current_user: dict = Depends(get_current_user)):
    """List the available background-music tracks.

    ``current_user`` is resolved through the ``get_current_user`` dependency
    and is not read by the handler itself.

    Returns:
        A dict with a single ``"bgm"`` key holding the track list.
    """
    tracks = list_bgm()
    return {"bgm": tracks}

View File

@@ -8,13 +8,19 @@ import traceback
import time
import httpx
import os
from app.services.tts_service import TTSService
from app.services.video_service import VideoService
from app.services.lipsync_service import LipSyncService
from app.services.voice_clone_service import voice_clone_service
from app.services.storage import storage_service
from app.services.whisper_service import whisper_service
from app.services.remotion_service import remotion_service
from app.services.tts_service import TTSService
from app.services.video_service import VideoService
from app.services.lipsync_service import LipSyncService
from app.services.voice_clone_service import voice_clone_service
from app.services.assets_service import (
get_style,
get_default_style,
resolve_bgm_path,
prepare_style_for_remotion,
)
from app.services.storage import storage_service
from app.services.whisper_service import whisper_service
from app.services.remotion_service import remotion_service
from app.core.config import settings
from app.core.deps import get_current_user
@@ -28,9 +34,15 @@ class GenerateRequest(BaseModel):
tts_mode: str = "edgetts" # "edgetts" | "voiceclone"
ref_audio_id: Optional[str] = None # 参考音频 storage path
ref_text: Optional[str] = None # 参考音频的转写文字
# 字幕和标题功能
title: Optional[str] = None # 视频标题(片头显示)
enable_subtitles: bool = True # 是否启用逐字高亮字幕
# 字幕和标题功能
title: Optional[str] = None # 视频标题(片头显示)
enable_subtitles: bool = True # 是否启用逐字高亮字幕
subtitle_style_id: Optional[str] = None # 字幕样式 ID
title_style_id: Optional[str] = None # 标题样式 ID
subtitle_font_size: Optional[int] = None # 字幕字号(覆盖样式)
title_font_size: Optional[int] = None # 标题字号(覆盖样式)
bgm_id: Optional[str] = None # 背景音乐 ID
bgm_volume: Optional[float] = 0.2 # 背景音乐音量 (0-1)
tasks = {} # In-memory task store
@@ -52,15 +64,15 @@ async def _check_lipsync_ready(force: bool = False) -> bool:
now = time.time()
# 5分钟缓存
if not force and _lipsync_ready is not None and (now - _lipsync_last_check) < 300:
return _lipsync_ready
if not force and _lipsync_ready is not None and (now - _lipsync_last_check) < 300:
return bool(_lipsync_ready)
lipsync = _get_lipsync_service()
health = await lipsync.check_health()
_lipsync_ready = health.get("ready", False)
_lipsync_last_check = now
print(f"[LipSync] Health check: ready={_lipsync_ready}")
return _lipsync_ready
return bool(_lipsync_ready)
async def _download_material(path_or_url: str, temp_path: Path):
"""下载素材到临时文件 (流式下载,节省内存)"""
@@ -194,25 +206,79 @@ async def _process_video_generation(task_id: str, req: GenerateRequest, user_id:
logger.warning(f"Whisper alignment failed, skipping subtitles: {e}")
captions_path = None
tasks[task_id]["progress"] = 85
tasks[task_id]["progress"] = 85
# 3.5 背景音乐混音(不影响唇形与字幕对齐)
video = VideoService()
final_audio_path = audio_path
if req.bgm_id:
tasks[task_id]["message"] = "正在合成背景音乐..."
tasks[task_id]["progress"] = 86
bgm_path = resolve_bgm_path(req.bgm_id)
if bgm_path:
mix_output_path = temp_dir / f"{task_id}_audio_mix.wav"
temp_files.append(mix_output_path)
volume = req.bgm_volume if req.bgm_volume is not None else 0.2
volume = max(0.0, min(float(volume), 1.0))
try:
video.mix_audio(
voice_path=str(audio_path),
bgm_path=str(bgm_path),
output_path=str(mix_output_path),
bgm_volume=volume
)
final_audio_path = mix_output_path
except Exception as e:
logger.warning(f"BGM mix failed, fallback to voice only: {e}")
else:
logger.warning(f"BGM not found: {req.bgm_id}")
# 4. Remotion 视频合成(字幕 + 标题)- 进度 85% -> 95%
# 判断是否需要使用 Remotion有字幕或标题时使用
use_remotion = (captions_path and captions_path.exists()) or req.title
# 4. Remotion 视频合成(字幕 + 标题)- 进度 85% -> 95%
# 判断是否需要使用 Remotion有字幕或标题时使用
use_remotion = (captions_path and captions_path.exists()) or req.title
subtitle_style = None
title_style = None
if req.enable_subtitles:
subtitle_style = get_style("subtitle", req.subtitle_style_id) or get_default_style("subtitle")
if req.title:
title_style = get_style("title", req.title_style_id) or get_default_style("title")
if req.subtitle_font_size and req.enable_subtitles:
if subtitle_style is None:
subtitle_style = {}
subtitle_style["font_size"] = int(req.subtitle_font_size)
if req.title_font_size and req.title:
if title_style is None:
title_style = {}
title_style["font_size"] = int(req.title_font_size)
if use_remotion:
subtitle_style = prepare_style_for_remotion(
subtitle_style,
temp_dir,
f"{task_id}_subtitle_font"
)
title_style = prepare_style_for_remotion(
title_style,
temp_dir,
f"{task_id}_title_font"
)
final_output_local_path = temp_dir / f"{task_id}_output.mp4"
temp_files.append(final_output_local_path)
if use_remotion:
tasks[task_id]["message"] = "正在合成视频 (Remotion)..."
tasks[task_id]["progress"] = 87
if use_remotion:
tasks[task_id]["message"] = "正在合成视频 (Remotion)..."
tasks[task_id]["progress"] = 87
# 先用 FFmpeg 合成音视频Remotion 需要带音频的视频)
composed_video_path = temp_dir / f"{task_id}_composed.mp4"
temp_files.append(composed_video_path)
video = VideoService()
await video.compose(str(lipsync_video_path), str(audio_path), str(composed_video_path))
await video.compose(str(lipsync_video_path), str(final_audio_path), str(composed_video_path))
# 检查 Remotion 是否可用
remotion_health = await remotion_service.check_health()
@@ -223,16 +289,18 @@ async def _process_video_generation(task_id: str, req: GenerateRequest, user_id:
mapped = 87 + int(percent * 0.08)
tasks[task_id]["progress"] = mapped
await remotion_service.render(
video_path=str(composed_video_path),
output_path=str(final_output_local_path),
captions_path=str(captions_path) if captions_path else None,
title=req.title,
title_duration=3.0,
fps=25,
enable_subtitles=req.enable_subtitles,
on_progress=on_remotion_progress
)
await remotion_service.render(
video_path=str(composed_video_path),
output_path=str(final_output_local_path),
captions_path=str(captions_path) if captions_path else None,
title=req.title,
title_duration=3.0,
fps=25,
enable_subtitles=req.enable_subtitles,
subtitle_style=subtitle_style,
title_style=title_style,
on_progress=on_remotion_progress
)
print(f"[Pipeline] Remotion render completed")
except Exception as e:
logger.warning(f"Remotion render failed, using FFmpeg fallback: {e}")
@@ -248,8 +316,7 @@ async def _process_video_generation(task_id: str, req: GenerateRequest, user_id:
tasks[task_id]["message"] = "正在合成最终视频..."
tasks[task_id]["progress"] = 90
video = VideoService()
await video.compose(str(lipsync_video_path), str(audio_path), str(final_output_local_path))
await video.compose(str(lipsync_video_path), str(final_audio_path), str(final_output_local_path))
total_time = time.time() - start_time

View File

@@ -3,9 +3,10 @@ from pathlib import Path
class Settings(BaseSettings):
# 基础路径配置
BASE_DIR: Path = Path(__file__).resolve().parent.parent
UPLOAD_DIR: Path = BASE_DIR.parent / "uploads"
OUTPUT_DIR: Path = BASE_DIR.parent / "outputs"
BASE_DIR: Path = Path(__file__).resolve().parent.parent
UPLOAD_DIR: Path = BASE_DIR.parent / "uploads"
OUTPUT_DIR: Path = BASE_DIR.parent / "outputs"
ASSETS_DIR: Path = BASE_DIR.parent / "assets"
# 数据库/缓存
REDIS_URL: str = "redis://localhost:6379/0"

View File

@@ -2,7 +2,7 @@ from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from app.core import config
from app.api import materials, videos, publish, login_helper, auth, admin, ref_audios, ai, tools
from app.api import materials, videos, publish, login_helper, auth, admin, ref_audios, ai, tools, assets
from loguru import logger
import os
@@ -41,12 +41,14 @@ app.add_middleware(
)
# Create dirs
settings.UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
settings.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
(settings.UPLOAD_DIR / "materials").mkdir(exist_ok=True)
settings.UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
settings.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
(settings.UPLOAD_DIR / "materials").mkdir(exist_ok=True)
settings.ASSETS_DIR.mkdir(parents=True, exist_ok=True)
app.mount("/outputs", StaticFiles(directory=str(settings.OUTPUT_DIR)), name="outputs")
app.mount("/uploads", StaticFiles(directory=str(settings.UPLOAD_DIR)), name="uploads")
app.mount("/outputs", StaticFiles(directory=str(settings.OUTPUT_DIR)), name="outputs")
app.mount("/uploads", StaticFiles(directory=str(settings.UPLOAD_DIR)), name="uploads")
app.mount("/assets", StaticFiles(directory=str(settings.ASSETS_DIR)), name="assets")
# 注册路由
app.include_router(materials.router, prefix="/api/materials", tags=["Materials"])
@@ -55,9 +57,10 @@ app.include_router(publish.router, prefix="/api/publish", tags=["Publish"])
app.include_router(login_helper.router, prefix="/api", tags=["LoginHelper"])
app.include_router(auth.router) # /api/auth
app.include_router(admin.router) # /api/admin
app.include_router(ref_audios.router, prefix="/api/ref-audios", tags=["RefAudios"])
app.include_router(ai.router) # /api/ai
app.include_router(tools.router, prefix="/api/tools", tags=["Tools"])
app.include_router(ref_audios.router, prefix="/api/ref-audios", tags=["RefAudios"])
app.include_router(ai.router) # /api/ai
app.include_router(tools.router, prefix="/api/tools", tags=["Tools"])
app.include_router(assets.router, prefix="/api/assets", tags=["Assets"])
@app.on_event("startup")

View File

@@ -0,0 +1,128 @@
import json
import shutil
from pathlib import Path
from typing import Optional, List, Dict, Any
from loguru import logger
from app.core.config import settings
# File suffixes (lower-case, including the leading dot) that list_bgm accepts
# as background-music files; anything else under the bgm folder is ignored there.
BGM_EXTENSIONS = {".wav", ".mp3", ".m4a", ".aac", ".flac", ".ogg", ".webm"}
def _style_file_path(style_type: str) -> Path:
    """Return the path of the JSON file holding styles of ``style_type``.

    The file lives in the ``styles`` folder of ``settings.ASSETS_DIR`` and is
    named ``<style_type>.json``.
    """
    styles_dir = settings.ASSETS_DIR / "styles"
    return styles_dir / f"{style_type}.json"
def _load_style_file(style_type: str) -> List[Dict[str, Any]]:
    """Load the style catalogue for ``style_type`` from its JSON file.

    The file is re-read on every call, so callers always receive fresh
    dictionaries that they may mutate freely.

    Args:
        style_type: Catalogue name, used to build the file name.

    Returns:
        The list of style dictionaries found in the file. An empty list is
        returned when the file is missing, unreadable, not valid JSON, or
        when its top-level value is not a list. Entries that are not JSON
        objects are dropped, because every consumer calls ``.get`` on them.
    """
    style_path = _style_file_path(style_type)
    try:
        # Open directly instead of checking exists() first: avoids the gap
        # between the check and the read.
        with open(style_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # A missing catalogue is a normal situation, not an error.
        return []
    except Exception as e:
        # Deliberate best effort: a broken style file must not break callers.
        logger.error(f"Failed to load style file {style_path}: {e}")
        return []

    if not isinstance(data, list):
        logger.error(
            f"Style file {style_path} must contain a JSON list, "
            f"got {type(data).__name__}"
        )
        return []

    styles = [item for item in data if isinstance(item, dict)]
    if len(styles) != len(data):
        logger.warning(
            f"Ignored {len(data) - len(styles)} non-object entries in {style_path}"
        )
    return styles
def list_styles(style_type: str) -> List[Dict[str, Any]]:
    """Return every style entry of ``style_type`` as loaded from its JSON file."""
    styles = _load_style_file(style_type)
    return styles
def get_style(style_type: str, style_id: Optional[str]) -> Optional[Dict[str, Any]]:
    """Look up a single style by its ``id``.

    Args:
        style_type: Catalogue to search.
        style_id: Identifier to match against each entry's ``"id"`` key.

    Returns:
        The first matching entry, or ``None`` when ``style_id`` is empty or
        no entry matches. The catalogue is not read when ``style_id`` is empty.
    """
    if style_id:
        candidates = _load_style_file(style_type)
        return next(
            (entry for entry in candidates if entry.get("id") == style_id),
            None,
        )
    return None
def get_default_style(style_type: str) -> Optional[Dict[str, Any]]:
    """Return the default style of ``style_type``.

    The first entry with a truthy ``"is_default"`` value wins; when no entry
    is flagged, the first entry of the catalogue is used.

    Returns:
        The chosen style dictionary, or ``None`` when the catalogue is empty.
    """
    catalogue = _load_style_file(style_type)
    flagged = next((entry for entry in catalogue if entry.get("is_default")), None)
    if flagged is not None:
        return flagged
    return catalogue[0] if catalogue else None
def list_bgm() -> List[Dict[str, Any]]:
    """Enumerate background-music files under the ``bgm`` assets folder.

    The folder is walked recursively. Only regular files whose lower-cased
    suffix is in ``BGM_EXTENSIONS`` are reported.

    Returns:
        A list of dicts sorted by ``"name"``, each with:
        ``"id"`` (POSIX-style path relative to the bgm folder),
        ``"name"`` (file stem) and ``"ext"`` (lower-case suffix without the dot).
        Empty when the folder does not exist.
    """
    bgm_root = settings.ASSETS_DIR / "bgm"
    if not bgm_root.exists():
        return []

    tracks: List[Dict[str, Any]] = [
        {
            "id": audio.relative_to(bgm_root).as_posix(),
            "name": audio.stem,
            "ext": audio.suffix.lower().lstrip("."),
        }
        for audio in bgm_root.rglob("*")
        if audio.is_file() and audio.suffix.lower() in BGM_EXTENSIONS
    ]
    # sorted() is stable, so tracks sharing a name keep their walk order.
    return sorted(tracks, key=lambda track: track["name"])
def resolve_bgm_path(bgm_id: str) -> Optional[Path]:
    """Translate a client-supplied BGM id into a file inside the bgm folder.

    ``bgm_id`` is a path relative to the ``bgm`` assets folder and comes from
    the request, so it is treated as untrusted.

    Args:
        bgm_id: Relative path of the track.

    Returns:
        The resolved absolute path when it points at an existing regular file
        located inside the bgm folder; ``None`` for an empty id, an id that
        escapes the folder (``..``, absolute paths, symlinks leading outside),
        an id that cannot be resolved at all, or a missing file.
    """
    if not bgm_id:
        return None

    bgm_root = settings.ASSETS_DIR / "bgm"
    try:
        root = bgm_root.resolve()
        # resolve() itself may raise: ValueError for an embedded NUL byte,
        # OSError/RuntimeError for unreadable paths or symlink loops. Keeping
        # it inside the try turns every malformed id into "not found".
        candidate = (bgm_root / bgm_id).resolve()
        # Raises ValueError when the resolved path is outside the bgm folder.
        candidate.relative_to(root)
    except (ValueError, OSError, RuntimeError):
        return None

    # is_file() is False for missing paths, so no separate exists() check.
    return candidate if candidate.is_file() else None
def prepare_style_for_remotion(
    style: Optional[Dict[str, Any]],
    temp_dir: Path,
    prefix: str
) -> Optional[Dict[str, Any]]:
    """Copy a style's font into ``temp_dir`` and point the style at the copy.

    The input dictionary is never modified; a shallow copy is returned.

    Args:
        style: Style dictionary, optionally carrying a ``"font_file"`` path
            relative to the ``fonts`` assets folder.
        temp_dir: Directory that receives the font copy (created if needed).
        prefix: Base name of the copied font file; also used as
            ``"font_family"`` when the style has none.

    Returns:
        ``None`` for an empty or missing style. Otherwise the copied style:
        unchanged when it has no font, when the font lies outside the fonts
        folder, is missing, or cannot be copied; with ``"font_file"``
        rewritten to the copied file name when the copy succeeded.
    """
    if not style:
        return None

    result = dict(style)
    font_name = result.get("font_file")
    if not font_name:
        return result

    fonts_root = settings.ASSETS_DIR / "fonts"
    font_src = (fonts_root / font_name).resolve()
    try:
        # Reject fonts that resolve outside the fonts folder.
        font_src.relative_to(fonts_root.resolve())
    except ValueError:
        logger.warning(f"Font path outside assets: {font_name}")
        return result

    if not font_src.exists():
        logger.warning(f"Font file missing: {font_src}")
        return result

    temp_dir.mkdir(parents=True, exist_ok=True)
    copied_name = f"{prefix}{font_src.suffix.lower()}"
    font_dst = temp_dir / copied_name
    try:
        shutil.copy(font_src, font_dst)
    except Exception as e:
        # Best effort: on failure the style keeps its original font reference.
        logger.warning(f"Failed to copy font {font_src} -> {font_dst}: {e}")
    else:
        result["font_file"] = copied_name
        if not result.get("font_family"):
            result["font_family"] = prefix
    return result

View File

@@ -4,6 +4,7 @@ Remotion 视频渲染服务
"""
import asyncio
import json
import subprocess
from pathlib import Path
from typing import Optional
@@ -30,6 +31,8 @@ class RemotionService:
title_duration: float = 3.0,
fps: int = 25,
enable_subtitles: bool = True,
subtitle_style: Optional[dict] = None,
title_style: Optional[dict] = None,
on_progress: Optional[callable] = None
) -> str:
"""
@@ -64,6 +67,12 @@ class RemotionService:
cmd.extend(["--title", title])
cmd.extend(["--titleDuration", str(title_duration)])
if subtitle_style:
cmd.extend(["--subtitleStyle", json.dumps(subtitle_style, ensure_ascii=False)])
if title_style:
cmd.extend(["--titleStyle", json.dumps(title_style, ensure_ascii=False)])
logger.info(f"Running Remotion render: {' '.join(cmd)}")
# 在线程池中运行子进程

View File

@@ -1,9 +1,10 @@
"""
视频合成服务
"""
import os
import subprocess
import json
import os
import subprocess
import json
import shlex
from pathlib import Path
from loguru import logger
from typing import Optional
@@ -12,18 +13,18 @@ class VideoService:
def __init__(self):
pass
def _run_ffmpeg(self, cmd: list) -> bool:
cmd_str = ' '.join(f'"{c}"' if ' ' in c or '\\' in c else c for c in cmd)
logger.debug(f"FFmpeg CMD: {cmd_str}")
try:
# Synchronous call for BackgroundTasks compatibility
result = subprocess.run(
cmd_str,
shell=True,
capture_output=True,
text=True,
encoding='utf-8',
)
def _run_ffmpeg(self, cmd: list) -> bool:
cmd_str = ' '.join(shlex.quote(str(c)) for c in cmd)
logger.debug(f"FFmpeg CMD: {cmd_str}")
try:
# Synchronous call for BackgroundTasks compatibility
result = subprocess.run(
cmd,
shell=False,
capture_output=True,
text=True,
encoding='utf-8',
)
if result.returncode != 0:
logger.error(f"FFmpeg Error: {result.stderr}")
return False
@@ -32,9 +33,9 @@ class VideoService:
logger.error(f"FFmpeg Exception: {e}")
return False
def _get_duration(self, file_path: str) -> float:
# Synchronous call for BackgroundTasks compatibility
cmd = f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "{file_path}"'
def _get_duration(self, file_path: str) -> float:
# Synchronous call for BackgroundTasks compatibility
cmd = f'ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "{file_path}"'
try:
result = subprocess.run(
cmd,
@@ -44,7 +45,39 @@ class VideoService:
)
return float(result.stdout.strip())
except Exception:
return 0.0
return 0.0
def mix_audio(
    self,
    voice_path: str,
    bgm_path: str,
    output_path: str,
    bgm_volume: float = 0.2
) -> str:
    """Mix a voice track with background music into one audio file.

    Args:
        voice_path: Voice recording, passed to FFmpeg as the first input.
        bgm_path: Background music, passed as the second input with
            ``-stream_loop -1``.
        output_path: Destination file; its parent directory is created.
        bgm_volume: Gain applied to the music, clamped to ``[0.0, 1.0]``.

    Returns:
        ``output_path`` when FFmpeg succeeds.

    Raises:
        RuntimeError: If the FFmpeg invocation reports failure.
    """
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    gain = max(0.0, min(float(bgm_volume), 1.0))
    # Voice stays at unity gain, music is attenuated, then both are mixed;
    # "duration=first" ties the mix length to the voice input.
    graph = ";".join([
        "[0:a]volume=1.0[a0]",
        f"[1:a]volume={gain}[a1]",
        "[a0][a1]amix=inputs=2:duration=first:dropout_transition=2:normalize=0[aout]",
    ])

    args = ["ffmpeg", "-y"]
    args += ["-i", voice_path]
    args += ["-stream_loop", "-1", "-i", bgm_path]
    args += ["-filter_complex", graph]
    args += ["-map", "[aout]"]
    args += ["-c:a", "pcm_s16le"]
    args += ["-shortest", output_path]

    if not self._run_ffmpeg(args):
        raise RuntimeError("FFmpeg audio mix failed")
    return output_path
async def compose(
self,

View File

@@ -0,0 +1,58 @@
[
{
"id": "subtitle_classic_yellow",
"label": "经典黄字",
"font_file": "title/思源黑体/SourceHanSansCN-Bold思源黑体免费.otf",
"font_family": "SourceHanSansCN-Bold",
"font_size": 52,
"highlight_color": "#FFE600",
"normal_color": "#FFFFFF",
"stroke_color": "#000000",
"stroke_size": 3,
"letter_spacing": 2,
"bottom_margin": 80,
"is_default": true
},
{
"id": "subtitle_cyan",
"label": "清爽青蓝",
"font_file": "DingTalk Sans.ttf",
"font_family": "DingTalkSans",
"font_size": 48,
"highlight_color": "#00E5FF",
"normal_color": "#FFFFFF",
"stroke_color": "#000000",
"stroke_size": 3,
"letter_spacing": 1,
"bottom_margin": 76,
"is_default": false
},
{
"id": "subtitle_orange",
"label": "活力橙",
"font_file": "simhei.ttf",
"font_family": "SimHei",
"font_size": 50,
"highlight_color": "#FF8A00",
"normal_color": "#FFFFFF",
"stroke_color": "#000000",
"stroke_size": 3,
"letter_spacing": 2,
"bottom_margin": 80,
"is_default": false
},
{
"id": "subtitle_clean_white",
"label": "纯白轻描",
"font_file": "DingTalk JinBuTi.ttf",
"font_family": "DingTalkJinBuTi",
"font_size": 46,
"highlight_color": "#FFFFFF",
"normal_color": "#FFFFFF",
"stroke_color": "#111111",
"stroke_size": 2,
"letter_spacing": 1,
"bottom_margin": 72,
"is_default": false
}
]

View File

@@ -0,0 +1,58 @@
[
{
"id": "title_bold_white",
"label": "黑体大标题",
"font_file": "title/思源黑体/SourceHanSansCN-Heavy思源黑体免费.otf",
"font_family": "SourceHanSansCN-Heavy",
"font_size": 72,
"color": "#FFFFFF",
"stroke_color": "#000000",
"stroke_size": 8,
"letter_spacing": 4,
"top_margin": 60,
"font_weight": 900,
"is_default": true
},
{
"id": "title_serif_gold",
"label": "宋体金色",
"font_file": "title/思源宋体/SourceHanSerifCN-SemiBold思源宋体免费.otf",
"font_family": "SourceHanSerifCN-SemiBold",
"font_size": 70,
"color": "#FDE68A",
"stroke_color": "#2B1B00",
"stroke_size": 8,
"letter_spacing": 3,
"top_margin": 58,
"font_weight": 800,
"is_default": false
},
{
"id": "title_douyin",
"label": "抖音活力",
"font_file": "title/抖音美好体开源.otf",
"font_family": "DouyinMeiHao",
"font_size": 72,
"color": "#FFFFFF",
"stroke_color": "#1F0A00",
"stroke_size": 8,
"letter_spacing": 4,
"top_margin": 60,
"font_weight": 900,
"is_default": false
},
{
"id": "title_pop",
"label": "站酷快乐体",
"font_file": "title/站酷快乐体.ttf",
"font_family": "ZCoolHappy",
"font_size": 74,
"color": "#FFFFFF",
"stroke_color": "#000000",
"stroke_size": 8,
"letter_spacing": 5,
"top_margin": 62,
"font_weight": 900,
"is_default": false
}
]