Files
ViGent2/backend/app/modules/videos/workflow.py
Kevin Wong be6a3436bb 更新
2026-02-05 12:03:55 +08:00

329 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from typing import Optional, Any
from pathlib import Path
import time
import traceback
import httpx
from loguru import logger
from app.core.config import settings
from app.services.tts_service import TTSService
from app.services.video_service import VideoService
from app.services.lipsync_service import LipSyncService
from app.services.voice_clone_service import voice_clone_service
from app.services.assets_service import (
get_style,
get_default_style,
resolve_bgm_path,
prepare_style_for_remotion,
)
from app.services.storage import storage_service
from app.services.whisper_service import whisper_service
from app.services.remotion_service import remotion_service
from .schemas import GenerateRequest
from .task_store import task_store
_lipsync_service: Optional[LipSyncService] = None
_lipsync_ready: Optional[bool] = None
_lipsync_last_check: float = 0
def _get_lipsync_service() -> LipSyncService:
"""获取或创建 LipSync 服务实例(单例模式,避免重复初始化)"""
global _lipsync_service
if _lipsync_service is None:
_lipsync_service = LipSyncService()
return _lipsync_service
async def _check_lipsync_ready(force: bool = False) -> bool:
"""检查 LipSync 是否就绪带缓存5分钟内不重复检查"""
global _lipsync_ready, _lipsync_last_check
now = time.time()
if not force and _lipsync_ready is not None and (now - _lipsync_last_check) < 300:
return bool(_lipsync_ready)
lipsync = _get_lipsync_service()
health = await lipsync.check_health()
_lipsync_ready = health.get("ready", False)
_lipsync_last_check = now
print(f"[LipSync] Health check: ready={_lipsync_ready}")
return bool(_lipsync_ready)
async def _download_material(path_or_url: str, temp_path: Path):
"""下载素材到临时文件 (流式下载,节省内存)"""
if path_or_url.startswith("http"):
timeout = httpx.Timeout(None)
async with httpx.AsyncClient(timeout=timeout) as client:
async with client.stream("GET", path_or_url) as resp:
resp.raise_for_status()
with open(temp_path, "wb") as f:
async for chunk in resp.aiter_bytes():
f.write(chunk)
else:
src = Path(path_or_url)
if not src.is_absolute():
src = settings.BASE_DIR.parent / path_or_url
if src.exists():
import shutil
shutil.copy(src, temp_path)
else:
raise FileNotFoundError(f"Material not found: {path_or_url}")
def _update_task(task_id: str, **updates: Any) -> None:
task_store.update(task_id, updates)
async def process_video_generation(task_id: str, req: GenerateRequest, user_id: str):
temp_files = []
try:
start_time = time.time()
_update_task(task_id, status="processing", progress=5, message="正在下载素材...")
temp_dir = settings.UPLOAD_DIR / "temp"
temp_dir.mkdir(parents=True, exist_ok=True)
input_material_path = temp_dir / f"{task_id}_input.mp4"
temp_files.append(input_material_path)
await _download_material(req.material_path, input_material_path)
_update_task(task_id, message="正在生成语音...", progress=10)
audio_path = temp_dir / f"{task_id}_audio.wav"
temp_files.append(audio_path)
if req.tts_mode == "voiceclone":
if not req.ref_audio_id or not req.ref_text:
raise ValueError("声音克隆模式需要提供参考音频和参考文字")
_update_task(task_id, message="正在下载参考音频...")
ref_audio_local = temp_dir / f"{task_id}_ref.wav"
temp_files.append(ref_audio_local)
ref_audio_url = await storage_service.get_signed_url(
bucket="ref-audios",
path=req.ref_audio_id
)
await _download_material(ref_audio_url, ref_audio_local)
_update_task(task_id, message="正在克隆声音 (Qwen3-TTS)...")
await voice_clone_service.generate_audio(
text=req.text,
ref_audio_path=str(ref_audio_local),
ref_text=req.ref_text,
output_path=str(audio_path),
language="Chinese"
)
else:
_update_task(task_id, message="正在生成语音 (EdgeTTS)...")
tts = TTSService()
await tts.generate_audio(req.text, req.voice, str(audio_path))
tts_time = time.time() - start_time
print(f"[Pipeline] TTS completed in {tts_time:.1f}s")
_update_task(task_id, progress=25)
_update_task(task_id, message="正在合成唇形 (LatentSync)...", progress=30)
lipsync = _get_lipsync_service()
lipsync_video_path = temp_dir / f"{task_id}_lipsync.mp4"
temp_files.append(lipsync_video_path)
lipsync_start = time.time()
is_ready = await _check_lipsync_ready()
if is_ready:
print(f"[LipSync] Starting LatentSync inference...")
_update_task(task_id, progress=35, message="正在运行 LatentSync 推理...")
await lipsync.generate(str(input_material_path), str(audio_path), str(lipsync_video_path))
else:
print(f"[LipSync] LatentSync not ready, copying original video")
_update_task(task_id, message="唇形同步不可用,使用原始视频...")
import shutil
shutil.copy(str(input_material_path), lipsync_video_path)
lipsync_time = time.time() - lipsync_start
print(f"[Pipeline] LipSync completed in {lipsync_time:.1f}s")
_update_task(task_id, progress=80)
captions_path = None
if req.enable_subtitles:
_update_task(task_id, message="正在生成字幕 (Whisper)...", progress=82)
captions_path = temp_dir / f"{task_id}_captions.json"
temp_files.append(captions_path)
try:
await whisper_service.align(
audio_path=str(audio_path),
text=req.text,
output_path=str(captions_path)
)
print(f"[Pipeline] Whisper alignment completed")
except Exception as e:
logger.warning(f"Whisper alignment failed, skipping subtitles: {e}")
captions_path = None
_update_task(task_id, progress=85)
video = VideoService()
final_audio_path = audio_path
if req.bgm_id:
_update_task(task_id, message="正在合成背景音乐...", progress=86)
bgm_path = resolve_bgm_path(req.bgm_id)
if bgm_path:
mix_output_path = temp_dir / f"{task_id}_audio_mix.wav"
temp_files.append(mix_output_path)
volume = req.bgm_volume if req.bgm_volume is not None else 0.2
volume = max(0.0, min(float(volume), 1.0))
try:
video.mix_audio(
voice_path=str(audio_path),
bgm_path=str(bgm_path),
output_path=str(mix_output_path),
bgm_volume=volume
)
final_audio_path = mix_output_path
except Exception as e:
logger.warning(f"BGM mix failed, fallback to voice only: {e}")
else:
logger.warning(f"BGM not found: {req.bgm_id}")
use_remotion = (captions_path and captions_path.exists()) or req.title
subtitle_style = None
title_style = None
if req.enable_subtitles:
subtitle_style = get_style("subtitle", req.subtitle_style_id) or get_default_style("subtitle")
if req.title:
title_style = get_style("title", req.title_style_id) or get_default_style("title")
if req.subtitle_font_size and req.enable_subtitles:
if subtitle_style is None:
subtitle_style = {}
subtitle_style["font_size"] = int(req.subtitle_font_size)
if req.title_font_size and req.title:
if title_style is None:
title_style = {}
title_style["font_size"] = int(req.title_font_size)
if use_remotion:
subtitle_style = prepare_style_for_remotion(
subtitle_style,
temp_dir,
f"{task_id}_subtitle_font"
)
title_style = prepare_style_for_remotion(
title_style,
temp_dir,
f"{task_id}_title_font"
)
final_output_local_path = temp_dir / f"{task_id}_output.mp4"
temp_files.append(final_output_local_path)
if use_remotion:
_update_task(task_id, message="正在合成视频 (Remotion)...", progress=87)
composed_video_path = temp_dir / f"{task_id}_composed.mp4"
temp_files.append(composed_video_path)
await video.compose(str(lipsync_video_path), str(final_audio_path), str(composed_video_path))
remotion_health = await remotion_service.check_health()
if remotion_health.get("ready"):
try:
def on_remotion_progress(percent):
mapped = 87 + int(percent * 0.08)
_update_task(task_id, progress=mapped)
await remotion_service.render(
video_path=str(composed_video_path),
output_path=str(final_output_local_path),
captions_path=str(captions_path) if captions_path else None,
title=req.title,
title_duration=3.0,
fps=25,
enable_subtitles=req.enable_subtitles,
subtitle_style=subtitle_style,
title_style=title_style,
on_progress=on_remotion_progress
)
print(f"[Pipeline] Remotion render completed")
except Exception as e:
logger.warning(f"Remotion render failed, using FFmpeg fallback: {e}")
import shutil
shutil.copy(str(composed_video_path), final_output_local_path)
else:
logger.warning(f"Remotion not ready: {remotion_health.get('error')}, using FFmpeg")
import shutil
shutil.copy(str(composed_video_path), final_output_local_path)
else:
_update_task(task_id, message="正在合成最终视频...", progress=90)
await video.compose(str(lipsync_video_path), str(final_audio_path), str(final_output_local_path))
total_time = time.time() - start_time
_update_task(task_id, message="正在上传结果...", progress=95)
storage_path = f"{user_id}/{task_id}_output.mp4"
with open(final_output_local_path, "rb") as f:
file_data = f.read()
await storage_service.upload_file(
bucket=storage_service.BUCKET_OUTPUTS,
path=storage_path,
file_data=file_data,
content_type="video/mp4"
)
signed_url = await storage_service.get_signed_url(
bucket=storage_service.BUCKET_OUTPUTS,
path=storage_path
)
print(f"[Pipeline] Total generation time: {total_time:.1f}s")
_update_task(
task_id,
status="completed",
progress=100,
message=f"生成完成!耗时 {total_time:.0f}",
output=storage_path,
download_url=signed_url,
)
except Exception as e:
_update_task(
task_id,
status="failed",
message=f"错误: {str(e)}",
error=traceback.format_exc(),
)
logger.error(f"Generate video failed: {e}")
finally:
for f in temp_files:
try:
if f.exists():
f.unlink()
except Exception as e:
print(f"Error cleaning up {f}: {e}")
async def get_lipsync_health():
lipsync = _get_lipsync_service()
return await lipsync.check_health()
async def get_voiceclone_health():
return await voice_clone_service.check_health()