337 lines
12 KiB
Python
337 lines
12 KiB
Python
from typing import Optional, Any
|
||
from pathlib import Path
|
||
import time
|
||
import traceback
|
||
import httpx
|
||
from loguru import logger
|
||
|
||
from app.core.config import settings
|
||
from app.services.tts_service import TTSService
|
||
from app.services.video_service import VideoService
|
||
from app.services.lipsync_service import LipSyncService
|
||
from app.services.voice_clone_service import voice_clone_service
|
||
from app.services.assets_service import (
|
||
get_style,
|
||
get_default_style,
|
||
resolve_bgm_path,
|
||
prepare_style_for_remotion,
|
||
)
|
||
from app.services.storage import storage_service
|
||
from app.services.whisper_service import whisper_service
|
||
from app.services.remotion_service import remotion_service
|
||
|
||
from .schemas import GenerateRequest
|
||
from .task_store import task_store
|
||
|
||
|
||
# Module-level LipSync state shared by the helpers below.

# Lazily created LipSync service instance; stays None until first requested.
_lipsync_service: Optional[LipSyncService] = None

# Outcome of the most recent LipSync health check; None until one has run.
_lipsync_ready: Optional[bool] = None

# Clock reading stored when the most recent health check was started.
_lipsync_last_check: float = 0
|
||
|
||
|
||
def _get_lipsync_service() -> LipSyncService:
    """Return the module-wide LipSync service, creating it on first use.

    The instance is kept in the ``_lipsync_service`` module global so that
    repeated calls reuse one object instead of initialising a new service
    every time.
    """
    global _lipsync_service
    if _lipsync_service is not None:
        return _lipsync_service

    _lipsync_service = LipSyncService()
    return _lipsync_service
|
||
|
||
|
||
async def _check_lipsync_ready(force: bool = False) -> bool:
    """Report whether the LipSync backend is ready, caching the answer.

    The result of the last health check is reused for five minutes so the
    backend is not probed on every request.

    Args:
        force: When true, ignore any cached result and run a new health check.

    Returns:
        ``True`` if the latest health check reported ``ready``, else ``False``.
    """
    global _lipsync_ready, _lipsync_last_check

    cache_ttl_seconds = 300

    # Use the monotonic clock for the cache age: unlike time.time() it cannot
    # jump when the system clock is adjusted, so the cache neither expires
    # early nor stays valid for longer than intended.
    now = time.monotonic()
    cache_is_fresh = (
        _lipsync_ready is not None
        and (now - _lipsync_last_check) < cache_ttl_seconds
    )
    if cache_is_fresh and not force:
        return bool(_lipsync_ready)

    lipsync = _get_lipsync_service()
    health = await lipsync.check_health()
    _lipsync_ready = health.get("ready", False)
    _lipsync_last_check = now
    print(f"[LipSync] Health check: ready={_lipsync_ready}")
    return bool(_lipsync_ready)
|
||
|
||
|
||
async def _download_material(path_or_url: str, temp_path: Path):
    """Fetch a material into ``temp_path`` from a URL or a local path.

    HTTP(S) sources are streamed to disk chunk by chunk so the whole payload
    is never held in memory. Anything else is treated as a filesystem path;
    relative paths are resolved against ``settings.BASE_DIR.parent``.

    Args:
        path_or_url: ``http://`` / ``https://`` URL, or a local file path.
        temp_path: Destination file that receives the content.

    Raises:
        FileNotFoundError: The local source does not exist or is not a file.
        httpx.HTTPStatusError: The remote server answered with an error status.
    """
    if _is_remote_url(path_or_url):
        # No timeout on purpose: materials can be large and slow to transfer.
        timeout = httpx.Timeout(None)
        async with httpx.AsyncClient(timeout=timeout) as client:
            async with client.stream("GET", path_or_url) as resp:
                resp.raise_for_status()
                with open(temp_path, "wb") as f:
                    async for chunk in resp.aiter_bytes():
                        f.write(chunk)
        return

    src = Path(path_or_url)
    if not src.is_absolute():
        src = settings.BASE_DIR.parent / path_or_url

    # is_file() rather than exists(): a directory would otherwise reach
    # shutil.copy and fail with an unrelated error.
    if not src.is_file():
        raise FileNotFoundError(f"Material not found: {path_or_url}")

    import shutil

    shutil.copy(src, temp_path)


def _is_remote_url(path_or_url: str) -> bool:
    """Return True only for strings that carry an explicit http(s):// scheme."""
    # A bare startswith("http") would also match local paths such as
    # "http_clips/a.mp4"; URL schemes are case-insensitive, hence lower().
    return path_or_url.lower().startswith(("http://", "https://"))
|
||
|
||
|
||
def _update_task(task_id: str, **updates: Any) -> None:
    """Forward the given keyword fields to the task store for ``task_id``.

    Thin convenience wrapper so call sites can write
    ``_update_task(task_id, progress=10)`` instead of building a dict.
    """
    task_store.update(task_id, updates)
|
||
|
||
|
||
async def process_video_generation(task_id: str, req: GenerateRequest, user_id: str):
    """Run the full video generation pipeline for one task.

    Stages, in order: download the source material, synthesise speech
    (voice clone or EdgeTTS), lip-sync the video (or copy the original when
    LipSync is not ready), optionally align subtitles, optionally mix
    background music, compose the final video (Remotion when subtitles or a
    title are present, plain compose otherwise), upload the result and mark
    the task completed.

    Progress and status are reported through ``_update_task``. Any exception
    is caught, recorded on the task as ``failed`` and logged; it is not
    re-raised. Temporary files registered during the run are removed in the
    ``finally`` block regardless of outcome.

    Args:
        task_id: Identifier of the task; also used as the temp-file prefix.
        req: Generation request describing material, text, voice and styling.
        user_id: Owner of the task; used as the storage path prefix.
    """
    import shutil

    temp_files = []
    try:
        start_time = time.time()
        _update_task(task_id, status="processing", progress=5, message="正在下载素材...")

        temp_dir = settings.UPLOAD_DIR / "temp"
        temp_dir.mkdir(parents=True, exist_ok=True)

        # ---- 1. Source material ------------------------------------------
        input_material_path = temp_dir / f"{task_id}_input.mp4"
        temp_files.append(input_material_path)

        await _download_material(req.material_path, input_material_path)

        # ---- 2. Speech synthesis -----------------------------------------
        _update_task(task_id, message="正在生成语音...", progress=10)

        audio_path = temp_dir / f"{task_id}_audio.wav"
        temp_files.append(audio_path)

        if req.tts_mode == "voiceclone":
            if not req.ref_audio_id or not req.ref_text:
                raise ValueError("声音克隆模式需要提供参考音频和参考文字")

            _update_task(task_id, message="正在下载参考音频...")

            ref_audio_local = temp_dir / f"{task_id}_ref.wav"
            temp_files.append(ref_audio_local)

            ref_audio_url = await storage_service.get_signed_url(
                bucket="ref-audios",
                path=req.ref_audio_id,
            )
            await _download_material(ref_audio_url, ref_audio_local)

            _update_task(task_id, message="正在克隆声音 (Qwen3-TTS)...")
            await voice_clone_service.generate_audio(
                text=req.text,
                ref_audio_path=str(ref_audio_local),
                ref_text=req.ref_text,
                output_path=str(audio_path),
                language="Chinese",
            )
        else:
            _update_task(task_id, message="正在生成语音 (EdgeTTS)...")
            tts = TTSService()
            await tts.generate_audio(req.text, req.voice, str(audio_path))

        tts_time = time.time() - start_time
        print(f"[Pipeline] TTS completed in {tts_time:.1f}s")
        _update_task(task_id, progress=25)

        # ---- 3. Lip sync -------------------------------------------------
        _update_task(task_id, message="正在合成唇形 (LatentSync)...", progress=30)

        lipsync = _get_lipsync_service()
        lipsync_video_path = temp_dir / f"{task_id}_lipsync.mp4"
        temp_files.append(lipsync_video_path)

        lipsync_start = time.time()
        is_ready = await _check_lipsync_ready()

        if is_ready:
            print("[LipSync] Starting LatentSync inference...")
            _update_task(task_id, progress=35, message="正在运行 LatentSync 推理...")
            await lipsync.generate(str(input_material_path), str(audio_path), str(lipsync_video_path))
        else:
            # Degrade gracefully: keep the untouched video when LipSync is down.
            print("[LipSync] LatentSync not ready, copying original video")
            _update_task(task_id, message="唇形同步不可用,使用原始视频...")
            shutil.copy(str(input_material_path), lipsync_video_path)

        lipsync_time = time.time() - lipsync_start
        print(f"[Pipeline] LipSync completed in {lipsync_time:.1f}s")
        _update_task(task_id, progress=80)

        # ---- 4. Subtitles (optional, best effort) ------------------------
        captions_path = None
        if req.enable_subtitles:
            _update_task(task_id, message="正在生成字幕 (Whisper)...", progress=82)

            captions_path = temp_dir / f"{task_id}_captions.json"
            temp_files.append(captions_path)

            try:
                await whisper_service.align(
                    audio_path=str(audio_path),
                    text=req.text,
                    output_path=str(captions_path),
                )
                print("[Pipeline] Whisper alignment completed")
            except Exception as e:
                # Subtitles are optional; continue without them.
                logger.warning(f"Whisper alignment failed, skipping subtitles: {e}")
                captions_path = None

        _update_task(task_id, progress=85)

        # ---- 5. Background music (optional, best effort) -----------------
        video = VideoService()
        final_audio_path = audio_path
        if req.bgm_id:
            _update_task(task_id, message="正在合成背景音乐...", progress=86)

            bgm_path = resolve_bgm_path(req.bgm_id)
            if bgm_path:
                mix_output_path = temp_dir / f"{task_id}_audio_mix.wav"
                temp_files.append(mix_output_path)
                volume = req.bgm_volume if req.bgm_volume is not None else 0.2
                # Clamp the requested volume into [0.0, 1.0].
                volume = max(0.0, min(float(volume), 1.0))
                try:
                    video.mix_audio(
                        voice_path=str(audio_path),
                        bgm_path=str(bgm_path),
                        output_path=str(mix_output_path),
                        bgm_volume=volume,
                    )
                    final_audio_path = mix_output_path
                except Exception as e:
                    logger.warning(f"BGM mix failed, fallback to voice only: {e}")
            else:
                logger.warning(f"BGM not found: {req.bgm_id}")

        # ---- 6. Overlay styles -------------------------------------------
        # Remotion is only needed when there is something to overlay.
        use_remotion = (captions_path and captions_path.exists()) or req.title

        subtitle_style = None
        title_style = None

        if req.enable_subtitles:
            subtitle_style = get_style("subtitle", req.subtitle_style_id) or get_default_style("subtitle")
            subtitle_overrides = {}
            if req.subtitle_font_size:
                subtitle_overrides["font_size"] = int(req.subtitle_font_size)
            if req.subtitle_bottom_margin is not None:
                subtitle_overrides["bottom_margin"] = int(req.subtitle_bottom_margin)
            # Applied to a copy so a style dict owned by the assets service
            # is never modified by one request's overrides.
            subtitle_style = _with_style_overrides(subtitle_style, subtitle_overrides)

        if req.title:
            title_style = get_style("title", req.title_style_id) or get_default_style("title")
            title_overrides = {}
            if req.title_font_size:
                title_overrides["font_size"] = int(req.title_font_size)
            if req.title_top_margin is not None:
                title_overrides["top_margin"] = int(req.title_top_margin)
            title_style = _with_style_overrides(title_style, title_overrides)

        if use_remotion:
            subtitle_style = prepare_style_for_remotion(
                subtitle_style,
                temp_dir,
                f"{task_id}_subtitle_font",
            )
            title_style = prepare_style_for_remotion(
                title_style,
                temp_dir,
                f"{task_id}_title_font",
            )

        # ---- 7. Final composition ----------------------------------------
        final_output_local_path = temp_dir / f"{task_id}_output.mp4"
        temp_files.append(final_output_local_path)

        if use_remotion:
            _update_task(task_id, message="正在合成视频 (Remotion)...", progress=87)

            composed_video_path = temp_dir / f"{task_id}_composed.mp4"
            temp_files.append(composed_video_path)

            await video.compose(str(lipsync_video_path), str(final_audio_path), str(composed_video_path))

            remotion_health = await remotion_service.check_health()
            if remotion_health.get("ready"):
                try:
                    def on_remotion_progress(percent):
                        # Map the renderer's percentage onto the 87..95 band.
                        mapped = 87 + int(percent * 0.08)
                        _update_task(task_id, progress=mapped)

                    await remotion_service.render(
                        video_path=str(composed_video_path),
                        output_path=str(final_output_local_path),
                        captions_path=str(captions_path) if captions_path else None,
                        title=req.title,
                        title_duration=3.0,
                        fps=25,
                        enable_subtitles=req.enable_subtitles,
                        subtitle_style=subtitle_style,
                        title_style=title_style,
                        on_progress=on_remotion_progress,
                    )
                    print("[Pipeline] Remotion render completed")
                except Exception as e:
                    # Fall back to the already composed video without overlays.
                    logger.warning(f"Remotion render failed, using FFmpeg fallback: {e}")
                    shutil.copy(str(composed_video_path), final_output_local_path)
            else:
                logger.warning(f"Remotion not ready: {remotion_health.get('error')}, using FFmpeg")
                shutil.copy(str(composed_video_path), final_output_local_path)
        else:
            _update_task(task_id, message="正在合成最终视频...", progress=90)

            await video.compose(str(lipsync_video_path), str(final_audio_path), str(final_output_local_path))

        # Measured before the upload, so the reported time excludes it.
        total_time = time.time() - start_time

        # ---- 8. Upload and finish ----------------------------------------
        _update_task(task_id, message="正在上传结果...", progress=95)

        storage_path = f"{user_id}/{task_id}_output.mp4"
        await storage_service.upload_file_from_path(
            bucket=storage_service.BUCKET_OUTPUTS,
            storage_path=storage_path,
            local_file_path=str(final_output_local_path),
            content_type="video/mp4",
        )

        signed_url = await storage_service.get_signed_url(
            bucket=storage_service.BUCKET_OUTPUTS,
            path=storage_path,
        )

        print(f"[Pipeline] Total generation time: {total_time:.1f}s")

        _update_task(
            task_id,
            status="completed",
            progress=100,
            message=f"生成完成!耗时 {total_time:.0f} 秒",
            output=storage_path,
            download_url=signed_url,
        )

    except Exception as e:
        # Record the failure on the task instead of propagating it.
        _update_task(
            task_id,
            status="failed",
            message=f"错误: {str(e)}",
            error=traceback.format_exc(),
        )
        logger.error(f"Generate video failed: {e}")
    finally:
        # Best-effort cleanup: one undeletable file must not block the rest.
        for f in temp_files:
            try:
                if f.exists():
                    f.unlink()
            except Exception as e:
                print(f"Error cleaning up {f}: {e}")


def _with_style_overrides(style, overrides):
    """Return ``style`` with ``overrides`` applied, leaving ``style`` unmodified."""
    if not overrides:
        return style
    merged = dict(style) if style else {}
    merged.update(overrides)
    return merged
|
||
|
||
|
||
async def get_lipsync_health():
    """Run a health check on the shared LipSync service and return its result."""
    return await _get_lipsync_service().check_health()
|
||
|
||
|
||
async def get_voiceclone_health():
    """Run a health check on the voice clone service and return its result."""
    health = await voice_clone_service.check_health()
    return health
|