Files
ViGent2/backend/app/modules/videos/workflow.py
Kevin Wong 48bc78fe38 更新
2026-03-02 16:35:16 +08:00

847 lines
35 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from typing import Optional, Any, List
from pathlib import Path
import asyncio
import time
import traceback
import httpx
from loguru import logger
from app.core.config import settings
from app.services.tts_service import TTSService
from app.services.video_service import VideoService
from app.services.lipsync_service import LipSyncService
from app.services.voice_clone_service import voice_clone_service
from app.services.assets_service import (
get_style,
get_default_style,
resolve_bgm_path,
prepare_style_for_remotion,
)
from app.services.storage import storage_service
from app.services.whisper_service import whisper_service
from app.services.remotion_service import remotion_service
from .schemas import GenerateRequest
from .task_store import task_store
# 全局并发限制:最多同时运行 2 个视频生成任务
_generation_semaphore = asyncio.Semaphore(2)
def _locale_to_whisper_lang(locale: str) -> str:
"""'en-US''en', 'zh-CN''zh'"""
return locale.split("-")[0] if "-" in locale else locale
def _locale_to_tts_lang(locale: str) -> str:
"""'zh-CN''Chinese', 'en-US''English', 其他 → 'Auto'"""
mapping = {"zh": "Chinese", "en": "English"}
return mapping.get(locale.split("-")[0], "Auto")
_lipsync_service: Optional[LipSyncService] = None
_lipsync_ready: Optional[bool] = None
_lipsync_last_check: float = 0
def _get_lipsync_service() -> LipSyncService:
"""获取或创建 LipSync 服务实例(单例模式,避免重复初始化)"""
global _lipsync_service
if _lipsync_service is None:
_lipsync_service = LipSyncService()
return _lipsync_service
async def _check_lipsync_ready(force: bool = False) -> bool:
"""检查 LipSync 是否就绪带缓存5分钟内不重复检查"""
global _lipsync_ready, _lipsync_last_check
now = time.time()
if not force and _lipsync_ready is not None and (now - _lipsync_last_check) < 300:
return bool(_lipsync_ready)
lipsync = _get_lipsync_service()
health = await lipsync.check_health()
_lipsync_ready = health.get("ready", False)
_lipsync_last_check = now
print(f"[LipSync] Health check: ready={_lipsync_ready}")
return bool(_lipsync_ready)
async def _download_material(path_or_url: str, temp_path: Path):
"""下载素材到临时文件 (流式下载,节省内存)"""
if path_or_url.startswith("http"):
timeout = httpx.Timeout(None)
async with httpx.AsyncClient(timeout=timeout) as client:
async with client.stream("GET", path_or_url) as resp:
resp.raise_for_status()
with open(temp_path, "wb") as f:
async for chunk in resp.aiter_bytes():
f.write(chunk)
else:
src = Path(path_or_url)
if not src.is_absolute():
src = settings.BASE_DIR.parent / path_or_url
if src.exists():
import shutil
shutil.copy(src, temp_path)
else:
raise FileNotFoundError(f"Material not found: {path_or_url}")
def _update_task(task_id: str, **updates: Any) -> None:
task_store.update(task_id, updates)
async def _run_blocking(func, *args):
"""在线程池执行阻塞函数,避免卡住事件循环。"""
loop = asyncio.get_running_loop()
return await loop.run_in_executor(None, func, *args)
# ── 多素材辅助函数 ──
def _split_equal(segments: List[dict], material_paths: List[str]) -> List[dict]:
"""按素材数量均分音频时长,对齐到最近的 Whisper 字边界。
Args:
segments: Whisper 产出的 segment 列表, 每个包含 words (字级时间戳)
material_paths: 素材路径列表
Returns:
[{"material_path": "...", "start": 0.0, "end": 5.2, "index": 0}, ...]
"""
# 展平所有 Whisper 字符
all_chars: List[dict] = []
for seg in segments:
for w in seg.get("words", []):
all_chars.append(w)
n = len(material_paths)
if not all_chars or n == 0:
return [{"material_path": material_paths[0] if material_paths else "",
"start": 0.0, "end": 99999.0, "index": 0}]
# 素材数不能超过字符数,否则边界会重复
if n > len(all_chars):
logger.warning(f"[MultiMat] 素材数({n}) > 字符数({len(all_chars)}),裁剪为 {len(all_chars)}")
n = len(all_chars)
total_start = all_chars[0]["start"]
total_end = all_chars[-1]["end"]
seg_dur = (total_end - total_start) / n
# 计算 N-1 个分割点,对齐到最近的字边界
boundaries = [0] # 第一段从第 0 个字开始
for i in range(1, n):
target_time = total_start + i * seg_dur
# 找到 start 时间最接近 target_time 的字
best_idx = boundaries[-1] + 1 # 至少比上一个边界后移 1
best_diff = float("inf")
for j in range(boundaries[-1] + 1, len(all_chars)):
diff = abs(all_chars[j]["start"] - target_time)
if diff < best_diff:
best_diff = diff
best_idx = j
elif diff > best_diff:
break # 时间递增,差值开始变大后可以停了
boundaries.append(min(best_idx, len(all_chars) - 1))
boundaries.append(len(all_chars)) # 最后一段到末尾
# 按边界生成分配结果
assignments: List[dict] = []
for i in range(n):
s_idx = boundaries[i]
e_idx = boundaries[i + 1]
if s_idx >= len(all_chars) or s_idx >= e_idx:
continue
assignments.append({
"material_path": material_paths[i],
"start": all_chars[s_idx]["start"],
"end": all_chars[e_idx - 1]["end"],
"text": "".join(c["word"] for c in all_chars[s_idx:e_idx]),
"index": len(assignments),
})
if not assignments:
return [{"material_path": material_paths[0], "start": 0.0, "end": 99999.0, "index": 0}]
logger.info(f"[MultiMat] 均分 {len(all_chars)} 字为 {len(assignments)}")
for a in assignments:
dur = a["end"] - a["start"]
logger.info(f"{a['index']}: [{a['start']:.2f}-{a['end']:.2f}s] ({dur:.1f}s) {a['text'][:20]}")
return assignments
async def process_video_generation(task_id: str, req: GenerateRequest, user_id: str):
_update_task(task_id, message="排队中...")
async with _generation_semaphore:
await _process_video_generation_inner(task_id, req, user_id)
async def _process_video_generation_inner(task_id: str, req: GenerateRequest, user_id: str):
temp_files = []
try:
start_time = time.time()
# ── 确定素材列表 ──
material_paths: List[str] = []
if req.custom_assignments and len(req.custom_assignments) > 1:
material_paths = [a.material_path for a in req.custom_assignments if a.material_path]
elif req.material_paths and len(req.material_paths) > 1:
material_paths = req.material_paths
else:
material_paths = [req.material_path]
is_multi = len(material_paths) > 1
target_resolution = (1080, 1920) if req.output_aspect_ratio == "9:16" else (1920, 1080)
logger.info(
f"[Render] 输出画面比例: {req.output_aspect_ratio}, "
f"目标分辨率: {target_resolution[0]}x{target_resolution[1]}"
)
_update_task(task_id, status="processing", progress=5, message="正在下载素材...")
temp_dir = settings.UPLOAD_DIR / "temp"
temp_dir.mkdir(parents=True, exist_ok=True)
video = VideoService()
input_material_path: Optional[Path] = None
# 单素材模式:下载主素材
if not is_multi:
input_material_path = temp_dir / f"{task_id}_input.mp4"
temp_files.append(input_material_path)
await _download_material(material_paths[0], input_material_path)
# 归一化旋转元数据(如 iPhone MOV 1920x1080 + rotation=-90
normalized_input_path = temp_dir / f"{task_id}_input_norm.mp4"
normalized_result = await _run_blocking(
video.normalize_orientation,
str(input_material_path),
str(normalized_input_path),
)
if normalized_result != str(input_material_path):
temp_files.append(normalized_input_path)
input_material_path = normalized_input_path
_update_task(task_id, message="正在生成语音...", progress=10)
audio_path = temp_dir / f"{task_id}_audio.wav"
temp_files.append(audio_path)
if req.generated_audio_id:
# 新流程:使用预生成的配音
_update_task(task_id, message="正在下载配音...", progress=12)
audio_url = await storage_service.get_signed_url(
bucket="generated-audios",
path=req.generated_audio_id,
)
await _download_material(audio_url, audio_path)
# 从元数据获取 language
meta_path = req.generated_audio_id.replace("_audio.wav", "_audio.json")
try:
meta_url = await storage_service.get_signed_url(
bucket="generated-audios", path=meta_path,
)
import httpx as _httpx
async with _httpx.AsyncClient(timeout=5.0) as client:
resp = await client.get(meta_url)
if resp.status_code == 200:
meta = resp.json()
req.language = meta.get("language", req.language)
# 无条件用配音元数据覆盖文案,确保字幕与配音语言一致
meta_text = meta.get("text", "")
if meta_text:
req.text = meta_text
except Exception as e:
logger.warning(f"读取配音元数据失败: {e}")
elif req.tts_mode == "voiceclone":
if not req.ref_audio_id or not req.ref_text:
raise ValueError("声音克隆模式需要提供参考音频和参考文字")
_update_task(task_id, message="正在下载参考音频...")
ref_audio_local = temp_dir / f"{task_id}_ref.wav"
temp_files.append(ref_audio_local)
ref_audio_url = await storage_service.get_signed_url(
bucket="ref-audios",
path=req.ref_audio_id
)
await _download_material(ref_audio_url, ref_audio_local)
_update_task(task_id, message="正在克隆声音...")
await voice_clone_service.generate_audio(
text=req.text,
ref_audio_path=str(ref_audio_local),
ref_text=req.ref_text,
output_path=str(audio_path),
language=_locale_to_tts_lang(req.language)
)
else:
_update_task(task_id, message="正在生成语音 (EdgeTTS)...")
tts = TTSService()
await tts.generate_audio(req.text, req.voice, str(audio_path))
tts_time = time.time() - start_time
print(f"[Pipeline] TTS completed in {tts_time:.1f}s")
lipsync = _get_lipsync_service()
lipsync_video_path = temp_dir / f"{task_id}_lipsync.mp4"
temp_files.append(lipsync_video_path)
captions_path = None
async def _whisper_and_split():
"""Whisper 对齐 → _split_equal 均分素材(公共逻辑)"""
_update_task(task_id, message="正在生成字幕 (Whisper)...")
_captions_path = temp_dir / f"{task_id}_captions.json"
temp_files.append(_captions_path)
captions_data = None
try:
captions_data = await whisper_service.align(
audio_path=str(audio_path),
text=req.text,
output_path=str(_captions_path),
language=_locale_to_whisper_lang(req.language),
original_text=req.text,
)
print(f"[Pipeline] Whisper alignment completed (multi-material)")
except Exception as e:
logger.warning(f"Whisper alignment failed: {e}")
_captions_path = None
_update_task(task_id, progress=15, message="正在分配素材...")
if captions_data and captions_data.get("segments"):
result = _split_equal(captions_data["segments"], material_paths)
else:
logger.warning("[MultiMat] Whisper 无数据,按时长均分")
audio_dur = await _run_blocking(video._get_duration, str(audio_path))
if audio_dur <= 0:
audio_dur = 30.0
seg_dur = audio_dur / len(material_paths)
result = [
{"material_path": material_paths[i], "start": i * seg_dur,
"end": (i + 1) * seg_dur, "index": i}
for i in range(len(material_paths))
]
return result, _captions_path
if is_multi:
# ══════════════════════════════════════
# 多素材流水线
# ══════════════════════════════════════
_update_task(task_id, progress=12, message="正在分配素材...")
if req.custom_assignments and len(req.custom_assignments) == len(material_paths):
# 用户自定义分配,跳过 Whisper 均分
assignments = [
{
"material_path": a.material_path,
"start": a.start,
"end": a.end,
"source_start": a.source_start,
"source_end": a.source_end,
"index": i,
}
for i, a in enumerate(req.custom_assignments)
]
# 仍然需要 Whisper 生成字幕(如果启用)
captions_path = temp_dir / f"{task_id}_captions.json"
temp_files.append(captions_path)
if req.enable_subtitles:
_update_task(task_id, message="正在生成字幕 (Whisper)...")
try:
await whisper_service.align(
audio_path=str(audio_path),
text=req.text,
output_path=str(captions_path),
language=_locale_to_whisper_lang(req.language),
original_text=req.text,
)
print(f"[Pipeline] Whisper alignment completed (custom assignments)")
except Exception as e:
logger.warning(f"Whisper alignment failed: {e}")
captions_path = None
else:
captions_path = None
elif req.custom_assignments:
logger.warning(
f"[MultiMat] custom_assignments 数量({len(req.custom_assignments)})"
f" 与素材数量({len(material_paths)})不一致,回退自动分配"
)
assignments, captions_path = await _whisper_and_split()
else:
assignments, captions_path = await _whisper_and_split()
# 扩展段覆盖完整音频范围首段从0开始末段到音频结尾
audio_duration = await _run_blocking(video._get_duration, str(audio_path))
if assignments and audio_duration > 0:
assignments[0]["start"] = 0.0
assignments[-1]["end"] = audio_duration
num_segments = len(assignments)
print(f"[Pipeline] Multi-material: {num_segments} segments, {len(material_paths)} materials")
if num_segments == 0:
raise RuntimeError("Multi-material: no valid segments after splitting")
lipsync_start = time.time()
# ── 第一步:并行下载所有素材并检测分辨率 ──
material_locals: List[Path] = []
resolutions = []
async def _download_and_normalize(i: int, assignment: dict):
"""下载单个素材并归一化方向"""
material_local = temp_dir / f"{task_id}_material_{i}.mp4"
temp_files.append(material_local)
await _download_material(assignment["material_path"], material_local)
normalized_material = temp_dir / f"{task_id}_material_{i}_norm.mp4"
normalized_result = await _run_blocking(
video.normalize_orientation,
str(material_local),
str(normalized_material),
)
if normalized_result != str(material_local):
temp_files.append(normalized_material)
material_local = normalized_material
res = video.get_resolution(str(material_local))
return material_local, res
download_tasks = [
_download_and_normalize(i, assignment)
for i, assignment in enumerate(assignments)
]
download_results = await asyncio.gather(*download_tasks)
for local, res in download_results:
material_locals.append(local)
resolutions.append(res)
# 按用户选择的画面比例统一分辨率
base_res = target_resolution
need_scale = any(r != base_res for r in resolutions)
if need_scale:
logger.info(f"[MultiMat] 素材分辨率不一致,统一到 {base_res[0]}x{base_res[1]}")
# ── 第二步:并行裁剪每段素材到对应时长 ──
prepared_segments: List[Optional[Path]] = [None] * num_segments
async def _prepare_one_segment(i: int, assignment: dict):
"""将单个素材裁剪/循环到对应时长"""
seg_dur = assignment["end"] - assignment["start"]
prepared_path = temp_dir / f"{task_id}_prepared_{i}.mp4"
temp_files.append(prepared_path)
prepare_target_res = None if resolutions[i] == base_res else base_res
await _run_blocking(
video.prepare_segment,
str(material_locals[i]),
seg_dur,
str(prepared_path),
prepare_target_res,
assignment.get("source_start", 0.0),
assignment.get("source_end"),
25,
)
return i, prepared_path
_update_task(
task_id,
progress=15,
message=f"正在并行准备 {num_segments} 个素材片段..."
)
prepare_tasks = [
_prepare_one_segment(i, assignment)
for i, assignment in enumerate(assignments)
]
prepare_results = await asyncio.gather(*prepare_tasks)
for i, path in prepare_results:
prepared_segments[i] = path
# ── 第二步:拼接所有素材片段 ──
_update_task(task_id, progress=50, message="正在拼接素材片段...")
concat_path = temp_dir / f"{task_id}_concat.mp4"
temp_files.append(concat_path)
prepared_segment_paths = [str(p) for p in prepared_segments if p is not None]
if len(prepared_segment_paths) != num_segments:
raise RuntimeError("Multi-material: prepared segments mismatch")
await _run_blocking(
video.concat_videos,
prepared_segment_paths,
str(concat_path),
25,
)
# ── 第三步:一次 LatentSync 推理 ──
is_ready = await _check_lipsync_ready()
if is_ready:
_update_task(task_id, progress=55, message="正在合成唇形 (LatentSync)...")
print(f"[LipSync] Multi-material: single LatentSync on concatenated video")
try:
await lipsync.generate(
str(concat_path),
str(audio_path),
str(lipsync_video_path),
model_mode=req.lipsync_model,
)
except Exception as e:
logger.warning(f"[LipSync] Failed, fallback to concat without lipsync: {e}")
import shutil
shutil.copy(str(concat_path), str(lipsync_video_path))
else:
print(f"[LipSync] Not ready, using concatenated video without lipsync")
import shutil
shutil.copy(str(concat_path), str(lipsync_video_path))
lipsync_time = time.time() - lipsync_start
print(f"[Pipeline] Multi-material prepare + concat + LipSync completed in {lipsync_time:.1f}s")
_update_task(task_id, progress=80)
# 如果用户关闭了字幕,清除 captions_pathWhisper 仅用于句子切分)
if not req.enable_subtitles:
captions_path = None
else:
# ══════════════════════════════════════
# 单素材流水线(原有逻辑)
# ══════════════════════════════════════
if input_material_path is None:
raise RuntimeError("单素材流程缺少输入素材")
# 单素材:按用户选择画面比例统一到目标分辨率,并应用 source_start
single_source_start = 0.0
single_source_end = None
if req.custom_assignments and len(req.custom_assignments) == 1:
single_source_start = req.custom_assignments[0].source_start
single_source_end = req.custom_assignments[0].source_end
_update_task(task_id, progress=20, message="正在准备素材片段...")
audio_dur = await _run_blocking(video._get_duration, str(audio_path))
if audio_dur <= 0:
audio_dur = 30.0
single_res = await _run_blocking(video.get_resolution, str(input_material_path))
single_target_res = None if single_res == target_resolution else target_resolution
prepared_single_path = temp_dir / f"{task_id}_prepared_single.mp4"
temp_files.append(prepared_single_path)
await _run_blocking(
video.prepare_segment,
str(input_material_path),
audio_dur,
str(prepared_single_path),
single_target_res,
single_source_start,
single_source_end,
None,
)
input_material_path = prepared_single_path
_update_task(task_id, progress=25)
_update_task(task_id, message="正在合成唇形 (LatentSync)...", progress=30)
lipsync_start = time.time()
is_ready = await _check_lipsync_ready()
if is_ready:
print(f"[LipSync] Starting LatentSync inference...")
_update_task(task_id, progress=35, message="正在运行 LatentSync 推理...")
try:
await lipsync.generate(
str(input_material_path),
str(audio_path),
str(lipsync_video_path),
model_mode=req.lipsync_model,
)
except Exception as e:
logger.warning(f"[LipSync] Failed on single-material, fallback to prepared video: {e}")
_update_task(task_id, message="唇形同步失败,使用原始视频...")
import shutil
shutil.copy(str(input_material_path), str(lipsync_video_path))
else:
print(f"[LipSync] LatentSync not ready, copying original video")
_update_task(task_id, message="唇形同步不可用,使用原始视频...")
import shutil
shutil.copy(str(input_material_path), lipsync_video_path)
lipsync_time = time.time() - lipsync_start
print(f"[Pipeline] LipSync completed in {lipsync_time:.1f}s")
_update_task(task_id, progress=80)
# 单素材模式Whisper 延迟到下方与 BGM 并行执行
if not req.enable_subtitles:
captions_path = None
_update_task(task_id, progress=85)
# ── Whisper 字幕 + BGM 混音 并行(两者都只依赖 audio_path──
final_audio_path = audio_path
_whisper_task = None
_bgm_task = None
mix_output_path: Optional[Path] = None
# 单素材模式下 Whisper 尚未执行,这里与 BGM 并行启动
need_whisper = not is_multi and req.enable_subtitles and captions_path is None
if need_whisper:
captions_path = temp_dir / f"{task_id}_captions.json"
temp_files.append(captions_path)
_captions_path_str = str(captions_path)
async def _run_whisper():
_update_task(task_id, message="正在生成字幕 (Whisper)...", progress=82)
try:
await whisper_service.align(
audio_path=str(audio_path),
text=req.text,
output_path=_captions_path_str,
language=_locale_to_whisper_lang(req.language),
original_text=req.text,
)
print(f"[Pipeline] Whisper alignment completed")
return True
except Exception as e:
logger.warning(f"Whisper alignment failed, skipping subtitles: {e}")
return False
_whisper_task = _run_whisper()
if req.bgm_id:
bgm_path = resolve_bgm_path(req.bgm_id)
if bgm_path:
mix_output_path = temp_dir / f"{task_id}_audio_mix.wav"
temp_files.append(mix_output_path)
volume = req.bgm_volume if req.bgm_volume is not None else 0.2
volume = max(0.0, min(float(volume), 1.0))
_mix_output = str(mix_output_path)
_bgm_path = str(bgm_path)
_voice_path = str(audio_path)
_volume = volume
async def _run_bgm():
_update_task(task_id, message="正在合成背景音乐...", progress=86)
try:
await _run_blocking(
video.mix_audio,
_voice_path,
_bgm_path,
_mix_output,
_volume,
)
return True
except Exception as e:
logger.warning(f"BGM mix failed, fallback to voice only: {e}")
return False
_bgm_task = _run_bgm()
else:
logger.warning(f"BGM not found: {req.bgm_id}")
# 并行等待 Whisper + BGM
parallel_tasks = [t for t in (_whisper_task, _bgm_task) if t is not None]
if parallel_tasks:
results = await asyncio.gather(*parallel_tasks)
result_idx = 0
if _whisper_task is not None:
if not results[result_idx]:
captions_path = None
result_idx += 1
if _bgm_task is not None:
if results[result_idx] and mix_output_path is not None:
final_audio_path = mix_output_path
use_remotion = (captions_path and captions_path.exists()) or req.title or req.secondary_title
subtitle_style = None
title_style = None
secondary_title_style = None
if req.enable_subtitles:
subtitle_style = get_style("subtitle", req.subtitle_style_id) or get_default_style("subtitle")
if req.title:
title_style = get_style("title", req.title_style_id) or get_default_style("title")
if req.secondary_title:
secondary_title_style = get_style("title", req.secondary_title_style_id) or get_default_style("title")
if req.subtitle_font_size and req.enable_subtitles:
if subtitle_style is None:
subtitle_style = {}
subtitle_style["font_size"] = int(req.subtitle_font_size)
if req.title_font_size and req.title:
if title_style is None:
title_style = {}
title_style["font_size"] = int(req.title_font_size)
if req.title_top_margin is not None and req.title:
if title_style is None:
title_style = {}
title_style["top_margin"] = int(req.title_top_margin)
if req.subtitle_bottom_margin is not None and req.enable_subtitles:
if subtitle_style is None:
subtitle_style = {}
subtitle_style["bottom_margin"] = int(req.subtitle_bottom_margin)
if req.secondary_title_font_size and req.secondary_title:
if secondary_title_style is None:
secondary_title_style = {}
secondary_title_style["font_size"] = int(req.secondary_title_font_size)
if req.secondary_title_top_margin is not None and req.secondary_title:
if secondary_title_style is None:
secondary_title_style = {}
secondary_title_style["top_margin"] = int(req.secondary_title_top_margin)
if use_remotion:
subtitle_style = prepare_style_for_remotion(
subtitle_style,
temp_dir,
f"{task_id}_subtitle_font"
)
title_style = prepare_style_for_remotion(
title_style,
temp_dir,
f"{task_id}_title_font"
)
secondary_title_style = prepare_style_for_remotion(
secondary_title_style,
temp_dir,
f"{task_id}_secondary_title_font"
)
# 清理字体临时文件
for prefix in [f"{task_id}_subtitle_font", f"{task_id}_title_font", f"{task_id}_secondary_title_font"]:
for ext in [".ttf", ".otf", ".woff", ".woff2"]:
font_tmp = temp_dir / f"{prefix}{ext}"
if font_tmp.exists():
temp_files.append(font_tmp)
final_output_local_path = temp_dir / f"{task_id}_output.mp4"
temp_files.append(final_output_local_path)
needs_audio_compose = str(final_audio_path) != str(audio_path)
if use_remotion:
_update_task(task_id, message="正在合成视频 (Remotion)...", progress=87)
remotion_input_path = lipsync_video_path
if needs_audio_compose:
composed_video_path = temp_dir / f"{task_id}_composed.mp4"
temp_files.append(composed_video_path)
await video.compose(str(lipsync_video_path), str(final_audio_path), str(composed_video_path))
remotion_input_path = composed_video_path
else:
logger.info("[Pipeline] Audio unchanged, skip pre-Remotion compose")
remotion_health = await remotion_service.check_health()
if remotion_health.get("ready"):
try:
def on_remotion_progress(percent):
mapped = 87 + int(percent * 0.08)
_update_task(task_id, progress=mapped)
title_display_mode = (
req.title_display_mode
if req.title_display_mode in ("short", "persistent")
else "short"
)
title_duration = max(0.5, min(float(req.title_duration or 4.0), 30.0))
await remotion_service.render(
video_path=str(remotion_input_path),
output_path=str(final_output_local_path),
captions_path=str(captions_path) if captions_path else None,
title=req.title,
title_duration=title_duration,
title_display_mode=title_display_mode,
fps=25,
enable_subtitles=req.enable_subtitles,
subtitle_style=subtitle_style,
title_style=title_style,
secondary_title=req.secondary_title,
secondary_title_style=secondary_title_style,
on_progress=on_remotion_progress
)
print(f"[Pipeline] Remotion render completed")
except Exception as e:
logger.warning(f"Remotion render failed, using FFmpeg fallback: {e}")
import shutil
shutil.copy(str(remotion_input_path), str(final_output_local_path))
else:
logger.warning(f"Remotion not ready: {remotion_health.get('error')}, using FFmpeg")
import shutil
shutil.copy(str(remotion_input_path), str(final_output_local_path))
else:
_update_task(task_id, message="正在合成最终视频...", progress=90)
if needs_audio_compose:
await video.compose(str(lipsync_video_path), str(final_audio_path), str(final_output_local_path))
else:
import shutil
shutil.copy(str(lipsync_video_path), str(final_output_local_path))
total_time = time.time() - start_time
_update_task(task_id, message="正在上传结果...", progress=95)
storage_path = f"{user_id}/{task_id}_output.mp4"
await storage_service.upload_file_from_path(
bucket=storage_service.BUCKET_OUTPUTS,
storage_path=storage_path,
local_file_path=str(final_output_local_path),
content_type="video/mp4"
)
signed_url = await storage_service.get_signed_url(
bucket=storage_service.BUCKET_OUTPUTS,
path=storage_path
)
print(f"[Pipeline] Total generation time: {total_time:.1f}s")
_update_task(
task_id,
status="completed",
progress=100,
message=f"生成完成!耗时 {total_time:.0f}",
output=storage_path,
download_url=signed_url,
)
except Exception as e:
_update_task(
task_id,
status="failed",
message=f"错误: {str(e)}",
error=traceback.format_exc(),
)
logger.error(f"Generate video failed: {e}")
finally:
for f in temp_files:
try:
if f.exists():
f.unlink()
except Exception as e:
print(f"Error cleaning up {f}: {e}")
async def get_lipsync_health():
lipsync = _get_lipsync_service()
return await lipsync.check_health()
async def get_voiceclone_health():
return await voice_clone_service.check_health()