746 lines
31 KiB
Python
746 lines
31 KiB
Python
from typing import Optional, Any, List
|
||
from pathlib import Path
|
||
import time
|
||
import traceback
|
||
import httpx
|
||
from loguru import logger
|
||
|
||
from app.core.config import settings
|
||
from app.services.tts_service import TTSService
|
||
from app.services.video_service import VideoService
|
||
from app.services.lipsync_service import LipSyncService
|
||
from app.services.voice_clone_service import voice_clone_service
|
||
from app.services.assets_service import (
|
||
get_style,
|
||
get_default_style,
|
||
resolve_bgm_path,
|
||
prepare_style_for_remotion,
|
||
)
|
||
from app.services.storage import storage_service
|
||
from app.services.whisper_service import whisper_service
|
||
from app.services.remotion_service import remotion_service
|
||
|
||
from .schemas import GenerateRequest
|
||
from .task_store import task_store
|
||
|
||
|
||
def _locale_to_whisper_lang(locale: str) -> str:
|
||
"""'en-US' → 'en', 'zh-CN' → 'zh'"""
|
||
return locale.split("-")[0] if "-" in locale else locale
|
||
|
||
|
||
def _locale_to_tts_lang(locale: str) -> str:
|
||
"""'zh-CN' → 'Chinese', 'en-US' → 'English', 其他 → 'Auto'"""
|
||
mapping = {"zh": "Chinese", "en": "English"}
|
||
return mapping.get(locale.split("-")[0], "Auto")
|
||
|
||
|
||
# Lazily-created LipSyncService singleton (see _get_lipsync_service).
_lipsync_service: Optional[LipSyncService] = None
# Cached verdict of the last LipSync health check; None = never checked.
_lipsync_ready: Optional[bool] = None
# time.time() of the last health check, for the 5-minute cache window
# used by _check_lipsync_ready.
_lipsync_last_check: float = 0
|
||
|
||
|
||
def _get_lipsync_service() -> LipSyncService:
    """Return the process-wide LipSyncService, creating it on first use.

    A module-level singleton avoids re-initializing the service for every
    generation task.
    """
    global _lipsync_service
    if _lipsync_service is not None:
        return _lipsync_service
    _lipsync_service = LipSyncService()
    return _lipsync_service
|
||
|
||
|
||
async def _check_lipsync_ready(force: bool = False) -> bool:
    """Return whether the LipSync backend is ready, caching for 5 minutes.

    Args:
        force: When True, bypass the cache and query the service again.

    Returns:
        True if the most recent (or freshly performed) health check
        reported ``ready``.
    """
    global _lipsync_ready, _lipsync_last_check

    now = time.time()
    # Serve the cached verdict when it is recent enough (5-minute window).
    if not force and _lipsync_ready is not None and (now - _lipsync_last_check) < 300:
        return bool(_lipsync_ready)

    lipsync = _get_lipsync_service()
    health = await lipsync.check_health()
    _lipsync_ready = health.get("ready", False)
    _lipsync_last_check = now
    # Use the shared loguru logger instead of print, matching the logging
    # style of the rest of this module.
    logger.info(f"[LipSync] Health check: ready={_lipsync_ready}")
    return bool(_lipsync_ready)
|
||
|
||
|
||
async def _download_material(path_or_url: str, temp_path: Path):
|
||
"""下载素材到临时文件 (流式下载,节省内存)"""
|
||
if path_or_url.startswith("http"):
|
||
timeout = httpx.Timeout(None)
|
||
async with httpx.AsyncClient(timeout=timeout) as client:
|
||
async with client.stream("GET", path_or_url) as resp:
|
||
resp.raise_for_status()
|
||
with open(temp_path, "wb") as f:
|
||
async for chunk in resp.aiter_bytes():
|
||
f.write(chunk)
|
||
else:
|
||
src = Path(path_or_url)
|
||
if not src.is_absolute():
|
||
src = settings.BASE_DIR.parent / path_or_url
|
||
|
||
if src.exists():
|
||
import shutil
|
||
shutil.copy(src, temp_path)
|
||
else:
|
||
raise FileNotFoundError(f"Material not found: {path_or_url}")
|
||
|
||
|
||
def _update_task(task_id: str, **updates: Any) -> None:
    """Apply *updates* (status / progress / message / ...) to a task record.

    Thin convenience wrapper so pipeline code can pass keyword arguments
    instead of building a dict for ``task_store.update``.
    """
    task_store.update(task_id, updates)
|
||
|
||
|
||
# ── 多素材辅助函数 ──
|
||
|
||
|
||
def _split_equal(segments: List[dict], material_paths: List[str]) -> List[dict]:
|
||
"""按素材数量均分音频时长,对齐到最近的 Whisper 字边界。
|
||
|
||
Args:
|
||
segments: Whisper 产出的 segment 列表, 每个包含 words (字级时间戳)
|
||
material_paths: 素材路径列表
|
||
|
||
Returns:
|
||
[{"material_path": "...", "start": 0.0, "end": 5.2, "index": 0}, ...]
|
||
"""
|
||
# 展平所有 Whisper 字符
|
||
all_chars: List[dict] = []
|
||
for seg in segments:
|
||
for w in seg.get("words", []):
|
||
all_chars.append(w)
|
||
|
||
n = len(material_paths)
|
||
|
||
if not all_chars or n == 0:
|
||
return [{"material_path": material_paths[0] if material_paths else "",
|
||
"start": 0.0, "end": 99999.0, "index": 0}]
|
||
|
||
# 素材数不能超过字符数,否则边界会重复
|
||
if n > len(all_chars):
|
||
logger.warning(f"[MultiMat] 素材数({n}) > 字符数({len(all_chars)}),裁剪为 {len(all_chars)}")
|
||
n = len(all_chars)
|
||
|
||
total_start = all_chars[0]["start"]
|
||
total_end = all_chars[-1]["end"]
|
||
seg_dur = (total_end - total_start) / n
|
||
|
||
# 计算 N-1 个分割点,对齐到最近的字边界
|
||
boundaries = [0] # 第一段从第 0 个字开始
|
||
for i in range(1, n):
|
||
target_time = total_start + i * seg_dur
|
||
# 找到 start 时间最接近 target_time 的字
|
||
best_idx = boundaries[-1] + 1 # 至少比上一个边界后移 1
|
||
best_diff = float("inf")
|
||
for j in range(boundaries[-1] + 1, len(all_chars)):
|
||
diff = abs(all_chars[j]["start"] - target_time)
|
||
if diff < best_diff:
|
||
best_diff = diff
|
||
best_idx = j
|
||
elif diff > best_diff:
|
||
break # 时间递增,差值开始变大后可以停了
|
||
boundaries.append(min(best_idx, len(all_chars) - 1))
|
||
boundaries.append(len(all_chars)) # 最后一段到末尾
|
||
|
||
# 按边界生成分配结果
|
||
assignments: List[dict] = []
|
||
for i in range(n):
|
||
s_idx = boundaries[i]
|
||
e_idx = boundaries[i + 1]
|
||
if s_idx >= len(all_chars) or s_idx >= e_idx:
|
||
continue
|
||
assignments.append({
|
||
"material_path": material_paths[i],
|
||
"start": all_chars[s_idx]["start"],
|
||
"end": all_chars[e_idx - 1]["end"],
|
||
"text": "".join(c["word"] for c in all_chars[s_idx:e_idx]),
|
||
"index": len(assignments),
|
||
})
|
||
|
||
if not assignments:
|
||
return [{"material_path": material_paths[0], "start": 0.0, "end": 99999.0, "index": 0}]
|
||
|
||
logger.info(f"[MultiMat] 均分 {len(all_chars)} 字为 {len(assignments)} 段")
|
||
for a in assignments:
|
||
dur = a["end"] - a["start"]
|
||
logger.info(f" 段{a['index']}: [{a['start']:.2f}-{a['end']:.2f}s] ({dur:.1f}s) {a['text'][:20]}")
|
||
|
||
return assignments
|
||
|
||
|
||
async def process_video_generation(task_id: str, req: GenerateRequest, user_id: str) -> None:
    """Run the full video-generation pipeline for one task.

    Stages: pick the material list → produce the voice-over (pre-generated
    audio / voice clone / EdgeTTS) → lip-sync via LatentSync (multi-material
    runs split + concat first) → optional Whisper subtitles → optional BGM
    mix → Remotion or FFmpeg composition → upload to storage.

    Progress and status are reported through the shared task_store; any
    exception marks the task "failed", and every registered temp file is
    removed in the ``finally`` block.

    Args:
        task_id: Task identifier used for progress reporting and file naming.
        req: Validated generation request (materials, text, styles, flags).
            NOTE: ``req.language`` / ``req.text`` may be overwritten from the
            pre-generated audio's metadata.
        user_id: Owner of the task; prefixes the output storage path.
    """
    # Every intermediate file is registered here and deleted in `finally`.
    temp_files = []
    try:
        start_time = time.time()

        # ── Determine the material list ──
        # Priority: explicit per-material assignments → multi material_paths
        # → the single material_path.
        material_paths: List[str] = []
        if req.custom_assignments and len(req.custom_assignments) > 1:
            material_paths = [a.material_path for a in req.custom_assignments if a.material_path]
        elif req.material_paths and len(req.material_paths) > 1:
            material_paths = req.material_paths
        else:
            material_paths = [req.material_path]

        is_multi = len(material_paths) > 1
        # Portrait 9:16 or landscape 16:9 output canvas.
        target_resolution = (1080, 1920) if req.output_aspect_ratio == "9:16" else (1920, 1080)

        logger.info(
            f"[Render] 输出画面比例: {req.output_aspect_ratio}, "
            f"目标分辨率: {target_resolution[0]}x{target_resolution[1]}"
        )

        _update_task(task_id, status="processing", progress=5, message="正在下载素材...")

        temp_dir = settings.UPLOAD_DIR / "temp"
        temp_dir.mkdir(parents=True, exist_ok=True)
        video = VideoService()
        input_material_path: Optional[Path] = None

        # Single-material mode: download the main material now.
        if not is_multi:
            input_material_path = temp_dir / f"{task_id}_input.mp4"
            temp_files.append(input_material_path)
            await _download_material(material_paths[0], input_material_path)

            # Normalize rotation metadata (e.g. iPhone MOV 1920x1080 with
            # rotation=-90) so later resolution checks see upright frames.
            normalized_input_path = temp_dir / f"{task_id}_input_norm.mp4"
            normalized_result = video.normalize_orientation(
                str(input_material_path),
                str(normalized_input_path),
            )
            if normalized_result != str(input_material_path):
                temp_files.append(normalized_input_path)
                input_material_path = normalized_input_path

        _update_task(task_id, message="正在生成语音...", progress=10)

        audio_path = temp_dir / f"{task_id}_audio.wav"
        temp_files.append(audio_path)

        if req.generated_audio_id:
            # New flow: reuse a pre-generated voice-over from storage.
            _update_task(task_id, message="正在下载配音...", progress=12)
            audio_url = await storage_service.get_signed_url(
                bucket="generated-audios",
                path=req.generated_audio_id,
            )
            await _download_material(audio_url, audio_path)

            # Read language (and script text) from the side-car metadata file.
            meta_path = req.generated_audio_id.replace("_audio.wav", "_audio.json")
            try:
                meta_url = await storage_service.get_signed_url(
                    bucket="generated-audios", path=meta_path,
                )
                import httpx as _httpx
                async with _httpx.AsyncClient(timeout=5.0) as client:
                    resp = await client.get(meta_url)
                    if resp.status_code == 200:
                        meta = resp.json()
                        req.language = meta.get("language", req.language)
                        # Unconditionally overwrite the script with the
                        # metadata text so subtitles match the recorded
                        # voice-over's language.
                        meta_text = meta.get("text", "")
                        if meta_text:
                            req.text = meta_text
            except Exception as e:
                # Metadata is best-effort; fall back to the request's values.
                logger.warning(f"读取配音元数据失败: {e}")

        elif req.tts_mode == "voiceclone":
            # Voice-clone mode requires both a reference recording and its
            # transcript.
            if not req.ref_audio_id or not req.ref_text:
                raise ValueError("声音克隆模式需要提供参考音频和参考文字")

            _update_task(task_id, message="正在下载参考音频...")

            ref_audio_local = temp_dir / f"{task_id}_ref.wav"
            temp_files.append(ref_audio_local)

            ref_audio_url = await storage_service.get_signed_url(
                bucket="ref-audios",
                path=req.ref_audio_id
            )
            await _download_material(ref_audio_url, ref_audio_local)

            _update_task(task_id, message="正在克隆声音...")
            await voice_clone_service.generate_audio(
                text=req.text,
                ref_audio_path=str(ref_audio_local),
                ref_text=req.ref_text,
                output_path=str(audio_path),
                language=_locale_to_tts_lang(req.language)
            )
        else:
            # Default: plain EdgeTTS synthesis.
            _update_task(task_id, message="正在生成语音 (EdgeTTS)...")
            tts = TTSService()
            await tts.generate_audio(req.text, req.voice, str(audio_path))

        tts_time = time.time() - start_time
        print(f"[Pipeline] TTS completed in {tts_time:.1f}s")

        lipsync = _get_lipsync_service()
        lipsync_video_path = temp_dir / f"{task_id}_lipsync.mp4"
        temp_files.append(lipsync_video_path)

        captions_path = None

        if is_multi:
            # ══════════════════════════════════════
            # Multi-material pipeline
            # ══════════════════════════════════════
            _update_task(task_id, progress=12, message="正在分配素材...")

            if req.custom_assignments and len(req.custom_assignments) == len(material_paths):
                # User-defined assignment: skip the Whisper-based equal split.
                assignments = [
                    {
                        "material_path": a.material_path,
                        "start": a.start,
                        "end": a.end,
                        "source_start": a.source_start,
                        "source_end": a.source_end,
                        "index": i,
                    }
                    for i, a in enumerate(req.custom_assignments)
                ]
                # Whisper is still needed to produce subtitles (when enabled).
                captions_path = temp_dir / f"{task_id}_captions.json"
                temp_files.append(captions_path)
                if req.enable_subtitles:
                    _update_task(task_id, message="正在生成字幕 (Whisper)...")
                    try:
                        await whisper_service.align(
                            audio_path=str(audio_path),
                            text=req.text,
                            output_path=str(captions_path),
                            language=_locale_to_whisper_lang(req.language),
                            original_text=req.text,
                        )
                        print(f"[Pipeline] Whisper alignment completed (custom assignments)")
                    except Exception as e:
                        # Subtitles are optional: log and continue without them.
                        logger.warning(f"Whisper alignment failed: {e}")
                        captions_path = None
                else:
                    captions_path = None
            elif req.custom_assignments:
                # Count mismatch → ignore the custom plan and fall back to
                # automatic assignment.
                logger.warning(
                    f"[MultiMat] custom_assignments 数量({len(req.custom_assignments)})"
                    f" 与素材数量({len(material_paths)})不一致,回退自动分配"
                )

                # Original logic: Whisper → _split_equal
                _update_task(task_id, message="正在生成字幕 (Whisper)...")

                captions_path = temp_dir / f"{task_id}_captions.json"
                temp_files.append(captions_path)

                try:
                    captions_data = await whisper_service.align(
                        audio_path=str(audio_path),
                        text=req.text,
                        output_path=str(captions_path),
                        language=_locale_to_whisper_lang(req.language),
                        original_text=req.text,
                    )
                    print(f"[Pipeline] Whisper alignment completed (multi-material)")
                except Exception as e:
                    logger.warning(f"Whisper alignment failed: {e}")
                    captions_data = None
                    captions_path = None

                _update_task(task_id, progress=15, message="正在分配素材...")

                if captions_data and captions_data.get("segments"):
                    assignments = _split_equal(captions_data["segments"], material_paths)
                else:
                    # Whisper failed → split evenly by duration (no word
                    # alignment available).
                    logger.warning("[MultiMat] Whisper 无数据,按时长均分")
                    audio_dur = video._get_duration(str(audio_path))
                    if audio_dur <= 0:
                        audio_dur = 30.0  # safety fallback
                    seg_dur = audio_dur / len(material_paths)
                    assignments = [
                        {"material_path": material_paths[i], "start": i * seg_dur,
                         "end": (i + 1) * seg_dur, "index": i}
                        for i in range(len(material_paths))
                    ]

            else:
                # Original logic: Whisper → _split_equal
                # NOTE(review): this branch duplicates the elif branch above
                # verbatim — a candidate for extraction into a helper.
                _update_task(task_id, message="正在生成字幕 (Whisper)...")

                captions_path = temp_dir / f"{task_id}_captions.json"
                temp_files.append(captions_path)

                try:
                    captions_data = await whisper_service.align(
                        audio_path=str(audio_path),
                        text=req.text,
                        output_path=str(captions_path),
                        language=_locale_to_whisper_lang(req.language),
                        original_text=req.text,
                    )
                    print(f"[Pipeline] Whisper alignment completed (multi-material)")
                except Exception as e:
                    logger.warning(f"Whisper alignment failed: {e}")
                    captions_data = None
                    captions_path = None

                _update_task(task_id, progress=15, message="正在分配素材...")

                if captions_data and captions_data.get("segments"):
                    assignments = _split_equal(captions_data["segments"], material_paths)
                else:
                    # Whisper failed → split evenly by duration (no word
                    # alignment available).
                    logger.warning("[MultiMat] Whisper 无数据,按时长均分")
                    audio_dur = video._get_duration(str(audio_path))
                    if audio_dur <= 0:
                        audio_dur = 30.0  # safety fallback
                    seg_dur = audio_dur / len(material_paths)
                    assignments = [
                        {"material_path": material_paths[i], "start": i * seg_dur,
                         "end": (i + 1) * seg_dur, "index": i}
                        for i in range(len(material_paths))
                    ]

            # Stretch the segments to cover the whole audio range: first
            # segment starts at 0, last segment ends at the audio's end.
            audio_duration = video._get_duration(str(audio_path))
            if assignments and audio_duration > 0:
                assignments[0]["start"] = 0.0
                assignments[-1]["end"] = audio_duration

            num_segments = len(assignments)
            print(f"[Pipeline] Multi-material: {num_segments} segments, {len(material_paths)} materials")

            if num_segments == 0:
                raise RuntimeError("Multi-material: no valid segments after splitting")

            lipsync_start = time.time()

            # ── Step 1: download every material and detect its resolution ──
            material_locals: List[Path] = []
            resolutions = []

            for i, assignment in enumerate(assignments):
                material_local = temp_dir / f"{task_id}_material_{i}.mp4"
                temp_files.append(material_local)
                await _download_material(assignment["material_path"], material_local)

                # Normalize rotation metadata so the resolution check and the
                # later inference agree on orientation.
                normalized_material = temp_dir / f"{task_id}_material_{i}_norm.mp4"
                normalized_result = video.normalize_orientation(
                    str(material_local),
                    str(normalized_material),
                )
                if normalized_result != str(material_local):
                    temp_files.append(normalized_material)
                    material_local = normalized_material

                material_locals.append(material_local)
                resolutions.append(video.get_resolution(str(material_local)))

            # Unify resolution to the user's chosen aspect ratio.
            base_res = target_resolution
            need_scale = any(r != base_res for r in resolutions)
            if need_scale:
                logger.info(f"[MultiMat] 素材分辨率不一致,统一到 {base_res[0]}x{base_res[1]}")

            # ── Step 2: trim each material to its assigned duration ──
            prepared_segments: List[Path] = []

            for i, assignment in enumerate(assignments):
                seg_progress = 15 + int((i / num_segments) * 30)  # 15% → 45%
                seg_dur = assignment["end"] - assignment["start"]
                _update_task(
                    task_id,
                    progress=seg_progress,
                    message=f"正在准备素材 {i+1}/{num_segments}..."
                )

                prepared_path = temp_dir / f"{task_id}_prepared_{i}.mp4"
                temp_files.append(prepared_path)
                video.prepare_segment(
                    str(material_locals[i]), seg_dur, str(prepared_path),
                    # Re-encode every clip to the same resolution/codec before
                    # concatenation, otherwise concat would keep only the
                    # first clip's parameters.
                    target_resolution=base_res,
                    source_start=assignment.get("source_start", 0.0),
                    source_end=assignment.get("source_end"),
                    target_fps=25,
                )
                prepared_segments.append(prepared_path)

            # ── Step 3: concatenate the prepared clips ──
            _update_task(task_id, progress=50, message="正在拼接素材片段...")
            concat_path = temp_dir / f"{task_id}_concat.mp4"
            temp_files.append(concat_path)
            video.concat_videos(
                [str(p) for p in prepared_segments],
                str(concat_path),
                target_fps=25,
            )

            # ── Step 4: a single LatentSync inference over the whole concat ──
            is_ready = await _check_lipsync_ready()

            if is_ready:
                _update_task(task_id, progress=55, message="正在合成唇形 (LatentSync)...")
                print(f"[LipSync] Multi-material: single LatentSync on concatenated video")
                try:
                    await lipsync.generate(str(concat_path), str(audio_path), str(lipsync_video_path))
                except Exception as e:
                    # Lip-sync is best-effort: fall back to the plain concat.
                    logger.warning(f"[LipSync] Failed, fallback to concat without lipsync: {e}")
                    import shutil
                    shutil.copy(str(concat_path), str(lipsync_video_path))
            else:
                print(f"[LipSync] Not ready, using concatenated video without lipsync")
                import shutil
                shutil.copy(str(concat_path), str(lipsync_video_path))

            lipsync_time = time.time() - lipsync_start
            print(f"[Pipeline] Multi-material prepare + concat + LipSync completed in {lipsync_time:.1f}s")
            _update_task(task_id, progress=80)

            # If subtitles are disabled, drop captions_path (Whisper was only
            # used for sentence splitting).
            if not req.enable_subtitles:
                captions_path = None

        else:
            # ══════════════════════════════════════
            # Single-material pipeline (original logic)
            # ══════════════════════════════════════

            if input_material_path is None:
                raise RuntimeError("单素材流程缺少输入素材")

            # Single material: unify to the chosen aspect ratio and apply any
            # user-specified source trim window.
            single_source_start = 0.0
            single_source_end = None
            if req.custom_assignments and len(req.custom_assignments) == 1:
                single_source_start = req.custom_assignments[0].source_start
                single_source_end = req.custom_assignments[0].source_end

            _update_task(task_id, progress=20, message="正在准备素材片段...")
            audio_dur = video._get_duration(str(audio_path))
            if audio_dur <= 0:
                audio_dur = 30.0
            prepared_single_path = temp_dir / f"{task_id}_prepared_single.mp4"
            temp_files.append(prepared_single_path)
            video.prepare_segment(
                str(input_material_path),
                audio_dur,
                str(prepared_single_path),
                target_resolution=target_resolution,
                source_start=single_source_start,
                source_end=single_source_end,
            )
            input_material_path = prepared_single_path

            _update_task(task_id, progress=25)
            _update_task(task_id, message="正在合成唇形 (LatentSync)...", progress=30)

            lipsync_start = time.time()
            is_ready = await _check_lipsync_ready()

            if is_ready:
                print(f"[LipSync] Starting LatentSync inference...")
                _update_task(task_id, progress=35, message="正在运行 LatentSync 推理...")
                await lipsync.generate(str(input_material_path), str(audio_path), str(lipsync_video_path))
            else:
                # Lip-sync backend unavailable: pass the original video through.
                print(f"[LipSync] LatentSync not ready, copying original video")
                _update_task(task_id, message="唇形同步不可用,使用原始视频...")
                import shutil
                shutil.copy(str(input_material_path), lipsync_video_path)

            lipsync_time = time.time() - lipsync_start
            print(f"[Pipeline] LipSync completed in {lipsync_time:.1f}s")
            _update_task(task_id, progress=80)

            # Single-material mode: Whisper runs after LatentSync.
            if req.enable_subtitles:
                _update_task(task_id, message="正在生成字幕 (Whisper)...", progress=82)

                captions_path = temp_dir / f"{task_id}_captions.json"
                temp_files.append(captions_path)

                try:
                    await whisper_service.align(
                        audio_path=str(audio_path),
                        text=req.text,
                        output_path=str(captions_path),
                        language=_locale_to_whisper_lang(req.language),
                        original_text=req.text,
                    )
                    print(f"[Pipeline] Whisper alignment completed")
                except Exception as e:
                    logger.warning(f"Whisper alignment failed, skipping subtitles: {e}")
                    captions_path = None

        _update_task(task_id, progress=85)

        # ── Optional background-music mix ──
        final_audio_path = audio_path
        if req.bgm_id:
            _update_task(task_id, message="正在合成背景音乐...", progress=86)

            bgm_path = resolve_bgm_path(req.bgm_id)
            if bgm_path:
                mix_output_path = temp_dir / f"{task_id}_audio_mix.wav"
                temp_files.append(mix_output_path)
                # Clamp BGM volume to [0, 1]; default 0.2.
                volume = req.bgm_volume if req.bgm_volume is not None else 0.2
                volume = max(0.0, min(float(volume), 1.0))
                try:
                    video.mix_audio(
                        voice_path=str(audio_path),
                        bgm_path=str(bgm_path),
                        output_path=str(mix_output_path),
                        bgm_volume=volume
                    )
                    final_audio_path = mix_output_path
                except Exception as e:
                    # Best-effort: keep the voice-only track on mix failure.
                    logger.warning(f"BGM mix failed, fallback to voice only: {e}")
            else:
                logger.warning(f"BGM not found: {req.bgm_id}")

        # Remotion is only needed when there are captions or a title overlay.
        use_remotion = (captions_path and captions_path.exists()) or req.title

        # ── Resolve subtitle / title styles and per-request overrides ──
        subtitle_style = None
        title_style = None
        if req.enable_subtitles:
            subtitle_style = get_style("subtitle", req.subtitle_style_id) or get_default_style("subtitle")
        if req.title:
            title_style = get_style("title", req.title_style_id) or get_default_style("title")

        if req.subtitle_font_size and req.enable_subtitles:
            if subtitle_style is None:
                subtitle_style = {}
            subtitle_style["font_size"] = int(req.subtitle_font_size)

        if req.title_font_size and req.title:
            if title_style is None:
                title_style = {}
            title_style["font_size"] = int(req.title_font_size)

        if req.title_top_margin is not None and req.title:
            if title_style is None:
                title_style = {}
            title_style["top_margin"] = int(req.title_top_margin)

        if req.subtitle_bottom_margin is not None and req.enable_subtitles:
            if subtitle_style is None:
                subtitle_style = {}
            subtitle_style["bottom_margin"] = int(req.subtitle_bottom_margin)

        if use_remotion:
            # Materialize font assets etc. into files Remotion can read.
            subtitle_style = prepare_style_for_remotion(
                subtitle_style,
                temp_dir,
                f"{task_id}_subtitle_font"
            )
            title_style = prepare_style_for_remotion(
                title_style,
                temp_dir,
                f"{task_id}_title_font"
            )

        final_output_local_path = temp_dir / f"{task_id}_output.mp4"
        temp_files.append(final_output_local_path)

        if use_remotion:
            _update_task(task_id, message="正在合成视频 (Remotion)...", progress=87)

            # First mux lip-synced video with the final audio, then let
            # Remotion burn in captions/title on top.
            composed_video_path = temp_dir / f"{task_id}_composed.mp4"
            temp_files.append(composed_video_path)

            await video.compose(str(lipsync_video_path), str(final_audio_path), str(composed_video_path))

            remotion_health = await remotion_service.check_health()
            if remotion_health.get("ready"):
                try:
                    def on_remotion_progress(percent):
                        # Map Remotion's 0-100% into the 87-95% task window.
                        mapped = 87 + int(percent * 0.08)
                        _update_task(task_id, progress=mapped)

                    title_display_mode = (
                        req.title_display_mode
                        if req.title_display_mode in ("short", "persistent")
                        else "short"
                    )
                    # Clamp title duration to [0.5, 30] seconds, default 4.
                    title_duration = max(0.5, min(float(req.title_duration or 4.0), 30.0))

                    await remotion_service.render(
                        video_path=str(composed_video_path),
                        output_path=str(final_output_local_path),
                        captions_path=str(captions_path) if captions_path else None,
                        title=req.title,
                        title_duration=title_duration,
                        title_display_mode=title_display_mode,
                        fps=25,
                        enable_subtitles=req.enable_subtitles,
                        subtitle_style=subtitle_style,
                        title_style=title_style,
                        on_progress=on_remotion_progress
                    )
                    print(f"[Pipeline] Remotion render completed")
                except Exception as e:
                    # Remotion failure → ship the composed video without overlays.
                    logger.warning(f"Remotion render failed, using FFmpeg fallback: {e}")
                    import shutil
                    shutil.copy(str(composed_video_path), final_output_local_path)
            else:
                logger.warning(f"Remotion not ready: {remotion_health.get('error')}, using FFmpeg")
                import shutil
                shutil.copy(str(composed_video_path), final_output_local_path)
        else:
            # No overlays needed: FFmpeg mux is the final output.
            _update_task(task_id, message="正在合成最终视频...", progress=90)

            await video.compose(str(lipsync_video_path), str(final_audio_path), str(final_output_local_path))

        total_time = time.time() - start_time

        # ── Upload the result and publish a signed download URL ──
        _update_task(task_id, message="正在上传结果...", progress=95)

        storage_path = f"{user_id}/{task_id}_output.mp4"
        await storage_service.upload_file_from_path(
            bucket=storage_service.BUCKET_OUTPUTS,
            storage_path=storage_path,
            local_file_path=str(final_output_local_path),
            content_type="video/mp4"
        )

        signed_url = await storage_service.get_signed_url(
            bucket=storage_service.BUCKET_OUTPUTS,
            path=storage_path
        )

        print(f"[Pipeline] Total generation time: {total_time:.1f}s")

        _update_task(
            task_id,
            status="completed",
            progress=100,
            message=f"生成完成!耗时 {total_time:.0f} 秒",
            output=storage_path,
            download_url=signed_url,
        )

    except Exception as e:
        # Any failure anywhere in the pipeline marks the task failed and
        # records the full traceback for debugging.
        _update_task(
            task_id,
            status="failed",
            message=f"错误: {str(e)}",
            error=traceback.format_exc(),
        )
        logger.error(f"Generate video failed: {e}")
    finally:
        # Always remove intermediate files, even on failure.
        for f in temp_files:
            try:
                if f.exists():
                    f.unlink()
            except Exception as e:
                print(f"Error cleaning up {f}: {e}")
|
||
|
||
|
||
async def get_lipsync_health():
    """Proxy the LipSync backend's health report."""
    return await _get_lipsync_service().check_health()
|
||
|
||
|
||
async def get_voiceclone_health():
    """Proxy the voice-clone backend's health report."""
    return await voice_clone_service.check_health()
|