ViGent2/backend/app/modules/videos/workflow.py

from typing import Optional, Any, List
from pathlib import Path
import asyncio
import time
import traceback
import httpx
from loguru import logger

from app.core.config import settings
from app.services.tts_service import TTSService
from app.services.video_service import VideoService
from app.services.lipsync_service import LipSyncService
from app.services.voice_clone_service import voice_clone_service
from app.services.assets_service import (
    get_style,
    get_default_style,
    resolve_bgm_path,
    prepare_style_for_remotion,
)
from app.services.storage import storage_service
from app.services.whisper_service import whisper_service
from app.services.remotion_service import remotion_service

from .schemas import GenerateRequest
from .task_store import task_store

# 全局并发限制：最多同时运行 2 个视频生成任务
_generation_semaphore = asyncio.Semaphore(2)


def _locale_to_whisper_lang(locale: str) -> str:
    """'en-US' → 'en', 'zh-CN' → 'zh'"""
    return locale.split("-")[0] if "-" in locale else locale


def _locale_to_tts_lang(locale: str) -> str:
    """'zh-CN' → 'Chinese', 'en-US' → 'English', 其他 → 'Auto'"""
    mapping = {"zh": "Chinese", "en": "English"}
    return mapping.get(locale.split("-")[0], "Auto")


_lipsync_service: Optional[LipSyncService] = None
_lipsync_ready: Optional[bool] = None
_lipsync_last_check: float = 0


def _get_lipsync_service() -> LipSyncService:
    """获取或创建 LipSync 服务实例（单例模式，避免重复初始化）"""
    global _lipsync_service
    if _lipsync_service is None:
        _lipsync_service = LipSyncService()
    return _lipsync_service


async def _check_lipsync_ready(force: bool = False) -> bool:
    """检查 LipSync 是否就绪（带缓存，5分钟内不重复检查）"""
    global _lipsync_ready, _lipsync_last_check

    now = time.time()
    if not force and _lipsync_ready is not None and (now - _lipsync_last_check) < 300:
        return bool(_lipsync_ready)

    lipsync = _get_lipsync_service()
    health = await lipsync.check_health()
    _lipsync_ready = health.get("ready", False)
    _lipsync_last_check = now
    print(f"[LipSync] Health check: ready={_lipsync_ready}")
    return bool(_lipsync_ready)


async def _download_material(path_or_url: str, temp_path: Path):
    """下载素材到临时文件 (流式下载，节省内存)"""
    if path_or_url.startswith("http"):
        timeout = httpx.Timeout(None)
        async with httpx.AsyncClient(timeout=timeout) as client:
            async with client.stream("GET", path_or_url) as resp:
                resp.raise_for_status()
                with open(temp_path, "wb") as f:
                    async for chunk in resp.aiter_bytes():
                        f.write(chunk)
    else:
        src = Path(path_or_url)
        if not src.is_absolute():
            src = settings.BASE_DIR.parent / path_or_url

        if src.exists():
            import shutil
            shutil.copy(src, temp_path)
        else:
            raise FileNotFoundError(f"Material not found: {path_or_url}")


def _update_task(task_id: str, **updates: Any) -> None:
    task_store.update(task_id, updates)


async def _run_blocking(func, *args):
    """在线程池执行阻塞函数，避免卡住事件循环。"""
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, func, *args)


# ── 多素材辅助函数 ──


def _split_equal(segments: List[dict], material_paths: List[str]) -> List[dict]:
    """按素材数量均分音频时长，对齐到最近的 Whisper 字边界。

    Args:
        segments: Whisper 产出的 segment 列表, 每个包含 words (字级时间戳)
        material_paths: 素材路径列表

    Returns:
        [{"material_path": "...", "start": 0.0, "end": 5.2, "index": 0}, ...]
    """
    # 展平所有 Whisper 字符
    all_chars: List[dict] = []
    for seg in segments:
        for w in seg.get("words", []):
            all_chars.append(w)

    n = len(material_paths)

    if not all_chars or n == 0:
        return [{"material_path": material_paths[0] if material_paths else "",
                 "start": 0.0, "end": 99999.0, "index": 0}]

    # 素材数不能超过字符数，否则边界会重复
    if n > len(all_chars):
        logger.warning(f"[MultiMat] 素材数({n}) > 字符数({len(all_chars)})，裁剪为 {len(all_chars)}")
        n = len(all_chars)

    total_start = all_chars[0]["start"]
    total_end = all_chars[-1]["end"]
    seg_dur = (total_end - total_start) / n

    # 计算 N-1 个分割点，对齐到最近的字边界
    boundaries = [0]  # 第一段从第 0 个字开始
    for i in range(1, n):
        target_time = total_start + i * seg_dur
        # 找到 start 时间最接近 target_time 的字
        best_idx = boundaries[-1] + 1  # 至少比上一个边界后移 1
        best_diff = float("inf")
        for j in range(boundaries[-1] + 1, len(all_chars)):
            diff = abs(all_chars[j]["start"] - target_time)
            if diff < best_diff:
                best_diff = diff
                best_idx = j
            elif diff > best_diff:
                break  # 时间递增，差值开始变大后可以停了
        boundaries.append(min(best_idx, len(all_chars) - 1))
    boundaries.append(len(all_chars))  # 最后一段到末尾

    # 按边界生成分配结果
    assignments: List[dict] = []
    for i in range(n):
        s_idx = boundaries[i]
        e_idx = boundaries[i + 1]
        if s_idx >= len(all_chars) or s_idx >= e_idx:
            continue
        assignments.append({
            "material_path": material_paths[i],
            "start": all_chars[s_idx]["start"],
            "end": all_chars[e_idx - 1]["end"],
            "text": "".join(c["word"] for c in all_chars[s_idx:e_idx]),
            "index": len(assignments),
        })

    if not assignments:
        return [{"material_path": material_paths[0], "start": 0.0, "end": 99999.0, "index": 0}]

    logger.info(f"[MultiMat] 均分 {len(all_chars)} 字为 {len(assignments)} 段")
    for a in assignments:
        dur = a["end"] - a["start"]
        logger.info(f"  段{a['index']}: [{a['start']:.2f}-{a['end']:.2f}s] ({dur:.1f}s) {a['text'][:20]}")

    return assignments


async def process_video_generation(task_id: str, req: GenerateRequest, user_id: str):
    _update_task(task_id, message="排队中...")
    async with _generation_semaphore:
        await _process_video_generation_inner(task_id, req, user_id)


async def _process_video_generation_inner(task_id: str, req: GenerateRequest, user_id: str):
    temp_files = []
    try:
        start_time = time.time()

        # ── 确定素材列表 ──
        material_paths: List[str] = []
        if req.custom_assignments and len(req.custom_assignments) > 1:
            material_paths = [a.material_path for a in req.custom_assignments if a.material_path]
        elif req.material_paths and len(req.material_paths) > 1:
            material_paths = req.material_paths
        else:
            material_paths = [req.material_path]

        is_multi = len(material_paths) > 1
        target_resolution = (1080, 1920) if req.output_aspect_ratio == "9:16" else (1920, 1080)

        logger.info(
            f"[Render] 输出画面比例: {req.output_aspect_ratio}, "
            f"目标分辨率: {target_resolution[0]}x{target_resolution[1]}"
        )

        _update_task(task_id, status="processing", progress=5, message="正在下载素材...")

        temp_dir = settings.UPLOAD_DIR / "temp"
        temp_dir.mkdir(parents=True, exist_ok=True)
        video = VideoService()
        input_material_path: Optional[Path] = None

        # 单素材模式：下载主素材
        if not is_multi:
            input_material_path = temp_dir / f"{task_id}_input.mp4"
            temp_files.append(input_material_path)
            await _download_material(material_paths[0], input_material_path)

            # 归一化旋转元数据（如 iPhone MOV 1920x1080 + rotation=-90）
            normalized_input_path = temp_dir / f"{task_id}_input_norm.mp4"
            normalized_result = await _run_blocking(
                video.normalize_orientation,
                str(input_material_path),
                str(normalized_input_path),
            )
            if normalized_result != str(input_material_path):
                temp_files.append(normalized_input_path)
                input_material_path = normalized_input_path

        _update_task(task_id, message="正在生成语音...", progress=10)

        audio_path = temp_dir / f"{task_id}_audio.wav"
        temp_files.append(audio_path)

        if req.generated_audio_id:
            # 新流程：使用预生成的配音
            _update_task(task_id, message="正在下载配音...", progress=12)
            audio_url = await storage_service.get_signed_url(
                bucket="generated-audios",
                path=req.generated_audio_id,
            )
            await _download_material(audio_url, audio_path)

            # 从元数据获取 language
            meta_path = req.generated_audio_id.replace("_audio.wav", "_audio.json")
            try:
                meta_url = await storage_service.get_signed_url(
                    bucket="generated-audios", path=meta_path,
                )
                import httpx as _httpx
                async with _httpx.AsyncClient(timeout=5.0) as client:
                    resp = await client.get(meta_url)
                    if resp.status_code == 200:
                        meta = resp.json()
                        req.language = meta.get("language", req.language)
                        # 无条件用配音元数据覆盖文案，确保字幕与配音语言一致
                        meta_text = meta.get("text", "")
                        if meta_text:
                            req.text = meta_text
            except Exception as e:
                logger.warning(f"读取配音元数据失败: {e}")

        elif req.tts_mode == "voiceclone":
            if not req.ref_audio_id or not req.ref_text:
                raise ValueError("声音克隆模式需要提供参考音频和参考文字")

            _update_task(task_id, message="正在下载参考音频...")

            ref_audio_local = temp_dir / f"{task_id}_ref.wav"
            temp_files.append(ref_audio_local)

            ref_audio_url = await storage_service.get_signed_url(
                bucket="ref-audios",
                path=req.ref_audio_id
            )
            await _download_material(ref_audio_url, ref_audio_local)

            _update_task(task_id, message="正在克隆声音...")
            await voice_clone_service.generate_audio(
                text=req.text,
                ref_audio_path=str(ref_audio_local),
                ref_text=req.ref_text,
                output_path=str(audio_path),
                language=_locale_to_tts_lang(req.language)
            )
        else:
            _update_task(task_id, message="正在生成语音 (EdgeTTS)...")
            tts = TTSService()
            await tts.generate_audio(req.text, req.voice, str(audio_path))

        tts_time = time.time() - start_time
        print(f"[Pipeline] TTS completed in {tts_time:.1f}s")

        lipsync = _get_lipsync_service()
        lipsync_video_path = temp_dir / f"{task_id}_lipsync.mp4"
        temp_files.append(lipsync_video_path)

        captions_path = None

        async def _whisper_and_split():
            """Whisper 对齐 → _split_equal 均分素材（公共逻辑）"""
            _update_task(task_id, message="正在生成字幕 (Whisper)...")
            _captions_path = temp_dir / f"{task_id}_captions.json"
            temp_files.append(_captions_path)
            captions_data = None
            try:
                captions_data = await whisper_service.align(
                    audio_path=str(audio_path),
                    text=req.text,
                    output_path=str(_captions_path),
                    language=_locale_to_whisper_lang(req.language),
                    original_text=req.text,
                )
                print(f"[Pipeline] Whisper alignment completed (multi-material)")
            except Exception as e:
                logger.warning(f"Whisper alignment failed: {e}")
                _captions_path = None

            _update_task(task_id, progress=15, message="正在分配素材...")

            if captions_data and captions_data.get("segments"):
                result = _split_equal(captions_data["segments"], material_paths)
            else:
                logger.warning("[MultiMat] Whisper 无数据，按时长均分")
                audio_dur = await _run_blocking(video._get_duration, str(audio_path))
                if audio_dur <= 0:
                    audio_dur = 30.0
                seg_dur = audio_dur / len(material_paths)
                result = [
                    {"material_path": material_paths[i], "start": i * seg_dur,
                     "end": (i + 1) * seg_dur, "index": i}
                    for i in range(len(material_paths))
                ]
            return result, _captions_path

        if is_multi:
            # ══════════════════════════════════════
            # 多素材流水线
            # ══════════════════════════════════════
            _update_task(task_id, progress=12, message="正在分配素材...")

            if req.custom_assignments and len(req.custom_assignments) == len(material_paths):
                # 用户自定义分配，跳过 Whisper 均分
                assignments = [
                    {
                        "material_path": a.material_path,
                        "start": a.start,
                        "end": a.end,
                        "source_start": a.source_start,
                        "source_end": a.source_end,
                        "index": i,
                    }
                    for i, a in enumerate(req.custom_assignments)
                ]
                # 仍然需要 Whisper 生成字幕（如果启用）
                captions_path = temp_dir / f"{task_id}_captions.json"
                temp_files.append(captions_path)
                if req.enable_subtitles:
                    _update_task(task_id, message="正在生成字幕 (Whisper)...")
                    try:
                        await whisper_service.align(
                            audio_path=str(audio_path),
                            text=req.text,
                            output_path=str(captions_path),
                            language=_locale_to_whisper_lang(req.language),
                            original_text=req.text,
                        )
                        print(f"[Pipeline] Whisper alignment completed (custom assignments)")
                    except Exception as e:
                        logger.warning(f"Whisper alignment failed: {e}")
                        captions_path = None
                else:
                    captions_path = None
            elif req.custom_assignments:
                logger.warning(
                    f"[MultiMat] custom_assignments 数量({len(req.custom_assignments)})"
                    f" 与素材数量({len(material_paths)})不一致，回退自动分配"
                )

                assignments, captions_path = await _whisper_and_split()

            else:
                assignments, captions_path = await _whisper_and_split()

            # 扩展段覆盖完整音频范围：首段从0开始，末段到音频结尾
            audio_duration = await _run_blocking(video._get_duration, str(audio_path))
            if assignments and audio_duration > 0:
                assignments[0]["start"] = 0.0
                assignments[-1]["end"] = audio_duration

            num_segments = len(assignments)
            print(f"[Pipeline] Multi-material: {num_segments} segments, {len(material_paths)} materials")

            if num_segments == 0:
                raise RuntimeError("Multi-material: no valid segments after splitting")

            lipsync_start = time.time()

            # ── 第一步：并行下载所有素材并检测分辨率 ──
            material_locals: List[Path] = []
            resolutions = []

            async def _download_and_normalize(i: int, assignment: dict):
                """下载单个素材并归一化方向"""
                material_local = temp_dir / f"{task_id}_material_{i}.mp4"
                temp_files.append(material_local)
                await _download_material(assignment["material_path"], material_local)

                normalized_material = temp_dir / f"{task_id}_material_{i}_norm.mp4"
                normalized_result = await _run_blocking(
                    video.normalize_orientation,
                    str(material_local),
                    str(normalized_material),
                )
                if normalized_result != str(material_local):
                    temp_files.append(normalized_material)
                    material_local = normalized_material

                res = video.get_resolution(str(material_local))
                return material_local, res

            download_tasks = [
                _download_and_normalize(i, assignment)
                for i, assignment in enumerate(assignments)
            ]
            download_results = await asyncio.gather(*download_tasks)
            for local, res in download_results:
                material_locals.append(local)
                resolutions.append(res)

            # 按用户选择的画面比例统一分辨率
            base_res = target_resolution
            need_scale = any(r != base_res for r in resolutions)
            if need_scale:
                logger.info(f"[MultiMat] 素材分辨率不一致，统一到 {base_res[0]}x{base_res[1]}")

            # ── 第二步：并行裁剪每段素材到对应时长 ──
            prepared_segments: List[Optional[Path]] = [None] * num_segments

            async def _prepare_one_segment(i: int, assignment: dict):
                """将单个素材裁剪/循环到对应时长"""
                seg_dur = assignment["end"] - assignment["start"]
                prepared_path = temp_dir / f"{task_id}_prepared_{i}.mp4"
                temp_files.append(prepared_path)
                prepare_target_res = None if resolutions[i] == base_res else base_res

                await _run_blocking(
                    video.prepare_segment,
                    str(material_locals[i]),
                    seg_dur,
                    str(prepared_path),
                    prepare_target_res,
                    assignment.get("source_start", 0.0),
                    assignment.get("source_end"),
                    25,
                )
                return i, prepared_path

            _update_task(
                task_id,
                progress=15,
                message=f"正在并行准备 {num_segments} 个素材片段..."
            )

            prepare_tasks = [
                _prepare_one_segment(i, assignment)
                for i, assignment in enumerate(assignments)
            ]
            prepare_results = await asyncio.gather(*prepare_tasks)
            for i, path in prepare_results:
                prepared_segments[i] = path

            # ── 第二步：拼接所有素材片段 ──
            _update_task(task_id, progress=50, message="正在拼接素材片段...")
            concat_path = temp_dir / f"{task_id}_concat.mp4"
            temp_files.append(concat_path)
            prepared_segment_paths = [str(p) for p in prepared_segments if p is not None]
            if len(prepared_segment_paths) != num_segments:
                raise RuntimeError("Multi-material: prepared segments mismatch")
            await _run_blocking(
                video.concat_videos,
                prepared_segment_paths,
                str(concat_path),
                25,
            )

            # ── 第三步：一次 LatentSync 推理 ──
            is_ready = await _check_lipsync_ready()

            if is_ready:
                _update_task(task_id, progress=55, message="正在合成唇形 (LatentSync)...")
                print(f"[LipSync] Multi-material: single LatentSync on concatenated video")
                try:
                    await lipsync.generate(
                        str(concat_path),
                        str(audio_path),
                        str(lipsync_video_path),
                        model_mode=req.lipsync_model,
                    )
                except Exception as e:
                    logger.warning(f"[LipSync] Failed, fallback to concat without lipsync: {e}")
                    import shutil
                    shutil.copy(str(concat_path), str(lipsync_video_path))
            else:
                print(f"[LipSync] Not ready, using concatenated video without lipsync")
                import shutil
                shutil.copy(str(concat_path), str(lipsync_video_path))

            lipsync_time = time.time() - lipsync_start
            print(f"[Pipeline] Multi-material prepare + concat + LipSync completed in {lipsync_time:.1f}s")
            _update_task(task_id, progress=80)

            # 如果用户关闭了字幕，清除 captions_path（Whisper 仅用于句子切分）
            if not req.enable_subtitles:
                captions_path = None

        else:
            # ══════════════════════════════════════
            # 单素材流水线（原有逻辑）
            # ══════════════════════════════════════

            if input_material_path is None:
                raise RuntimeError("单素材流程缺少输入素材")

            # 单素材：按用户选择画面比例统一到目标分辨率，并应用 source_start
            single_source_start = 0.0
            single_source_end = None
            if req.custom_assignments and len(req.custom_assignments) == 1:
                single_source_start = req.custom_assignments[0].source_start
                single_source_end = req.custom_assignments[0].source_end

            _update_task(task_id, progress=20, message="正在准备素材片段...")
            audio_dur = await _run_blocking(video._get_duration, str(audio_path))
            if audio_dur <= 0:
                audio_dur = 30.0
            single_res = await _run_blocking(video.get_resolution, str(input_material_path))
            single_target_res = None if single_res == target_resolution else target_resolution
            prepared_single_path = temp_dir / f"{task_id}_prepared_single.mp4"
            temp_files.append(prepared_single_path)
            await _run_blocking(
                video.prepare_segment,
                str(input_material_path),
                audio_dur,
                str(prepared_single_path),
                single_target_res,
                single_source_start,
                single_source_end,
                None,
            )
            input_material_path = prepared_single_path

            _update_task(task_id, progress=25)
            _update_task(task_id, message="正在合成唇形 (LatentSync)...", progress=30)

            lipsync_start = time.time()
            is_ready = await _check_lipsync_ready()

            if is_ready:
                print(f"[LipSync] Starting LatentSync inference...")
                _update_task(task_id, progress=35, message="正在运行 LatentSync 推理...")
                try:
                    await lipsync.generate(
                        str(input_material_path),
                        str(audio_path),
                        str(lipsync_video_path),
                        model_mode=req.lipsync_model,
                    )
                except Exception as e:
                    logger.warning(f"[LipSync] Failed on single-material, fallback to prepared video: {e}")
                    _update_task(task_id, message="唇形同步失败，使用原始视频...")
                    import shutil
                    shutil.copy(str(input_material_path), str(lipsync_video_path))
            else:
                print(f"[LipSync] LatentSync not ready, copying original video")
                _update_task(task_id, message="唇形同步不可用，使用原始视频...")
                import shutil
                shutil.copy(str(input_material_path), lipsync_video_path)

            lipsync_time = time.time() - lipsync_start
            print(f"[Pipeline] LipSync completed in {lipsync_time:.1f}s")
            _update_task(task_id, progress=80)

            # 单素材模式：Whisper 延迟到下方与 BGM 并行执行
            if not req.enable_subtitles:
                captions_path = None

        _update_task(task_id, progress=85)

        # ── Whisper 字幕 + BGM 混音 并行（两者都只依赖 audio_path）──
        final_audio_path = audio_path
        _whisper_task = None
        _bgm_task = None
        mix_output_path: Optional[Path] = None

        # 单素材模式下 Whisper 尚未执行，这里与 BGM 并行启动
        need_whisper = not is_multi and req.enable_subtitles and captions_path is None
        if need_whisper:
            captions_path = temp_dir / f"{task_id}_captions.json"
            temp_files.append(captions_path)
            _captions_path_str = str(captions_path)

            async def _run_whisper():
                _update_task(task_id, message="正在生成字幕 (Whisper)...", progress=82)
                try:
                    await whisper_service.align(
                        audio_path=str(audio_path),
                        text=req.text,
                        output_path=_captions_path_str,
                        language=_locale_to_whisper_lang(req.language),
                        original_text=req.text,
                    )
                    print(f"[Pipeline] Whisper alignment completed")
                    return True
                except Exception as e:
                    logger.warning(f"Whisper alignment failed, skipping subtitles: {e}")
                    return False

            _whisper_task = _run_whisper()

        if req.bgm_id:
            bgm_path = resolve_bgm_path(req.bgm_id)
            if bgm_path:
                mix_output_path = temp_dir / f"{task_id}_audio_mix.wav"
                temp_files.append(mix_output_path)
                volume = req.bgm_volume if req.bgm_volume is not None else 0.2
                volume = max(0.0, min(float(volume), 1.0))
                _mix_output = str(mix_output_path)
                _bgm_path = str(bgm_path)
                _voice_path = str(audio_path)
                _volume = volume

                async def _run_bgm():
                    _update_task(task_id, message="正在合成背景音乐...", progress=86)
                    try:
                        await _run_blocking(
                            video.mix_audio,
                            _voice_path,
                            _bgm_path,
                            _mix_output,
                            _volume,
                        )
                        return True
                    except Exception as e:
                        logger.warning(f"BGM mix failed, fallback to voice only: {e}")
                        return False

                _bgm_task = _run_bgm()
            else:
                logger.warning(f"BGM not found: {req.bgm_id}")

        # 并行等待 Whisper + BGM
        parallel_tasks = [t for t in (_whisper_task, _bgm_task) if t is not None]
        if parallel_tasks:
            results = await asyncio.gather(*parallel_tasks)
            result_idx = 0
            if _whisper_task is not None:
                if not results[result_idx]:
                    captions_path = None
                result_idx += 1
            if _bgm_task is not None:
                if results[result_idx] and mix_output_path is not None:
                    final_audio_path = mix_output_path


        use_remotion = (captions_path and captions_path.exists()) or req.title or req.secondary_title

        subtitle_style = None
        title_style = None
        secondary_title_style = None
        if req.enable_subtitles:
            subtitle_style = get_style("subtitle", req.subtitle_style_id) or get_default_style("subtitle")
        if req.title:
            title_style = get_style("title", req.title_style_id) or get_default_style("title")
        if req.secondary_title:
            secondary_title_style = get_style("title", req.secondary_title_style_id) or get_default_style("title")

        if req.subtitle_font_size and req.enable_subtitles:
            if subtitle_style is None:
                subtitle_style = {}
            subtitle_style["font_size"] = int(req.subtitle_font_size)

        if req.title_font_size and req.title:
            if title_style is None:
                title_style = {}
            title_style["font_size"] = int(req.title_font_size)

        if req.title_top_margin is not None and req.title:
            if title_style is None:
                title_style = {}
            title_style["top_margin"] = int(req.title_top_margin)

        if req.subtitle_bottom_margin is not None and req.enable_subtitles:
            if subtitle_style is None:
                subtitle_style = {}
            subtitle_style["bottom_margin"] = int(req.subtitle_bottom_margin)

        if req.secondary_title_font_size and req.secondary_title:
            if secondary_title_style is None:
                secondary_title_style = {}
            secondary_title_style["font_size"] = int(req.secondary_title_font_size)

        if req.secondary_title_top_margin is not None and req.secondary_title:
            if secondary_title_style is None:
                secondary_title_style = {}
            secondary_title_style["top_margin"] = int(req.secondary_title_top_margin)

        if use_remotion:
            subtitle_style = prepare_style_for_remotion(
                subtitle_style,
                temp_dir,
                f"{task_id}_subtitle_font"
            )
            title_style = prepare_style_for_remotion(
                title_style,
                temp_dir,
                f"{task_id}_title_font"
            )
            secondary_title_style = prepare_style_for_remotion(
                secondary_title_style,
                temp_dir,
                f"{task_id}_secondary_title_font"
            )

            # 清理字体临时文件
            for prefix in [f"{task_id}_subtitle_font", f"{task_id}_title_font", f"{task_id}_secondary_title_font"]:
                for ext in [".ttf", ".otf", ".woff", ".woff2"]:
                    font_tmp = temp_dir / f"{prefix}{ext}"
                    if font_tmp.exists():
                        temp_files.append(font_tmp)

        final_output_local_path = temp_dir / f"{task_id}_output.mp4"
        temp_files.append(final_output_local_path)
        needs_audio_compose = str(final_audio_path) != str(audio_path)

        if use_remotion:
            _update_task(task_id, message="正在合成视频 (Remotion)...", progress=87)
            remotion_input_path = lipsync_video_path

            if needs_audio_compose:
                composed_video_path = temp_dir / f"{task_id}_composed.mp4"
                temp_files.append(composed_video_path)
                await video.compose(str(lipsync_video_path), str(final_audio_path), str(composed_video_path))
                remotion_input_path = composed_video_path
            else:
                logger.info("[Pipeline] Audio unchanged, skip pre-Remotion compose")

            remotion_health = await remotion_service.check_health()
            if remotion_health.get("ready"):
                try:
                    def on_remotion_progress(percent):
                        mapped = 87 + int(percent * 0.08)
                        _update_task(task_id, progress=mapped)

                    title_display_mode = (
                        req.title_display_mode
                        if req.title_display_mode in ("short", "persistent")
                        else "short"
                    )
                    title_duration = max(0.5, min(float(req.title_duration or 4.0), 30.0))

                    await remotion_service.render(
                        video_path=str(remotion_input_path),
                        output_path=str(final_output_local_path),
                        captions_path=str(captions_path) if captions_path else None,
                        title=req.title,
                        title_duration=title_duration,
                        title_display_mode=title_display_mode,
                        fps=25,
                        enable_subtitles=req.enable_subtitles,
                        subtitle_style=subtitle_style,
                        title_style=title_style,
                        secondary_title=req.secondary_title,
                        secondary_title_style=secondary_title_style,
                        on_progress=on_remotion_progress
                    )
                    print(f"[Pipeline] Remotion render completed")
                except Exception as e:
                    logger.warning(f"Remotion render failed, using FFmpeg fallback: {e}")
                    import shutil
                    shutil.copy(str(remotion_input_path), str(final_output_local_path))
            else:
                logger.warning(f"Remotion not ready: {remotion_health.get('error')}, using FFmpeg")
                import shutil
                shutil.copy(str(remotion_input_path), str(final_output_local_path))
        else:
            _update_task(task_id, message="正在合成最终视频...", progress=90)
            if needs_audio_compose:
                await video.compose(str(lipsync_video_path), str(final_audio_path), str(final_output_local_path))
            else:
                import shutil
                shutil.copy(str(lipsync_video_path), str(final_output_local_path))

        total_time = time.time() - start_time

        _update_task(task_id, message="正在上传结果...", progress=95)

        storage_path = f"{user_id}/{task_id}_output.mp4"
        await storage_service.upload_file_from_path(
            bucket=storage_service.BUCKET_OUTPUTS,
            storage_path=storage_path,
            local_file_path=str(final_output_local_path),
            content_type="video/mp4"
        )

        signed_url = await storage_service.get_signed_url(
            bucket=storage_service.BUCKET_OUTPUTS,
            path=storage_path
        )

        print(f"[Pipeline] Total generation time: {total_time:.1f}s")

        _update_task(
            task_id,
            status="completed",
            progress=100,
            message=f"生成完成！耗时 {total_time:.0f} 秒",
            output=storage_path,
            download_url=signed_url,
        )

    except Exception as e:
        _update_task(
            task_id,
            status="failed",
            message=f"错误: {str(e)}",
            error=traceback.format_exc(),
        )
        logger.error(f"Generate video failed: {e}")
    finally:
        for f in temp_files:
            try:
                if f.exists():
                    f.unlink()
            except Exception as e:
                print(f"Error cleaning up {f}: {e}")


async def get_lipsync_health():
    lipsync = _get_lipsync_service()
    return await lipsync.check_health()


async def get_voiceclone_health():
    return await voice_clone_service.check_health()