Files
ViGent/backend/app/services/lipsync_service.py
2026-01-14 17:07:17 +08:00

244 lines
8.8 KiB
Python

"""
唇形同步服务
通过 subprocess 调用 MuseTalk conda 环境进行推理
配置为使用 GPU1 (CUDA:1)
"""
import asyncio
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Optional

import httpx
from loguru import logger

from app.core.config import settings
class LipSyncService:
    """唇形同步服务 - MuseTalk 集成 (Subprocess 方式)

    Runs MuseTalk inference either locally (a subprocess inside a dedicated
    conda environment, pinned to the configured GPU via CUDA_VISIBLE_DEVICES)
    or through a remote HTTP API. Whenever inference is impossible (missing
    env, missing weights, failure, timeout) the service degrades gracefully by
    copying the input video through unchanged, so callers always receive a
    playable file at ``output_path``.
    """

    # Hard ceiling for one local inference run, in seconds.
    INFERENCE_TIMEOUT = 600
    # Timeout for the remote API round trip, in seconds.
    REMOTE_TIMEOUT = 300.0

    def __init__(self):
        self.use_local = settings.MUSETALK_LOCAL
        self.api_url = settings.MUSETALK_API_URL
        self.version = settings.MUSETALK_VERSION
        self.musetalk_dir = settings.MUSETALK_DIR
        self.gpu_id = settings.MUSETALK_GPU_ID
        # Python interpreter of the "musetalk" conda env.
        # Adjust to match the actual server layout.
        self.conda_python = (
            Path.home() / "ProgramFiles" / "miniconda3"
            / "envs" / "musetalk" / "bin" / "python"
        )
        # Cached weight-check result; None means "not checked yet".
        self._weights_available: Optional[bool] = None

    @staticmethod
    def _fallback(video_path: str, output_path: str) -> str:
        """Degraded mode: pass the original video through unchanged."""
        shutil.copy(video_path, output_path)
        return output_path

    def _check_weights(self) -> bool:
        """检查模型权重是否存在

        Checks once that all required model weight directories exist under
        ``musetalk_dir`` and caches the result for subsequent calls.
        """
        if self._weights_available is not None:
            return self._weights_available
        required_dirs = [
            self.musetalk_dir / "models" / "musetalkV15",
            self.musetalk_dir / "models" / "whisper",
            self.musetalk_dir / "models" / "sd-vae-ft-mse",
        ]
        self._weights_available = all(d.exists() for d in required_dirs)
        if self._weights_available:
            logger.info("✅ MuseTalk 权重文件已就绪")
        else:
            missing = [str(d) for d in required_dirs if not d.exists()]
            logger.warning(f"⚠️ 缺少权重文件: {missing}")
        return self._weights_available

    def _check_conda_env(self) -> bool:
        """检查 conda 环境是否可用 (the env's python binary exists)."""
        if not self.conda_python.exists():
            logger.warning(f"⚠️ Conda Python 不存在: {self.conda_python}")
            return False
        return True

    async def generate(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int = 25
    ) -> str:
        """生成唇形同步视频

        Dispatches to local subprocess inference or the remote API depending
        on configuration. Always returns ``output_path`` pointing at an
        existing file (possibly the untouched input on fallback).
        """
        logger.info(f"🎬 唇形同步任务: {Path(video_path).name} + {Path(audio_path).name}")
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        if self.use_local:
            return await self._local_generate(video_path, audio_path, output_path, fps)
        return await self._remote_generate(video_path, audio_path, output_path, fps)

    async def _local_generate(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int
    ) -> str:
        """使用 subprocess 调用 MuseTalk conda 环境

        The blocking ``subprocess.run`` is executed in a worker thread via
        ``asyncio.to_thread`` so the event loop stays responsive during the
        (potentially many-minutes-long) inference.
        """
        # Preconditions: conda env and model weights must be present.
        if not self._check_conda_env():
            logger.warning("⚠️ Conda 环境不可用,使用 Fallback")
            return self._fallback(video_path, output_path)
        if not self._check_weights():
            logger.warning("⚠️ 模型权重不存在,使用 Fallback")
            return self._fallback(video_path, output_path)

        logger.info("🔄 调用 MuseTalk 推理 (subprocess)...")
        with tempfile.TemporaryDirectory() as tmp:
            workdir = Path(tmp)
            config_path = workdir / "inference_config.yaml"
            result_dir = workdir / "results"
            result_dir.mkdir()

            # Minimal MuseTalk inference config: a single task entry.
            config_content = f"""
task_0:
  video_path: "{video_path}"
  audio_path: "{audio_path}"
"""
            config_path.write_text(config_content)

            cmd = [
                str(self.conda_python),
                "-m", "scripts.inference",
                "--inference_config", str(config_path),
                "--result_dir", str(result_dir),
                "--version", self.version,
                # After CUDA_VISIBLE_DEVICES is applied, the only visible
                # device is index 0, so the script-level gpu_id is always 0.
                "--gpu_id", "0",
                "--batch_size", str(settings.MUSETALK_BATCH_SIZE),
            ]
            if settings.MUSETALK_USE_FLOAT16:
                cmd.append("--use_float16")

            # Pin inference to the configured physical GPU.
            env = os.environ.copy()
            env["CUDA_VISIBLE_DEVICES"] = str(self.gpu_id)
            logger.info(f"🖥️ 执行命令: {' '.join(cmd[:6])}...")

            try:
                # Off-load the blocking call so the event loop is not frozen.
                result = await asyncio.to_thread(
                    subprocess.run,
                    cmd,
                    cwd=str(self.musetalk_dir),
                    env=env,
                    capture_output=True,
                    text=True,
                    timeout=self.INFERENCE_TIMEOUT,
                )
            except subprocess.TimeoutExpired:
                logger.error("⏰ MuseTalk 推理超时")
                return self._fallback(video_path, output_path)
            except Exception as e:
                logger.error(f"❌ 推理异常: {e}")
                return self._fallback(video_path, output_path)

            if result.returncode != 0:
                logger.error(f"MuseTalk 推理失败:\n{result.stderr}")
                return self._fallback(video_path, output_path)

            logger.info(f"MuseTalk 输出:\n{result.stdout[-500:]}")
            # MuseTalk writes results into nested subdirectories;
            # take the first .mp4 found anywhere under result_dir.
            output_files = list(result_dir.rglob("*.mp4"))
            if not output_files:
                logger.warning("⚠️ 未找到输出文件,使用 Fallback")
                return self._fallback(video_path, output_path)

            shutil.copy(output_files[0], output_path)
            logger.info(f"✅ 唇形同步完成: {output_path}")
            return output_path

    async def _remote_generate(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int
    ) -> str:
        """调用远程 MuseTalk API 服务

        Posts video+audio as multipart form data to ``{api_url}/lipsync`` and
        writes the returned bytes to ``output_path``. Falls back to copying
        the input video on any failure.
        """
        logger.info(f"📡 调用远程 API: {self.api_url}")
        try:
            async with httpx.AsyncClient(timeout=self.REMOTE_TIMEOUT) as client:
                with open(video_path, "rb") as vf, open(audio_path, "rb") as af:
                    files = {
                        "video": (Path(video_path).name, vf, "video/mp4"),
                        "audio": (Path(audio_path).name, af, "audio/mpeg"),
                    }
                    response = await client.post(
                        f"{self.api_url}/lipsync",
                        files=files,
                        data={"fps": fps},
                    )
                    if response.status_code != 200:
                        raise RuntimeError(f"API 错误: {response.status_code}")
                    with open(output_path, "wb") as f:
                        f.write(response.content)
                    logger.info(f"✅ 远程推理完成: {output_path}")
                    return output_path
        except Exception as e:
            logger.error(f"远程 API 调用失败: {e}")
            return self._fallback(video_path, output_path)

    async def check_health(self) -> dict:
        """健康检查

        Returns a dict with keys ``conda_env``, ``weights``, ``gpu``,
        ``gpu_name``, ``gpu_id`` and ``ready`` (all three checks passed).
        """
        conda_ok = self._check_conda_env()
        weights_ok = self._check_weights()

        # GPU probe: ask the conda env's torch for the configured device.
        gpu_ok = False
        gpu_name = "Unknown"
        if conda_ok:
            try:
                # Interpreter startup + torch import can take seconds; run it
                # off the event loop like the inference call.
                result = await asyncio.to_thread(
                    subprocess.run,
                    [
                        str(self.conda_python), "-c",
                        "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'N/A')",
                    ],
                    capture_output=True,
                    text=True,
                    env={**os.environ, "CUDA_VISIBLE_DEVICES": str(self.gpu_id)},
                    timeout=10,
                )
                gpu_name = result.stdout.strip()
                gpu_ok = gpu_name != "N/A" and result.returncode == 0
            except Exception as e:
                # Was a bare `except: pass`; narrowed and logged so probe
                # failures are no longer invisible.
                logger.debug(f"GPU 检测失败: {e}")

        return {
            "conda_env": conda_ok,
            "weights": weights_ok,
            "gpu": gpu_ok,
            "gpu_name": gpu_name,
            "gpu_id": self.gpu_id,
            "ready": conda_ok and weights_ok and gpu_ok,
        }