Files
ViGent/backend/app/services/lipsync_service.py
2026-01-14 17:07:17 +08:00

244 lines
8.8 KiB
Python

"""
唇形同步服务
通过 subprocess 调用 MuseTalk conda 环境进行推理
配置为使用 GPU1 (CUDA:1)
"""
import asyncio
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Optional

import httpx
from loguru import logger

from app.core.config import settings
class LipSyncService:
    """唇形同步服务 - MuseTalk 集成 (Subprocess 方式)

    Runs MuseTalk inference either locally (a subprocess inside a dedicated
    conda environment, pinned to the configured GPU via CUDA_VISIBLE_DEVICES)
    or through a remote HTTP API. Whenever inference is impossible (missing
    env, missing weights, failure, timeout) the service degrades gracefully by
    copying the input video through unchanged, so callers always receive a
    playable file at ``output_path``.
    """

    # Hard ceiling for one local inference run, in seconds.
    INFERENCE_TIMEOUT = 600
    # Timeout for the remote API round trip, in seconds.
    REMOTE_TIMEOUT = 300.0

    def __init__(self):
        self.use_local = settings.MUSETALK_LOCAL
        self.api_url = settings.MUSETALK_API_URL
        self.version = settings.MUSETALK_VERSION
        self.musetalk_dir = settings.MUSETALK_DIR
        self.gpu_id = settings.MUSETALK_GPU_ID
        # Python interpreter of the "musetalk" conda env.
        # Adjust to match the actual server layout.
        self.conda_python = (
            Path.home() / "ProgramFiles" / "miniconda3"
            / "envs" / "musetalk" / "bin" / "python"
        )
        # Cached weight-check result; None means "not checked yet".
        self._weights_available: Optional[bool] = None

    @staticmethod
    def _fallback(video_path: str, output_path: str) -> str:
        """Degraded mode: pass the original video through unchanged."""
        shutil.copy(video_path, output_path)
        return output_path

    def _check_weights(self) -> bool:
        """检查模型权重是否存在

        Checks once that all required model weight directories exist under
        ``musetalk_dir`` and caches the result for subsequent calls.
        """
        if self._weights_available is not None:
            return self._weights_available
        required_dirs = [
            self.musetalk_dir / "models" / "musetalkV15",
            self.musetalk_dir / "models" / "whisper",
            self.musetalk_dir / "models" / "sd-vae-ft-mse",
        ]
        self._weights_available = all(d.exists() for d in required_dirs)
        if self._weights_available:
            logger.info("✅ MuseTalk 权重文件已就绪")
        else:
            missing = [str(d) for d in required_dirs if not d.exists()]
            logger.warning(f"⚠️ 缺少权重文件: {missing}")
        return self._weights_available

    def _check_conda_env(self) -> bool:
        """检查 conda 环境是否可用 (the env's python binary exists)."""
        if not self.conda_python.exists():
            logger.warning(f"⚠️ Conda Python 不存在: {self.conda_python}")
            return False
        return True

    async def generate(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int = 25
    ) -> str:
        """生成唇形同步视频

        Dispatches to local subprocess inference or the remote API depending
        on configuration. Always returns ``output_path`` pointing at an
        existing file (possibly the untouched input on fallback).
        """
        logger.info(f"🎬 唇形同步任务: {Path(video_path).name} + {Path(audio_path).name}")
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        if self.use_local:
            return await self._local_generate(video_path, audio_path, output_path, fps)
        return await self._remote_generate(video_path, audio_path, output_path, fps)

    async def _local_generate(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int
    ) -> str:
        """使用 subprocess 调用 MuseTalk conda 环境

        The blocking ``subprocess.run`` is executed in a worker thread via
        ``asyncio.to_thread`` so the event loop stays responsive during the
        (potentially many-minutes-long) inference.
        """
        # Preconditions: conda env and model weights must be present.
        if not self._check_conda_env():
            logger.warning("⚠️ Conda 环境不可用,使用 Fallback")
            return self._fallback(video_path, output_path)
        if not self._check_weights():
            logger.warning("⚠️ 模型权重不存在,使用 Fallback")
            return self._fallback(video_path, output_path)

        logger.info("🔄 调用 MuseTalk 推理 (subprocess)...")
        with tempfile.TemporaryDirectory() as tmp:
            workdir = Path(tmp)
            config_path = workdir / "inference_config.yaml"
            result_dir = workdir / "results"
            result_dir.mkdir()

            # Minimal MuseTalk inference config: a single task entry.
            config_content = f"""
task_0:
  video_path: "{video_path}"
  audio_path: "{audio_path}"
"""
            config_path.write_text(config_content)

            cmd = [
                str(self.conda_python),
                "-m", "scripts.inference",
                "--inference_config", str(config_path),
                "--result_dir", str(result_dir),
                "--version", self.version,
                # After CUDA_VISIBLE_DEVICES is applied, the only visible
                # device is index 0, so the script-level gpu_id is always 0.
                "--gpu_id", "0",
                "--batch_size", str(settings.MUSETALK_BATCH_SIZE),
            ]
            if settings.MUSETALK_USE_FLOAT16:
                cmd.append("--use_float16")

            # Pin inference to the configured physical GPU.
            env = os.environ.copy()
            env["CUDA_VISIBLE_DEVICES"] = str(self.gpu_id)
            logger.info(f"🖥️ 执行命令: {' '.join(cmd[:6])}...")

            try:
                # Off-load the blocking call so the event loop is not frozen.
                result = await asyncio.to_thread(
                    subprocess.run,
                    cmd,
                    cwd=str(self.musetalk_dir),
                    env=env,
                    capture_output=True,
                    text=True,
                    timeout=self.INFERENCE_TIMEOUT,
                )
            except subprocess.TimeoutExpired:
                logger.error("⏰ MuseTalk 推理超时")
                return self._fallback(video_path, output_path)
            except Exception as e:
                logger.error(f"❌ 推理异常: {e}")
                return self._fallback(video_path, output_path)

            if result.returncode != 0:
                logger.error(f"MuseTalk 推理失败:\n{result.stderr}")
                return self._fallback(video_path, output_path)

            logger.info(f"MuseTalk 输出:\n{result.stdout[-500:]}")
            # MuseTalk writes results into nested subdirectories;
            # take the first .mp4 found anywhere under result_dir.
            output_files = list(result_dir.rglob("*.mp4"))
            if not output_files:
                logger.warning("⚠️ 未找到输出文件,使用 Fallback")
                return self._fallback(video_path, output_path)

            shutil.copy(output_files[0], output_path)
            logger.info(f"✅ 唇形同步完成: {output_path}")
            return output_path

    async def _remote_generate(
        self,
        video_path: str,
        audio_path: str,
        output_path: str,
        fps: int
    ) -> str:
        """调用远程 MuseTalk API 服务

        Posts video+audio as multipart form data to ``{api_url}/lipsync`` and
        writes the returned bytes to ``output_path``. Falls back to copying
        the input video on any failure.
        """
        logger.info(f"📡 调用远程 API: {self.api_url}")
        try:
            async with httpx.AsyncClient(timeout=self.REMOTE_TIMEOUT) as client:
                with open(video_path, "rb") as vf, open(audio_path, "rb") as af:
                    files = {
                        "video": (Path(video_path).name, vf, "video/mp4"),
                        "audio": (Path(audio_path).name, af, "audio/mpeg"),
                    }
                    response = await client.post(
                        f"{self.api_url}/lipsync",
                        files=files,
                        data={"fps": fps},
                    )
                    if response.status_code != 200:
                        raise RuntimeError(f"API 错误: {response.status_code}")
                    with open(output_path, "wb") as f:
                        f.write(response.content)
                    logger.info(f"✅ 远程推理完成: {output_path}")
                    return output_path
        except Exception as e:
            logger.error(f"远程 API 调用失败: {e}")
            return self._fallback(video_path, output_path)

    async def check_health(self) -> dict:
        """健康检查

        Returns a dict with keys ``conda_env``, ``weights``, ``gpu``,
        ``gpu_name``, ``gpu_id`` and ``ready`` (all three checks passed).
        """
        conda_ok = self._check_conda_env()
        weights_ok = self._check_weights()

        # GPU probe: ask the conda env's torch for the configured device.
        gpu_ok = False
        gpu_name = "Unknown"
        if conda_ok:
            try:
                # Interpreter startup + torch import can take seconds; run it
                # off the event loop like the inference call.
                result = await asyncio.to_thread(
                    subprocess.run,
                    [
                        str(self.conda_python), "-c",
                        "import torch; print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'N/A')",
                    ],
                    capture_output=True,
                    text=True,
                    env={**os.environ, "CUDA_VISIBLE_DEVICES": str(self.gpu_id)},
                    timeout=10,
                )
                gpu_name = result.stdout.strip()
                gpu_ok = gpu_name != "N/A" and result.returncode == 0
            except Exception as e:
                # Was a bare `except: pass`; narrowed and logged so probe
                # failures are no longer invisible.
                logger.debug(f"GPU 检测失败: {e}")

        return {
            "conda_env": conda_ok,
            "weights": weights_ok,
            "gpu": gpu_ok,
            "gpu_name": gpu_name,
            "gpu_id": self.gpu_id,
            "ready": conda_ok and weights_ok and gpu_ok,
        }