Files
ViGent2/models/LatentSync/scripts/server.py
Kevin Wong c918dc6faf 更新
2026-01-23 18:09:12 +08:00

205 lines
7.1 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
import argparse
from pathlib import Path
# --- Auto-load GPU config (must run BEFORE `import torch`, since
#     CUDA_VISIBLE_DEVICES is only honored at CUDA initialization) ---
def load_gpu_config():
    """Read LATENTSYNC_GPU_ID from the backend .env and export CUDA_VISIBLE_DEVICES.

    Best-effort by design: any failure (missing file, parse error) falls back
    to the default GPU id and the server keeps starting. An externally
    provided CUDA_VISIBLE_DEVICES always takes precedence.
    """
    try:
        # Path: scripts/server.py -> scripts -> LatentSync -> models -> ViGent2 -> backend -> .env
        current_dir = Path(__file__).resolve().parent
        env_path = current_dir.parent.parent.parent / "backend" / ".env"
        target_gpu = "1"  # default fallback
        if env_path.exists():
            print(f"📖 读取配置文件: {env_path}")
            with open(env_path, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if line.startswith("LATENTSYNC_GPU_ID="):
                        # Bug fix: split("=", 1) so a value containing '='
                        # is not truncated; then strip an inline '#' comment.
                        val = line.split("=", 1)[1].strip().split("#")[0].strip()
                        if val:
                            target_gpu = val
                            print(f"⚙️ 发现配置 LATENTSYNC_GPU_ID={target_gpu}")
                        break
        # Only export when the caller's environment hasn't already pinned it.
        if "CUDA_VISIBLE_DEVICES" not in os.environ:
            os.environ["CUDA_VISIBLE_DEVICES"] = target_gpu
            print(f"✅ 已自动设置: CUDA_VISIBLE_DEVICES={target_gpu}")
        else:
            print(f" 检测到外部 CUDA_VISIBLE_DEVICES={os.environ['CUDA_VISIBLE_DEVICES']},跳过自动配置")
    except Exception as e:
        # Deliberate best-effort: a config problem must not prevent startup.
        print(f"⚠️ 读取 GPU 配置失败: {e},将使用默认设置")

load_gpu_config()
# --- Performance: cap CPU thread counts ---
# Prevent PyTorch/OpenMP/MKL from grabbing every core (56 threads on this
# host) and starving the Backend, Frontend and SSH sessions.
# NOTE(review): these env vars must be set before `import torch` below to
# take effect. TORCH_NUM_THREADS is not a variable torch itself documents
# (torch uses OMP_NUM_THREADS / torch.set_num_threads()) — presumably read
# by project tooling; confirm it is actually needed.
os.environ["OMP_NUM_THREADS"] = "8"
os.environ["MKL_NUM_THREADS"] = "8"
os.environ["TORCH_NUM_THREADS"] = "8"
print("⚙️ 已限制 PyTorch CPU 线程数为 8防止系统卡顿")
import torch
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from omegaconf import OmegaConf
from diffusers import AutoencoderKL, DDIMScheduler
from latentsync.models.unet import UNet3DConditionModel
from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
from latentsync.whisper.audio2feature import Audio2Feature
from accelerate.utils import set_seed
from DeepCache import DeepCacheSDHelper
# Global model cache, populated once at startup by the lifespan handler
# (keys: "pipeline", "config", "dtype") and read by every request.
models: dict = {}
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan: load all LatentSync models once at server startup.

    Everything loaded here is stashed in the module-level ``models`` dict and
    reused across requests; ``yield`` hands control to the running server,
    and the lines after it run at shutdown.
    """
    # --- Model-loading logic (mirrors inference.py) ---
    print("⏳ 正在加载 LatentSync 模型...")
    # Default config paths, relative to the repo root (models/LatentSync)
    unet_config_path = "configs/unet/stage2_512.yaml"
    ckpt_path = "checkpoints/latentsync_unet.pt"
    if not os.path.exists(unet_config_path):
        # Warning only — OmegaConf.load below will still fail if truly missing.
        print(f"⚠️ 找不到配置文件: {unet_config_path},请确保在 models/LatentSync 根目录运行")
    config = OmegaConf.load(unet_config_path)
    # fp16 only when compute capability major version is > 7 (Ampere and newer)
    is_fp16_supported = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] > 7
    dtype = torch.float16 if is_fp16_supported else torch.float32
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        print(f"🖥️ 正在使用 GPU: {gpu_name} (CUDA_VISIBLE_DEVICES 已生效)")
    else:
        print("⚠️ 警告: 未检测到 GPU将使用 CPU 进行推理 (速度极慢)")
    scheduler = DDIMScheduler.from_pretrained("configs")
    # Whisper checkpoint choice follows the UNet's cross-attention width
    if config.model.cross_attention_dim == 768:
        whisper_path = "checkpoints/whisper/small.pt"
    else:
        whisper_path = "checkpoints/whisper/tiny.pt"
    audio_encoder = Audio2Feature(
        model_path=whisper_path,
        device=device,
        num_frames=config.data.num_frames,
        audio_feat_length=config.data.audio_feat_length,
    )
    # VAE (Stable Diffusion fine-tuned VAE; factors overridden to SD defaults)
    vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=dtype)
    vae.config.scaling_factor = 0.18215
    vae.config.shift_factor = 0
    # UNet
    unet, _ = UNet3DConditionModel.from_pretrained(
        OmegaConf.to_container(config.model),
        ckpt_path,
        device="cpu",  # Load to CPU first to save memory during init
    )
    unet = unet.to(dtype=dtype)
    # Pipeline
    pipeline = LipsyncPipeline(
        vae=vae,
        audio_encoder=audio_encoder,
        unet=unet,
        scheduler=scheduler,
    ).to(device)
    # DeepCache (enabled by default) to speed up repeated denoising steps
    helper = DeepCacheSDHelper(pipe=pipeline)
    helper.set_params(cache_interval=3, cache_branch_id=0)
    helper.enable()
    models["pipeline"] = pipeline
    models["config"] = config
    models["dtype"] = dtype
    print("✅ LatentSync 模型加载完成,服务就绪!")
    yield
    # Shutdown: drop model references and release cached GPU memory
    models.clear()
    torch.cuda.empty_cache()
app = FastAPI(lifespan=lifespan)
class LipSyncRequest(BaseModel):
    """Request body for POST /lipsync. All paths are local to this server."""
    video_path: str       # input video; must exist on this host
    audio_path: str       # driving audio; must exist on this host
    video_out_path: str   # where the lip-synced result video is written
    inference_steps: int = 20    # number of denoising steps
    guidance_scale: float = 1.5  # classifier-free guidance strength
    seed: int = 1247             # -1 => non-deterministic seed
    temp_dir: str = "temp"       # scratch directory used by the pipeline
@app.get("/health")
def health_check():
    """Liveness probe: reports whether the inference pipeline is loaded."""
    pipeline_ready = "pipeline" in models
    return {"status": "ok", "model_loaded": pipeline_ready}
@app.post("/lipsync")
async def generate_lipsync(req: LipSyncRequest):
    """Run LatentSync lip-sync inference for one video/audio pair.

    Returns ``{"status": "success", "output_path": ...}`` on success.
    Raises HTTP 503 while the model is not loaded, 404 for missing input
    files, and 500 for inference failures.

    NOTE(review): the pipeline call is synchronous and blocks the event loop
    for the whole inference — acceptable for a single-worker GPU service, but
    concurrent requests will queue behind it.
    """
    if "pipeline" not in models:
        raise HTTPException(status_code=503, detail="Model not loaded")
    if not os.path.exists(req.video_path):
        raise HTTPException(status_code=404, detail=f"Video not found: {req.video_path}")
    if not os.path.exists(req.audio_path):
        raise HTTPException(status_code=404, detail=f"Audio not found: {req.audio_path}")
    print(f"🎬 收到任务: {Path(req.video_path).name} -> {Path(req.video_out_path).name}")
    try:
        pipeline = models["pipeline"]
        config = models["config"]
        dtype = models["dtype"]
        # Seed: -1 requests a fresh non-deterministic seed
        if req.seed != -1:
            set_seed(req.seed)
        else:
            torch.seed()
        # Run inference (writes the result to req.video_out_path)
        pipeline(
            video_path=req.video_path,
            audio_path=req.audio_path,
            video_out_path=req.video_out_path,
            num_frames=config.data.num_frames,
            num_inference_steps=req.inference_steps,
            guidance_scale=req.guidance_scale,
            weight_dtype=dtype,
            width=config.data.resolution,
            height=config.data.resolution,
            mask_image_path=config.data.mask_image_path,
            temp_dir=req.temp_dir,
        )
        if os.path.exists(req.video_out_path):
            return {"status": "success", "output_path": req.video_out_path}
        else:
            raise HTTPException(status_code=500, detail="Output file generation failed")
    except HTTPException:
        # Bug fix: previously the generic handler below caught our own
        # HTTPException and re-wrapped it, mangling its detail into str(e).
        raise
    except Exception as e:
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    import uvicorn
    # Listen on all interfaces; 8007 is this service's dedicated port.
    uvicorn.run(app, host="0.0.0.0", port=8007)