更新
This commit is contained in:
@@ -57,7 +57,17 @@ class Settings(BaseSettings):
|
||||
LATENTSYNC_ENABLE_DEEPCACHE: bool = True # 启用 DeepCache 加速
|
||||
LATENTSYNC_SEED: int = 1247 # 随机种子 (-1 则随机)
|
||||
LATENTSYNC_USE_SERVER: bool = True # 使用常驻服务 (Persistent Server) 加速
|
||||
|
||||
|
||||
# MuseTalk 配置
|
||||
MUSETALK_GPU_ID: int = 0 # GPU ID (默认使用 GPU0)
|
||||
MUSETALK_API_URL: str = "http://localhost:8011" # 常驻服务地址
|
||||
MUSETALK_BATCH_SIZE: int = 8 # 推理批大小
|
||||
MUSETALK_VERSION: str = "v15" # 模型版本
|
||||
MUSETALK_USE_FLOAT16: bool = True # 半精度加速
|
||||
|
||||
# 混合唇形同步路由
|
||||
LIPSYNC_DURATION_THRESHOLD: float = 120.0 # 秒,>=此值用 MuseTalk
|
||||
|
||||
# Supabase 配置
|
||||
SUPABASE_URL: str = ""
|
||||
SUPABASE_PUBLIC_URL: str = "" # 公网访问地址,用于生成前端可访问的 URL
|
||||
@@ -93,6 +103,11 @@ class Settings(BaseSettings):
|
||||
"""LatentSync 目录路径 (动态计算)"""
|
||||
return self.BASE_DIR.parent.parent / "models" / "LatentSync"
|
||||
|
||||
@property
|
||||
def MUSETALK_DIR(self) -> Path:
|
||||
"""MuseTalk 目录路径 (动态计算)"""
|
||||
return self.BASE_DIR.parent.parent / "models" / "MuseTalk"
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
extra = "ignore" # 忽略未知的环境变量
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
AI 相关 API 路由
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from loguru import logger
|
||||
@@ -25,6 +27,12 @@ class GenerateMetaResponse(BaseModel):
|
||||
tags: list[str]
|
||||
|
||||
|
||||
class RewriteRequest(BaseModel):
|
||||
"""改写请求"""
|
||||
text: str
|
||||
custom_prompt: Optional[str] = None
|
||||
|
||||
|
||||
class TranslateRequest(BaseModel):
|
||||
"""翻译请求"""
|
||||
text: str
|
||||
@@ -73,3 +81,18 @@ async def generate_meta(req: GenerateMetaRequest):
|
||||
except Exception as e:
|
||||
logger.error(f"Generate meta failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/rewrite")
|
||||
async def rewrite_script(req: RewriteRequest):
|
||||
"""AI 改写文案"""
|
||||
if not req.text or not req.text.strip():
|
||||
raise HTTPException(status_code=400, detail="文案不能为空")
|
||||
|
||||
try:
|
||||
logger.info(f"Rewriting text: {req.text[:50]}...")
|
||||
rewritten = await glm_service.rewrite_script(req.text.strip(), req.custom_prompt)
|
||||
return success_response({"rewritten_text": rewritten})
|
||||
except Exception as e:
|
||||
logger.error(f"Rewrite failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@@ -63,11 +63,15 @@ async def extract_script(file=None, url: Optional[str] = None, rewrite: bool = T
|
||||
# 2. 提取文案 (Whisper)
|
||||
script = await whisper_service.transcribe(str(audio_path))
|
||||
|
||||
# 3. AI 改写 (GLM)
|
||||
# 3. AI 改写 (GLM) — 失败时降级返回原文
|
||||
rewritten = None
|
||||
if rewrite and script and len(script.strip()) > 0:
|
||||
logger.info("Rewriting script...")
|
||||
rewritten = await glm_service.rewrite_script(script, custom_prompt)
|
||||
try:
|
||||
rewritten = await glm_service.rewrite_script(script, custom_prompt)
|
||||
except Exception as e:
|
||||
logger.warning(f"GLM rewrite failed, returning original script: {e}")
|
||||
rewritten = None
|
||||
|
||||
return {
|
||||
"original_script": script,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from typing import Optional, Any, List
|
||||
from pathlib import Path
|
||||
import asyncio
|
||||
import time
|
||||
import traceback
|
||||
import httpx
|
||||
@@ -415,18 +416,21 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
|
||||
lipsync_start = time.time()
|
||||
|
||||
# ── 第一步:下载所有素材并检测分辨率 ──
|
||||
# ── 第一步:并行下载所有素材并检测分辨率 ──
|
||||
material_locals: List[Path] = []
|
||||
resolutions = []
|
||||
|
||||
for i, assignment in enumerate(assignments):
|
||||
async def _download_and_normalize(i: int, assignment: dict):
|
||||
"""下载单个素材并归一化方向"""
|
||||
material_local = temp_dir / f"{task_id}_material_{i}.mp4"
|
||||
temp_files.append(material_local)
|
||||
await _download_material(assignment["material_path"], material_local)
|
||||
|
||||
# 归一化旋转元数据,确保分辨率判断与后续推理一致
|
||||
normalized_material = temp_dir / f"{task_id}_material_{i}_norm.mp4"
|
||||
normalized_result = video.normalize_orientation(
|
||||
loop = asyncio.get_event_loop()
|
||||
normalized_result = await loop.run_in_executor(
|
||||
None,
|
||||
video.normalize_orientation,
|
||||
str(material_local),
|
||||
str(normalized_material),
|
||||
)
|
||||
@@ -434,8 +438,17 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
temp_files.append(normalized_material)
|
||||
material_local = normalized_material
|
||||
|
||||
material_locals.append(material_local)
|
||||
resolutions.append(video.get_resolution(str(material_local)))
|
||||
res = video.get_resolution(str(material_local))
|
||||
return material_local, res
|
||||
|
||||
download_tasks = [
|
||||
_download_and_normalize(i, assignment)
|
||||
for i, assignment in enumerate(assignments)
|
||||
]
|
||||
download_results = await asyncio.gather(*download_tasks)
|
||||
for local, res in download_results:
|
||||
material_locals.append(local)
|
||||
resolutions.append(res)
|
||||
|
||||
# 按用户选择的画面比例统一分辨率
|
||||
base_res = target_resolution
|
||||
@@ -443,29 +456,42 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
if need_scale:
|
||||
logger.info(f"[MultiMat] 素材分辨率不一致,统一到 {base_res[0]}x{base_res[1]}")
|
||||
|
||||
# ── 第二步:裁剪每段素材到对应时长 ──
|
||||
prepared_segments: List[Path] = []
|
||||
# ── 第二步:并行裁剪每段素材到对应时长 ──
|
||||
prepared_segments: List[Path] = [None] * num_segments
|
||||
|
||||
for i, assignment in enumerate(assignments):
|
||||
seg_progress = 15 + int((i / num_segments) * 30) # 15% → 45%
|
||||
async def _prepare_one_segment(i: int, assignment: dict):
|
||||
"""将单个素材裁剪/循环到对应时长"""
|
||||
seg_dur = assignment["end"] - assignment["start"]
|
||||
_update_task(
|
||||
task_id,
|
||||
progress=seg_progress,
|
||||
message=f"正在准备素材 {i+1}/{num_segments}..."
|
||||
)
|
||||
|
||||
prepared_path = temp_dir / f"{task_id}_prepared_{i}.mp4"
|
||||
temp_files.append(prepared_path)
|
||||
video.prepare_segment(
|
||||
str(material_locals[i]), seg_dur, str(prepared_path),
|
||||
# 多素材拼接前统一重编码为同分辨率/同编码,避免 concat 仅保留首段
|
||||
target_resolution=base_res,
|
||||
source_start=assignment.get("source_start", 0.0),
|
||||
source_end=assignment.get("source_end"),
|
||||
target_fps=25,
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
video.prepare_segment,
|
||||
str(material_locals[i]),
|
||||
seg_dur,
|
||||
str(prepared_path),
|
||||
base_res,
|
||||
assignment.get("source_start", 0.0),
|
||||
assignment.get("source_end"),
|
||||
25,
|
||||
)
|
||||
prepared_segments.append(prepared_path)
|
||||
return i, prepared_path
|
||||
|
||||
_update_task(
|
||||
task_id,
|
||||
progress=15,
|
||||
message=f"正在并行准备 {num_segments} 个素材片段..."
|
||||
)
|
||||
|
||||
prepare_tasks = [
|
||||
_prepare_one_segment(i, assignment)
|
||||
for i, assignment in enumerate(assignments)
|
||||
]
|
||||
prepare_results = await asyncio.gather(*prepare_tasks)
|
||||
for i, path in prepare_results:
|
||||
prepared_segments[i] = path
|
||||
|
||||
# ── 第二步:拼接所有素材片段 ──
|
||||
_update_task(task_id, progress=50, message="正在拼接素材片段...")
|
||||
@@ -553,51 +579,89 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
print(f"[Pipeline] LipSync completed in {lipsync_time:.1f}s")
|
||||
_update_task(task_id, progress=80)
|
||||
|
||||
# 单素材模式:Whisper 在 LatentSync 之后
|
||||
if req.enable_subtitles:
|
||||
# 单素材模式:Whisper 延迟到下方与 BGM 并行执行
|
||||
if not req.enable_subtitles:
|
||||
captions_path = None
|
||||
|
||||
_update_task(task_id, progress=85)
|
||||
|
||||
# ── Whisper 字幕 + BGM 混音 并行(两者都只依赖 audio_path)──
|
||||
final_audio_path = audio_path
|
||||
_whisper_task = None
|
||||
_bgm_task = None
|
||||
|
||||
# 单素材模式下 Whisper 尚未执行,这里与 BGM 并行启动
|
||||
need_whisper = not is_multi and req.enable_subtitles and captions_path is None
|
||||
if need_whisper:
|
||||
captions_path = temp_dir / f"{task_id}_captions.json"
|
||||
temp_files.append(captions_path)
|
||||
_captions_path_str = str(captions_path)
|
||||
|
||||
async def _run_whisper():
|
||||
_update_task(task_id, message="正在生成字幕 (Whisper)...", progress=82)
|
||||
|
||||
captions_path = temp_dir / f"{task_id}_captions.json"
|
||||
temp_files.append(captions_path)
|
||||
|
||||
try:
|
||||
await whisper_service.align(
|
||||
audio_path=str(audio_path),
|
||||
text=req.text,
|
||||
output_path=str(captions_path),
|
||||
output_path=_captions_path_str,
|
||||
language=_locale_to_whisper_lang(req.language),
|
||||
original_text=req.text,
|
||||
)
|
||||
print(f"[Pipeline] Whisper alignment completed")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.warning(f"Whisper alignment failed, skipping subtitles: {e}")
|
||||
captions_path = None
|
||||
return False
|
||||
|
||||
_update_task(task_id, progress=85)
|
||||
_whisper_task = _run_whisper()
|
||||
|
||||
final_audio_path = audio_path
|
||||
if req.bgm_id:
|
||||
_update_task(task_id, message="正在合成背景音乐...", progress=86)
|
||||
|
||||
bgm_path = resolve_bgm_path(req.bgm_id)
|
||||
if bgm_path:
|
||||
mix_output_path = temp_dir / f"{task_id}_audio_mix.wav"
|
||||
temp_files.append(mix_output_path)
|
||||
volume = req.bgm_volume if req.bgm_volume is not None else 0.2
|
||||
volume = max(0.0, min(float(volume), 1.0))
|
||||
try:
|
||||
video.mix_audio(
|
||||
voice_path=str(audio_path),
|
||||
bgm_path=str(bgm_path),
|
||||
output_path=str(mix_output_path),
|
||||
bgm_volume=volume
|
||||
)
|
||||
final_audio_path = mix_output_path
|
||||
except Exception as e:
|
||||
logger.warning(f"BGM mix failed, fallback to voice only: {e}")
|
||||
_mix_output = str(mix_output_path)
|
||||
_bgm_path = str(bgm_path)
|
||||
_voice_path = str(audio_path)
|
||||
_volume = volume
|
||||
|
||||
async def _run_bgm():
|
||||
_update_task(task_id, message="正在合成背景音乐...", progress=86)
|
||||
loop = asyncio.get_event_loop()
|
||||
try:
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
video.mix_audio,
|
||||
_voice_path,
|
||||
_bgm_path,
|
||||
_mix_output,
|
||||
_volume,
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.warning(f"BGM mix failed, fallback to voice only: {e}")
|
||||
return False
|
||||
|
||||
_bgm_task = _run_bgm()
|
||||
else:
|
||||
logger.warning(f"BGM not found: {req.bgm_id}")
|
||||
|
||||
# 并行等待 Whisper + BGM
|
||||
parallel_tasks = [t for t in (_whisper_task, _bgm_task) if t is not None]
|
||||
if parallel_tasks:
|
||||
results = await asyncio.gather(*parallel_tasks)
|
||||
result_idx = 0
|
||||
if _whisper_task is not None:
|
||||
if not results[result_idx]:
|
||||
captions_path = None
|
||||
result_idx += 1
|
||||
if _bgm_task is not None:
|
||||
if results[result_idx]:
|
||||
final_audio_path = mix_output_path
|
||||
|
||||
|
||||
use_remotion = (captions_path and captions_path.exists()) or req.title or req.secondary_title
|
||||
|
||||
subtitle_style = None
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
唇形同步服务
|
||||
通过 subprocess 调用 LatentSync conda 环境进行推理
|
||||
配置为使用 GPU1 (CUDA:1)
|
||||
混合方案: 短视频用 LatentSync (高质量), 长视频用 MuseTalk (高速度)
|
||||
路由阈值: LIPSYNC_DURATION_THRESHOLD (默认 120s)
|
||||
"""
|
||||
import os
|
||||
import shutil
|
||||
@@ -17,15 +17,18 @@ from app.core.config import settings
|
||||
|
||||
|
||||
class LipSyncService:
|
||||
"""唇形同步服务 - LatentSync 1.6 集成 (Subprocess 方式)"""
|
||||
|
||||
"""唇形同步服务 - LatentSync 1.6 + MuseTalk 1.5 混合方案"""
|
||||
|
||||
def __init__(self):
|
||||
self.use_local = settings.LATENTSYNC_LOCAL
|
||||
self.api_url = settings.LATENTSYNC_API_URL
|
||||
self.latentsync_dir = settings.LATENTSYNC_DIR
|
||||
self.gpu_id = settings.LATENTSYNC_GPU_ID
|
||||
self.use_server = settings.LATENTSYNC_USE_SERVER
|
||||
|
||||
|
||||
# MuseTalk 配置
|
||||
self.musetalk_api_url = settings.MUSETALK_API_URL
|
||||
|
||||
# GPU 并发锁 (Serial Queue)
|
||||
self._lock = asyncio.Lock()
|
||||
|
||||
@@ -103,7 +106,7 @@ class LipSyncService:
|
||||
"-t", str(target_duration), # 截取到目标时长
|
||||
"-c:v", "libx264",
|
||||
"-preset", "fast",
|
||||
"-crf", "18",
|
||||
"-crf", "23",
|
||||
"-an", # 去掉原音频
|
||||
output_path
|
||||
]
|
||||
@@ -268,6 +271,18 @@ class LipSyncService:
|
||||
else:
|
||||
actual_video_path = video_path
|
||||
|
||||
# 混合路由: 长视频走 MuseTalk,短视频走 LatentSync
|
||||
if audio_duration and audio_duration >= settings.LIPSYNC_DURATION_THRESHOLD:
|
||||
logger.info(
|
||||
f"🔄 音频 {audio_duration:.1f}s >= {settings.LIPSYNC_DURATION_THRESHOLD}s,路由到 MuseTalk"
|
||||
)
|
||||
musetalk_result = await self._call_musetalk_server(
|
||||
actual_video_path, audio_path, output_path
|
||||
)
|
||||
if musetalk_result:
|
||||
return musetalk_result
|
||||
logger.warning("⚠️ MuseTalk 不可用,回退到 LatentSync(长视频,会较慢)")
|
||||
|
||||
if self.use_server:
|
||||
# 模式 A: 调用常驻服务 (加速模式)
|
||||
return await self._call_persistent_server(actual_video_path, audio_path, output_path)
|
||||
@@ -352,6 +367,55 @@ class LipSyncService:
|
||||
shutil.copy(video_path, output_path)
|
||||
return output_path
|
||||
|
||||
async def _call_musetalk_server(
|
||||
self, video_path: str, audio_path: str, output_path: str
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
调用 MuseTalk 常驻服务。
|
||||
成功返回 output_path,不可用返回 None(信号上层回退到 LatentSync)。
|
||||
"""
|
||||
server_url = self.musetalk_api_url
|
||||
logger.info(f"⚡ 调用 MuseTalk 服务: {server_url}")
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=3600.0) as client:
|
||||
# 健康检查
|
||||
try:
|
||||
resp = await client.get(f"{server_url}/health", timeout=5.0)
|
||||
if resp.status_code != 200:
|
||||
logger.warning("⚠️ MuseTalk 健康检查失败")
|
||||
return None
|
||||
health = resp.json()
|
||||
if not health.get("model_loaded"):
|
||||
logger.warning("⚠️ MuseTalk 模型未加载")
|
||||
return None
|
||||
except Exception:
|
||||
logger.warning("⚠️ 无法连接 MuseTalk 服务")
|
||||
return None
|
||||
|
||||
# 发送推理请求
|
||||
payload = {
|
||||
"video_path": str(Path(video_path).resolve()),
|
||||
"audio_path": str(Path(audio_path).resolve()),
|
||||
"video_out_path": str(Path(output_path).resolve()),
|
||||
"batch_size": settings.MUSETALK_BATCH_SIZE,
|
||||
}
|
||||
|
||||
response = await client.post(f"{server_url}/lipsync", json=payload)
|
||||
|
||||
if response.status_code == 200:
|
||||
result = response.json()
|
||||
if Path(result["output_path"]).exists():
|
||||
logger.info(f"✅ MuseTalk 推理完成: {output_path}")
|
||||
return output_path
|
||||
|
||||
logger.error(f"❌ MuseTalk 服务报错: {response.text}")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ MuseTalk 调用失败: {e}")
|
||||
return None
|
||||
|
||||
async def _call_persistent_server(self, video_path: str, audio_path: str, output_path: str) -> str:
|
||||
"""调用本地常驻服务 (server.py)"""
|
||||
server_url = "http://localhost:8007"
|
||||
@@ -477,8 +541,18 @@ class LipSyncService:
|
||||
except:
|
||||
pass
|
||||
|
||||
# 检查 MuseTalk 服务
|
||||
musetalk_ready = False
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
resp = await client.get(f"{self.musetalk_api_url}/health")
|
||||
if resp.status_code == 200:
|
||||
musetalk_ready = resp.json().get("model_loaded", False)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return {
|
||||
"model": "LatentSync 1.6",
|
||||
"model": "LatentSync 1.6 + MuseTalk 1.5",
|
||||
"conda_env": conda_ok,
|
||||
"weights": weights_ok,
|
||||
"gpu": gpu_ok,
|
||||
@@ -486,5 +560,7 @@ class LipSyncService:
|
||||
"gpu_id": self.gpu_id,
|
||||
"inference_steps": settings.LATENTSYNC_INFERENCE_STEPS,
|
||||
"guidance_scale": settings.LATENTSYNC_GUIDANCE_SCALE,
|
||||
"ready": conda_ok and weights_ok and gpu_ok
|
||||
"ready": conda_ok and weights_ok and gpu_ok,
|
||||
"musetalk_ready": musetalk_ready,
|
||||
"lipsync_threshold": settings.LIPSYNC_DURATION_THRESHOLD,
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
"""
|
||||
视频合成服务
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import json
|
||||
import shlex
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
from typing import Optional
|
||||
|
||||
"""
|
||||
视频合成服务
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import json
|
||||
import shlex
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
from typing import Optional
|
||||
|
||||
class VideoService:
|
||||
def __init__(self):
|
||||
pass
|
||||
@@ -96,7 +96,7 @@ class VideoService:
|
||||
"-map", "0:a?",
|
||||
"-c:v", "libx264",
|
||||
"-preset", "fast",
|
||||
"-crf", "18",
|
||||
"-crf", "23",
|
||||
"-c:a", "copy",
|
||||
"-movflags", "+faststart",
|
||||
output_path,
|
||||
@@ -113,146 +113,146 @@ class VideoService:
|
||||
|
||||
logger.warning("视频方向归一化失败,回退使用原视频")
|
||||
return video_path
|
||||
|
||||
def _run_ffmpeg(self, cmd: list) -> bool:
|
||||
cmd_str = ' '.join(shlex.quote(str(c)) for c in cmd)
|
||||
logger.debug(f"FFmpeg CMD: {cmd_str}")
|
||||
try:
|
||||
# Synchronous call for BackgroundTasks compatibility
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
shell=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
encoding='utf-8',
|
||||
)
|
||||
if result.returncode != 0:
|
||||
logger.error(f"FFmpeg Error: {result.stderr}")
|
||||
return False
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"FFmpeg Exception: {e}")
|
||||
return False
|
||||
|
||||
def _get_duration(self, file_path: str) -> float:
|
||||
# Synchronous call for BackgroundTasks compatibility
|
||||
# 使用参数列表形式避免 shell=True 的命令注入风险
|
||||
cmd = [
|
||||
'ffprobe', '-v', 'error',
|
||||
'-show_entries', 'format=duration',
|
||||
'-of', 'default=noprint_wrappers=1:nokey=1',
|
||||
file_path
|
||||
]
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
return float(result.stdout.strip())
|
||||
except Exception:
|
||||
return 0.0
|
||||
|
||||
def mix_audio(
|
||||
self,
|
||||
voice_path: str,
|
||||
bgm_path: str,
|
||||
output_path: str,
|
||||
bgm_volume: float = 0.2
|
||||
) -> str:
|
||||
"""混合人声与背景音乐"""
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
volume = max(0.0, min(float(bgm_volume), 1.0))
|
||||
filter_complex = (
|
||||
f"[0:a]volume=1.0[a0];"
|
||||
f"[1:a]volume={volume}[a1];"
|
||||
f"[a0][a1]amix=inputs=2:duration=first:dropout_transition=2:normalize=0[aout]"
|
||||
)
|
||||
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-i", voice_path,
|
||||
"-stream_loop", "-1", "-i", bgm_path,
|
||||
"-filter_complex", filter_complex,
|
||||
"-map", "[aout]",
|
||||
"-c:a", "pcm_s16le",
|
||||
"-shortest",
|
||||
output_path,
|
||||
]
|
||||
|
||||
if self._run_ffmpeg(cmd):
|
||||
return output_path
|
||||
raise RuntimeError("FFmpeg audio mix failed")
|
||||
|
||||
async def compose(
|
||||
self,
|
||||
video_path: str,
|
||||
audio_path: str,
|
||||
output_path: str,
|
||||
subtitle_path: Optional[str] = None
|
||||
) -> str:
|
||||
"""合成视频"""
|
||||
# Ensure output dir
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
video_duration = self._get_duration(video_path)
|
||||
audio_duration = self._get_duration(audio_path)
|
||||
|
||||
# Audio loop if needed
|
||||
loop_count = 1
|
||||
if audio_duration > video_duration and video_duration > 0:
|
||||
loop_count = int(audio_duration / video_duration) + 1
|
||||
|
||||
cmd = ["ffmpeg", "-y"]
|
||||
|
||||
# Input video (stream_loop must be before -i)
|
||||
if loop_count > 1:
|
||||
cmd.extend(["-stream_loop", str(loop_count)])
|
||||
cmd.extend(["-i", video_path])
|
||||
|
||||
# Input audio
|
||||
cmd.extend(["-i", audio_path])
|
||||
|
||||
# Filter complex
|
||||
filter_complex = []
|
||||
|
||||
# Subtitles (skip for now to mimic previous state or implement basic)
|
||||
# Previous state: subtitles disabled due to font issues
|
||||
# if subtitle_path: ...
|
||||
|
||||
# Audio map with high quality encoding
|
||||
cmd.extend([
|
||||
"-c:v", "libx264",
|
||||
"-preset", "slow", # 慢速预设,更好的压缩效率
|
||||
"-crf", "18", # 高质量(与 LatentSync 一致)
|
||||
"-c:a", "aac",
|
||||
"-b:a", "192k", # 音频比特率
|
||||
"-shortest"
|
||||
])
|
||||
# Use audio from input 1
|
||||
cmd.extend(["-map", "0:v", "-map", "1:a"])
|
||||
|
||||
cmd.append(output_path)
|
||||
|
||||
if self._run_ffmpeg(cmd):
|
||||
return output_path
|
||||
else:
|
||||
raise RuntimeError("FFmpeg composition failed")
|
||||
|
||||
|
||||
def _run_ffmpeg(self, cmd: list) -> bool:
|
||||
cmd_str = ' '.join(shlex.quote(str(c)) for c in cmd)
|
||||
logger.debug(f"FFmpeg CMD: {cmd_str}")
|
||||
try:
|
||||
# Synchronous call for BackgroundTasks compatibility
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
shell=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
encoding='utf-8',
|
||||
)
|
||||
if result.returncode != 0:
|
||||
logger.error(f"FFmpeg Error: {result.stderr}")
|
||||
return False
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"FFmpeg Exception: {e}")
|
||||
return False
|
||||
|
||||
def _get_duration(self, file_path: str) -> float:
|
||||
# Synchronous call for BackgroundTasks compatibility
|
||||
# 使用参数列表形式避免 shell=True 的命令注入风险
|
||||
cmd = [
|
||||
'ffprobe', '-v', 'error',
|
||||
'-show_entries', 'format=duration',
|
||||
'-of', 'default=noprint_wrappers=1:nokey=1',
|
||||
file_path
|
||||
]
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
return float(result.stdout.strip())
|
||||
except Exception:
|
||||
return 0.0
|
||||
|
||||
def mix_audio(
|
||||
self,
|
||||
voice_path: str,
|
||||
bgm_path: str,
|
||||
output_path: str,
|
||||
bgm_volume: float = 0.2
|
||||
) -> str:
|
||||
"""混合人声与背景音乐"""
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
volume = max(0.0, min(float(bgm_volume), 1.0))
|
||||
filter_complex = (
|
||||
f"[0:a]volume=1.0[a0];"
|
||||
f"[1:a]volume={volume}[a1];"
|
||||
f"[a0][a1]amix=inputs=2:duration=first:dropout_transition=2:normalize=0[aout]"
|
||||
)
|
||||
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-i", voice_path,
|
||||
"-stream_loop", "-1", "-i", bgm_path,
|
||||
"-filter_complex", filter_complex,
|
||||
"-map", "[aout]",
|
||||
"-c:a", "pcm_s16le",
|
||||
"-shortest",
|
||||
output_path,
|
||||
]
|
||||
|
||||
if self._run_ffmpeg(cmd):
|
||||
return output_path
|
||||
raise RuntimeError("FFmpeg audio mix failed")
|
||||
|
||||
async def compose(
|
||||
self,
|
||||
video_path: str,
|
||||
audio_path: str,
|
||||
output_path: str,
|
||||
subtitle_path: Optional[str] = None
|
||||
) -> str:
|
||||
"""合成视频"""
|
||||
# Ensure output dir
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
video_duration = self._get_duration(video_path)
|
||||
audio_duration = self._get_duration(audio_path)
|
||||
|
||||
# Audio loop if needed
|
||||
loop_count = 1
|
||||
if audio_duration > video_duration and video_duration > 0:
|
||||
loop_count = int(audio_duration / video_duration) + 1
|
||||
|
||||
cmd = ["ffmpeg", "-y"]
|
||||
|
||||
# Input video (stream_loop must be before -i)
|
||||
if loop_count > 1:
|
||||
cmd.extend(["-stream_loop", str(loop_count)])
|
||||
cmd.extend(["-i", video_path])
|
||||
|
||||
# Input audio
|
||||
cmd.extend(["-i", audio_path])
|
||||
|
||||
# Filter complex
|
||||
filter_complex = []
|
||||
|
||||
# Subtitles (skip for now to mimic previous state or implement basic)
|
||||
# Previous state: subtitles disabled due to font issues
|
||||
# if subtitle_path: ...
|
||||
|
||||
# Audio map with high quality encoding
|
||||
cmd.extend([
|
||||
"-c:v", "libx264",
|
||||
"-preset", "medium", # 平衡速度与压缩效率
|
||||
"-crf", "20", # 最终输出:高质量(肉眼无损)
|
||||
"-c:a", "aac",
|
||||
"-b:a", "192k", # 音频比特率
|
||||
"-shortest"
|
||||
])
|
||||
# Use audio from input 1
|
||||
cmd.extend(["-map", "0:v", "-map", "1:a"])
|
||||
|
||||
cmd.append(output_path)
|
||||
|
||||
if self._run_ffmpeg(cmd):
|
||||
return output_path
|
||||
else:
|
||||
raise RuntimeError("FFmpeg composition failed")
|
||||
|
||||
def concat_videos(self, video_paths: list, output_path: str, target_fps: int = 25) -> str:
|
||||
"""使用 FFmpeg concat demuxer 拼接多个视频片段"""
|
||||
if not video_paths:
|
||||
raise ValueError("No video segments to concat")
|
||||
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 生成 concat list 文件
|
||||
list_path = Path(output_path).parent / f"{Path(output_path).stem}_concat.txt"
|
||||
with open(list_path, "w", encoding="utf-8") as f:
|
||||
for vp in video_paths:
|
||||
f.write(f"file '{vp}'\n")
|
||||
|
||||
if not video_paths:
|
||||
raise ValueError("No video segments to concat")
|
||||
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 生成 concat list 文件
|
||||
list_path = Path(output_path).parent / f"{Path(output_path).stem}_concat.txt"
|
||||
with open(list_path, "w", encoding="utf-8") as f:
|
||||
for vp in video_paths:
|
||||
f.write(f"file '{vp}'\n")
|
||||
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-f", "concat",
|
||||
@@ -264,44 +264,44 @@ class VideoService:
|
||||
"-r", str(target_fps),
|
||||
"-c:v", "libx264",
|
||||
"-preset", "fast",
|
||||
"-crf", "18",
|
||||
"-crf", "23",
|
||||
"-pix_fmt", "yuv420p",
|
||||
"-movflags", "+faststart",
|
||||
output_path,
|
||||
]
|
||||
|
||||
try:
|
||||
if self._run_ffmpeg(cmd):
|
||||
return output_path
|
||||
else:
|
||||
raise RuntimeError("FFmpeg concat failed")
|
||||
finally:
|
||||
try:
|
||||
list_path.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def split_audio(self, audio_path: str, start: float, end: float, output_path: str) -> str:
|
||||
"""用 FFmpeg 按时间范围切分音频"""
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
duration = end - start
|
||||
if duration <= 0:
|
||||
raise ValueError(f"Invalid audio split range: start={start}, end={end}, duration={duration}")
|
||||
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-ss", str(start),
|
||||
"-t", str(duration),
|
||||
"-i", audio_path,
|
||||
"-c", "copy",
|
||||
output_path,
|
||||
]
|
||||
|
||||
if self._run_ffmpeg(cmd):
|
||||
return output_path
|
||||
raise RuntimeError(f"FFmpeg audio split failed: {start}-{end}")
|
||||
|
||||
|
||||
try:
|
||||
if self._run_ffmpeg(cmd):
|
||||
return output_path
|
||||
else:
|
||||
raise RuntimeError("FFmpeg concat failed")
|
||||
finally:
|
||||
try:
|
||||
list_path.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def split_audio(self, audio_path: str, start: float, end: float, output_path: str) -> str:
|
||||
"""用 FFmpeg 按时间范围切分音频"""
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
duration = end - start
|
||||
if duration <= 0:
|
||||
raise ValueError(f"Invalid audio split range: start={start}, end={end}, duration={duration}")
|
||||
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-ss", str(start),
|
||||
"-t", str(duration),
|
||||
"-i", audio_path,
|
||||
"-c", "copy",
|
||||
output_path,
|
||||
]
|
||||
|
||||
if self._run_ffmpeg(cmd):
|
||||
return output_path
|
||||
raise RuntimeError(f"FFmpeg audio split failed: {start}-{end}")
|
||||
|
||||
def get_resolution(self, file_path: str) -> tuple[int, int]:
|
||||
"""获取视频有效显示分辨率(考虑旋转元数据)。"""
|
||||
info = self.get_video_metadata(file_path)
|
||||
@@ -309,7 +309,7 @@ class VideoService:
|
||||
int(info.get("effective_width") or 0),
|
||||
int(info.get("effective_height") or 0),
|
||||
)
|
||||
|
||||
|
||||
def prepare_segment(self, video_path: str, target_duration: float, output_path: str,
|
||||
target_resolution: Optional[tuple] = None, source_start: float = 0.0,
|
||||
source_end: Optional[float] = None, target_fps: Optional[int] = None) -> str:
|
||||
@@ -353,21 +353,21 @@ class VideoService:
|
||||
"-i", video_path,
|
||||
"-t", str(available),
|
||||
"-an",
|
||||
"-c:v", "libx264", "-preset", "fast", "-crf", "18",
|
||||
"-c:v", "libx264", "-preset", "fast", "-crf", "23",
|
||||
trim_temp,
|
||||
]
|
||||
if not self._run_ffmpeg(trim_cmd):
|
||||
raise RuntimeError(f"FFmpeg trim for loop failed: {video_path}")
|
||||
actual_input = trim_temp
|
||||
source_start = 0.0 # 已裁剪,不需要再 seek
|
||||
# 重新计算循环次数(基于裁剪后文件)
|
||||
available = self._get_duration(trim_temp) or available
|
||||
|
||||
loop_count = int(target_duration / available) + 1 if needs_loop else 0
|
||||
|
||||
cmd = ["ffmpeg", "-y"]
|
||||
if needs_loop:
|
||||
cmd.extend(["-stream_loop", str(loop_count)])
|
||||
if not self._run_ffmpeg(trim_cmd):
|
||||
raise RuntimeError(f"FFmpeg trim for loop failed: {video_path}")
|
||||
actual_input = trim_temp
|
||||
source_start = 0.0 # 已裁剪,不需要再 seek
|
||||
# 重新计算循环次数(基于裁剪后文件)
|
||||
available = self._get_duration(trim_temp) or available
|
||||
|
||||
loop_count = int(target_duration / available) + 1 if needs_loop else 0
|
||||
|
||||
cmd = ["ffmpeg", "-y"]
|
||||
if needs_loop:
|
||||
cmd.extend(["-stream_loop", str(loop_count)])
|
||||
if source_start > 0:
|
||||
cmd.extend(["-ss", str(source_start)])
|
||||
cmd.extend(["-i", actual_input, "-t", str(target_duration), "-an"])
|
||||
@@ -386,20 +386,20 @@ class VideoService:
|
||||
|
||||
# 需要循环、缩放或指定起点时必须重编码,否则用 stream copy 保持原画质
|
||||
if needs_loop or needs_scale or source_start > 0 or has_source_end or needs_fps:
|
||||
cmd.extend(["-c:v", "libx264", "-preset", "fast", "-crf", "18"])
|
||||
cmd.extend(["-c:v", "libx264", "-preset", "fast", "-crf", "23"])
|
||||
else:
|
||||
cmd.extend(["-c:v", "copy"])
|
||||
|
||||
cmd.append(output_path)
|
||||
|
||||
try:
|
||||
if self._run_ffmpeg(cmd):
|
||||
return output_path
|
||||
raise RuntimeError(f"FFmpeg prepare_segment failed: {video_path}")
|
||||
finally:
|
||||
# 清理裁剪临时文件
|
||||
if trim_temp:
|
||||
try:
|
||||
Path(trim_temp).unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
cmd.append(output_path)
|
||||
|
||||
try:
|
||||
if self._run_ffmpeg(cmd):
|
||||
return output_path
|
||||
raise RuntimeError(f"FFmpeg prepare_segment failed: {video_path}")
|
||||
finally:
|
||||
# 清理裁剪临时文件
|
||||
if trim_temp:
|
||||
try:
|
||||
Path(trim_temp).unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user