更新
This commit is contained in:
@@ -2,11 +2,11 @@
|
||||
依赖注入模块:认证和用户获取
|
||||
"""
|
||||
from typing import Optional, Any, Dict, cast
|
||||
from fastapi import Request, HTTPException, Depends, status
|
||||
from app.core.security import decode_access_token, TokenData
|
||||
from app.repositories.sessions import get_session
|
||||
from app.repositories.users import get_user_by_id
|
||||
from loguru import logger
|
||||
from fastapi import Request, HTTPException, Depends, status
|
||||
from app.core.security import decode_access_token
|
||||
from app.repositories.sessions import get_session, delete_sessions
|
||||
from app.repositories.users import get_user_by_id, deactivate_user_if_expired
|
||||
from loguru import logger
|
||||
|
||||
|
||||
async def get_token_from_cookie(request: Request) -> Optional[str]:
|
||||
@@ -35,8 +35,12 @@ async def get_current_user_optional(
|
||||
logger.warning(f"Session token 无效: user_id={token_data.user_id}")
|
||||
return None
|
||||
|
||||
user = get_user_by_id(token_data.user_id)
|
||||
return cast(Optional[Dict[str, Any]], user)
|
||||
user = cast(Optional[Dict[str, Any]], get_user_by_id(token_data.user_id))
|
||||
if user and deactivate_user_if_expired(user):
|
||||
delete_sessions(token_data.user_id)
|
||||
return None
|
||||
|
||||
return user
|
||||
except Exception as e:
|
||||
logger.error(f"获取用户信息失败: {e}")
|
||||
return None
|
||||
@@ -82,14 +86,12 @@ async def get_current_user(
|
||||
)
|
||||
user = cast(Dict[str, Any], user)
|
||||
|
||||
if user.get("expires_at"):
|
||||
from datetime import datetime, timezone
|
||||
expires_at = datetime.fromisoformat(user["expires_at"].replace("Z", "+00:00"))
|
||||
if datetime.now(timezone.utc) > expires_at:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="授权已过期,请联系管理员续期"
|
||||
)
|
||||
if deactivate_user_if_expired(user):
|
||||
delete_sessions(token_data.user_id)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="会员已到期,请续费"
|
||||
)
|
||||
|
||||
return user
|
||||
except HTTPException:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
认证 API:注册、登录、登出、修改密码
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Response, status, Request
|
||||
from fastapi import APIRouter, HTTPException, Response, status, Request, Depends
|
||||
from pydantic import BaseModel, field_validator
|
||||
from app.core.security import (
|
||||
get_password_hash,
|
||||
@@ -13,7 +13,15 @@ from app.core.security import (
|
||||
decode_access_token
|
||||
)
|
||||
from app.repositories.sessions import create_session, delete_sessions
|
||||
from app.repositories.users import create_user, get_user_by_id, get_user_by_phone, user_exists_by_phone, update_user
|
||||
from app.repositories.users import (
|
||||
create_user,
|
||||
get_user_by_id,
|
||||
get_user_by_phone,
|
||||
user_exists_by_phone,
|
||||
update_user,
|
||||
deactivate_user_if_expired,
|
||||
)
|
||||
from app.core.deps import get_current_user
|
||||
from app.core.response import success_response
|
||||
from loguru import logger
|
||||
from typing import Optional, Any, cast
|
||||
@@ -130,22 +138,20 @@ async def login(request: LoginRequest, response: Response):
|
||||
detail="手机号或密码错误"
|
||||
)
|
||||
|
||||
# 检查是否激活
|
||||
if not user["is_active"]:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="账号未激活,请等待管理员审核"
|
||||
)
|
||||
|
||||
# 检查授权是否过期
|
||||
if user.get("expires_at"):
|
||||
from datetime import datetime, timezone
|
||||
expires_at = datetime.fromisoformat(user["expires_at"].replace("Z", "+00:00"))
|
||||
if datetime.now(timezone.utc) > expires_at:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="授权已过期,请联系管理员续期"
|
||||
)
|
||||
# 授权过期时自动停用账号
|
||||
if deactivate_user_if_expired(user):
|
||||
delete_sessions(user["id"])
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="会员已到期,请续费"
|
||||
)
|
||||
|
||||
# 检查是否激活
|
||||
if not user["is_active"]:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="账号未激活,请等待管理员审核"
|
||||
)
|
||||
|
||||
# 生成新的 session_token (后踢前)
|
||||
session_token = generate_session_token()
|
||||
@@ -258,31 +264,9 @@ async def change_password(request: ChangePasswordRequest, req: Request, response
|
||||
)
|
||||
|
||||
|
||||
@router.get("/me")
|
||||
async def get_me(request: Request):
|
||||
"""获取当前用户信息"""
|
||||
# 从 Cookie 获取用户
|
||||
token = request.cookies.get("access_token")
|
||||
if not token:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="未登录"
|
||||
)
|
||||
|
||||
token_data = decode_access_token(token)
|
||||
if not token_data:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="Token 无效"
|
||||
)
|
||||
|
||||
user = cast(dict[str, Any], get_user_by_id(token_data.user_id) or {})
|
||||
if not user:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
detail="用户不存在"
|
||||
)
|
||||
|
||||
@router.get("/me")
|
||||
async def get_me(user: dict = Depends(get_current_user)):
|
||||
"""获取当前用户信息"""
|
||||
return success_response(UserResponse(
|
||||
id=user["id"],
|
||||
phone=user["phone"],
|
||||
|
||||
@@ -9,6 +9,7 @@ class GenerateAudioRequest(BaseModel):
|
||||
ref_audio_id: Optional[str] = None
|
||||
ref_text: Optional[str] = None
|
||||
language: str = "zh-CN"
|
||||
speed: float = 1.0
|
||||
|
||||
|
||||
class RenameAudioRequest(BaseModel):
|
||||
|
||||
@@ -25,7 +25,7 @@ from app.modules.generated_audios.schemas import (
|
||||
BUCKET = "generated-audios"
|
||||
|
||||
|
||||
def _locale_to_qwen_lang(locale: str) -> str:
|
||||
def _locale_to_tts_lang(locale: str) -> str:
|
||||
mapping = {"zh": "Chinese", "en": "English"}
|
||||
return mapping.get(locale.split("-")[0], "Auto")
|
||||
|
||||
@@ -73,19 +73,20 @@ async def generate_audio_task(task_id: str, req: GenerateAudioRequest, user_id:
|
||||
async for chunk in resp.aiter_bytes():
|
||||
f.write(chunk)
|
||||
|
||||
task_store.update(task_id, {"progress": 40, "message": "正在克隆声音 (Qwen3-TTS)..."})
|
||||
task_store.update(task_id, {"progress": 40, "message": "正在克隆声音..."})
|
||||
await voice_clone_service.generate_audio(
|
||||
text=req.text,
|
||||
ref_audio_path=ref_local,
|
||||
ref_text=req.ref_text,
|
||||
output_path=audio_path,
|
||||
language=_locale_to_qwen_lang(req.language),
|
||||
language=_locale_to_tts_lang(req.language),
|
||||
speed=req.speed,
|
||||
)
|
||||
finally:
|
||||
if os.path.exists(ref_local):
|
||||
os.unlink(ref_local)
|
||||
else:
|
||||
task_store.update(task_id, {"progress": 30, "message": "正在生成语音 (EdgeTTS)..."})
|
||||
task_store.update(task_id, {"progress": 30, "message": "正在生成语音..."})
|
||||
tts = TTSService()
|
||||
await tts.generate_audio(req.text, req.voice, audio_path)
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ router = APIRouter()
|
||||
@router.post("")
|
||||
async def upload_ref_audio(
|
||||
file: UploadFile = File(...),
|
||||
ref_text: str = Form(...),
|
||||
ref_text: str = Form(""),
|
||||
user: dict = Depends(get_current_user)
|
||||
):
|
||||
"""上传参考音频"""
|
||||
@@ -68,3 +68,21 @@ async def rename_ref_audio(
|
||||
except Exception as e:
|
||||
logger.error(f"重命名失败: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"重命名失败: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/{audio_id:path}/retranscribe")
|
||||
async def retranscribe_ref_audio(
|
||||
audio_id: str,
|
||||
user: dict = Depends(get_current_user)
|
||||
):
|
||||
"""重新识别参考音频的文字内容"""
|
||||
try:
|
||||
result = await service.retranscribe_ref_audio(audio_id, user["id"])
|
||||
return success_response(result, message="识别完成")
|
||||
except PermissionError as e:
|
||||
raise HTTPException(status_code=403, detail=str(e))
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
except Exception as e:
|
||||
logger.error(f"重新识别失败: {e}")
|
||||
raise HTTPException(status_code=500, detail=f"识别失败: {str(e)}")
|
||||
|
||||
@@ -41,16 +41,40 @@ def _get_audio_duration(file_path: str) -> float:
|
||||
return 0.0
|
||||
|
||||
|
||||
def _convert_to_wav(input_path: str, output_path: str) -> bool:
|
||||
"""将音频转换为 WAV 格式 (16kHz, mono)"""
|
||||
def _find_silence_cut_point(file_path: str, max_duration: float) -> float:
|
||||
"""在 max_duration 附近找一个静音点作为截取位置,找不到则回退到 max_duration"""
|
||||
try:
|
||||
subprocess.run([
|
||||
'ffmpeg', '-y', '-i', input_path,
|
||||
'-ar', '16000',
|
||||
'-ac', '1',
|
||||
'-acodec', 'pcm_s16le',
|
||||
output_path
|
||||
], capture_output=True, timeout=60, check=True)
|
||||
# 用 silencedetect 找所有静音段(阈值 -30dB,最短 0.3 秒)
|
||||
result = subprocess.run(
|
||||
['ffmpeg', '-i', file_path, '-af',
|
||||
'silencedetect=noise=-30dB:d=0.3', '-f', 'null', '-'],
|
||||
capture_output=True, text=True, timeout=30
|
||||
)
|
||||
# 解析 silence_end 时间点
|
||||
import re as _re
|
||||
ends = [float(m) for m in _re.findall(r'silence_end:\s*([\d.]+)', result.stderr)]
|
||||
# 找 max_duration 之前最后一个静音结束点(至少 3 秒)
|
||||
candidates = [t for t in ends if 3.0 <= t <= max_duration]
|
||||
if candidates:
|
||||
cut = candidates[-1]
|
||||
logger.info(f"Found silence cut point at {cut:.1f}s (max={max_duration}s)")
|
||||
return cut
|
||||
except Exception as e:
|
||||
logger.warning(f"Silence detection failed: {e}")
|
||||
return max_duration
|
||||
|
||||
|
||||
def _convert_to_wav(input_path: str, output_path: str, max_duration: float = 0) -> bool:
|
||||
"""将音频转换为 WAV 格式 (16kHz, mono),可选截取前 max_duration 秒并淡出"""
|
||||
try:
|
||||
cmd = ['ffmpeg', '-y', '-i', input_path]
|
||||
if max_duration > 0:
|
||||
cmd += ['-t', str(max_duration)]
|
||||
# 末尾 0.1 秒淡出,避免截断爆音
|
||||
fade_start = max(0, max_duration - 0.1)
|
||||
cmd += ['-af', f'afade=t=out:st={fade_start}:d=0.1']
|
||||
cmd += ['-ar', '16000', '-ac', '1', '-acodec', 'pcm_s16le', output_path]
|
||||
subprocess.run(cmd, capture_output=True, timeout=60, check=True)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"音频转换失败: {e}")
|
||||
@@ -67,9 +91,6 @@ async def upload_ref_audio(file, ref_text: str, user_id: str) -> dict:
|
||||
if ext not in ALLOWED_AUDIO_EXTENSIONS:
|
||||
raise ValueError(f"不支持的音频格式: {ext}。支持的格式: {', '.join(ALLOWED_AUDIO_EXTENSIONS)}")
|
||||
|
||||
if not ref_text or len(ref_text.strip()) < 2:
|
||||
raise ValueError("参考文字不能为空")
|
||||
|
||||
# 创建临时文件
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp_input:
|
||||
content = await file.read()
|
||||
@@ -86,8 +107,31 @@ async def upload_ref_audio(file, ref_text: str, user_id: str) -> dict:
|
||||
duration = _get_audio_duration(tmp_wav_path)
|
||||
if duration < 1.0:
|
||||
raise ValueError("音频时长过短,至少需要 1 秒")
|
||||
if duration > 60.0:
|
||||
raise ValueError("音频时长过长,最多 60 秒")
|
||||
|
||||
# 超过 10 秒自动在静音点截取(CosyVoice 对 3-10 秒效果最好)
|
||||
MAX_REF_DURATION = 10.0
|
||||
if duration > MAX_REF_DURATION:
|
||||
cut_point = _find_silence_cut_point(tmp_wav_path, MAX_REF_DURATION)
|
||||
logger.info(f"Ref audio {duration:.1f}s > {MAX_REF_DURATION}s, trimming at {cut_point:.1f}s")
|
||||
trimmed_path = tmp_input_path + "_trimmed.wav"
|
||||
if not _convert_to_wav(tmp_wav_path, trimmed_path, max_duration=cut_point):
|
||||
raise RuntimeError("音频截取失败")
|
||||
os.unlink(tmp_wav_path)
|
||||
tmp_wav_path = trimmed_path
|
||||
duration = _get_audio_duration(tmp_wav_path)
|
||||
|
||||
# 自动转写参考音频内容
|
||||
try:
|
||||
from app.services.whisper_service import whisper_service
|
||||
transcribed = await whisper_service.transcribe(tmp_wav_path)
|
||||
if transcribed.strip():
|
||||
ref_text = transcribed.strip()
|
||||
logger.info(f"Auto-transcribed ref audio: {ref_text[:50]}...")
|
||||
except Exception as e:
|
||||
logger.warning(f"Auto-transcribe failed: {e}")
|
||||
|
||||
if not ref_text or not ref_text.strip():
|
||||
raise ValueError("无法识别音频内容,请确保音频包含清晰的语音")
|
||||
|
||||
# 检查重名
|
||||
existing_files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
|
||||
@@ -267,3 +311,85 @@ async def rename_ref_audio(audio_id: str, new_name: str, user_id: str) -> dict:
|
||||
)
|
||||
|
||||
return {"name": new_name}
|
||||
|
||||
|
||||
async def retranscribe_ref_audio(audio_id: str, user_id: str) -> dict:
|
||||
"""重新转写参考音频的 ref_text,并截取前 10 秒重新上传(用于迁移旧数据)"""
|
||||
if not audio_id.startswith(f"{user_id}/"):
|
||||
raise PermissionError("无权修改此文件")
|
||||
|
||||
# 下载音频到临时文件
|
||||
audio_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, audio_id)
|
||||
tmp_wav_path = None
|
||||
trimmed_path = None
|
||||
try:
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
|
||||
tmp_wav_path = tmp.name
|
||||
timeout = httpx.Timeout(None)
|
||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||
async with client.stream("GET", audio_url) as resp:
|
||||
resp.raise_for_status()
|
||||
async for chunk in resp.aiter_bytes():
|
||||
tmp.write(chunk)
|
||||
|
||||
# 超过 10 秒则截取前 10 秒并重新上传音频
|
||||
MAX_REF_DURATION = 10.0
|
||||
duration = _get_audio_duration(tmp_wav_path)
|
||||
transcribe_path = tmp_wav_path
|
||||
need_reupload = False
|
||||
|
||||
if duration > MAX_REF_DURATION:
|
||||
cut_point = _find_silence_cut_point(tmp_wav_path, MAX_REF_DURATION)
|
||||
logger.info(f"Retranscribe: trimming {audio_id} from {duration:.1f}s at {cut_point:.1f}s")
|
||||
trimmed_path = tmp_wav_path + "_trimmed.wav"
|
||||
if _convert_to_wav(tmp_wav_path, trimmed_path, max_duration=cut_point):
|
||||
transcribe_path = trimmed_path
|
||||
duration = _get_audio_duration(trimmed_path)
|
||||
need_reupload = True
|
||||
|
||||
# Whisper 转写
|
||||
from app.services.whisper_service import whisper_service
|
||||
transcribed = await whisper_service.transcribe(transcribe_path)
|
||||
if not transcribed or not transcribed.strip():
|
||||
raise ValueError("无法识别音频内容")
|
||||
|
||||
ref_text = transcribed.strip()
|
||||
logger.info(f"Re-transcribed ref audio {audio_id}: {ref_text[:50]}...")
|
||||
|
||||
# 截取过的音频重新上传覆盖原文件
|
||||
if need_reupload and trimmed_path:
|
||||
with open(trimmed_path, "rb") as f:
|
||||
await storage_service.upload_file(
|
||||
bucket=BUCKET_REF_AUDIOS, path=audio_id,
|
||||
file_data=f.read(), content_type="audio/wav",
|
||||
)
|
||||
logger.info(f"Re-uploaded trimmed audio: {audio_id} ({duration:.1f}s)")
|
||||
|
||||
# 更新 metadata
|
||||
metadata_path = audio_id.replace(".wav", ".json")
|
||||
try:
|
||||
meta_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
resp = await client.get(meta_url)
|
||||
if resp.status_code == 200:
|
||||
metadata = resp.json()
|
||||
else:
|
||||
raise Exception(f"status {resp.status_code}")
|
||||
except Exception:
|
||||
metadata = {}
|
||||
|
||||
metadata["ref_text"] = ref_text
|
||||
metadata["duration_sec"] = duration
|
||||
await storage_service.upload_file(
|
||||
bucket=BUCKET_REF_AUDIOS,
|
||||
path=metadata_path,
|
||||
file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
|
||||
content_type="application/json"
|
||||
)
|
||||
|
||||
return {"ref_text": ref_text, "duration_sec": duration}
|
||||
finally:
|
||||
if tmp_wav_path and os.path.exists(tmp_wav_path):
|
||||
os.unlink(tmp_wav_path)
|
||||
if trimmed_path and os.path.exists(trimmed_path):
|
||||
os.unlink(trimmed_path)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List
|
||||
from typing import Optional, List, Literal
|
||||
|
||||
|
||||
class CustomAssignment(BaseModel):
|
||||
@@ -7,6 +7,7 @@ class CustomAssignment(BaseModel):
|
||||
start: float # 音频时间轴起点
|
||||
end: float # 音频时间轴终点
|
||||
source_start: float = 0.0 # 源视频截取起点
|
||||
source_end: Optional[float] = None # 源视频截取终点(可选)
|
||||
|
||||
|
||||
class GenerateRequest(BaseModel):
|
||||
@@ -30,3 +31,4 @@ class GenerateRequest(BaseModel):
|
||||
bgm_id: Optional[str] = None
|
||||
bgm_volume: Optional[float] = 0.2
|
||||
custom_assignments: Optional[List[CustomAssignment]] = None
|
||||
output_aspect_ratio: Literal["9:16", "16:9"] = "9:16"
|
||||
|
||||
@@ -29,7 +29,7 @@ def _locale_to_whisper_lang(locale: str) -> str:
|
||||
return locale.split("-")[0] if "-" in locale else locale
|
||||
|
||||
|
||||
def _locale_to_qwen_lang(locale: str) -> str:
|
||||
def _locale_to_tts_lang(locale: str) -> str:
|
||||
"""'zh-CN' → 'Chinese', 'en-US' → 'English', 其他 → 'Auto'"""
|
||||
mapping = {"zh": "Chinese", "en": "English"}
|
||||
return mapping.get(locale.split("-")[0], "Auto")
|
||||
@@ -174,17 +174,27 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
|
||||
# ── 确定素材列表 ──
|
||||
material_paths: List[str] = []
|
||||
if req.material_paths and len(req.material_paths) > 1:
|
||||
if req.custom_assignments and len(req.custom_assignments) > 1:
|
||||
material_paths = [a.material_path for a in req.custom_assignments if a.material_path]
|
||||
elif req.material_paths and len(req.material_paths) > 1:
|
||||
material_paths = req.material_paths
|
||||
else:
|
||||
material_paths = [req.material_path]
|
||||
|
||||
is_multi = len(material_paths) > 1
|
||||
target_resolution = (1080, 1920) if req.output_aspect_ratio == "9:16" else (1920, 1080)
|
||||
|
||||
logger.info(
|
||||
f"[Render] 输出画面比例: {req.output_aspect_ratio}, "
|
||||
f"目标分辨率: {target_resolution[0]}x{target_resolution[1]}"
|
||||
)
|
||||
|
||||
_update_task(task_id, status="processing", progress=5, message="正在下载素材...")
|
||||
|
||||
temp_dir = settings.UPLOAD_DIR / "temp"
|
||||
temp_dir.mkdir(parents=True, exist_ok=True)
|
||||
video = VideoService()
|
||||
input_material_path: Optional[Path] = None
|
||||
|
||||
# 单素材模式:下载主素材
|
||||
if not is_multi:
|
||||
@@ -192,6 +202,16 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
temp_files.append(input_material_path)
|
||||
await _download_material(material_paths[0], input_material_path)
|
||||
|
||||
# 归一化旋转元数据(如 iPhone MOV 1920x1080 + rotation=-90)
|
||||
normalized_input_path = temp_dir / f"{task_id}_input_norm.mp4"
|
||||
normalized_result = video.normalize_orientation(
|
||||
str(input_material_path),
|
||||
str(normalized_input_path),
|
||||
)
|
||||
if normalized_result != str(input_material_path):
|
||||
temp_files.append(normalized_input_path)
|
||||
input_material_path = normalized_input_path
|
||||
|
||||
_update_task(task_id, message="正在生成语音...", progress=10)
|
||||
|
||||
audio_path = temp_dir / f"{task_id}_audio.wav"
|
||||
@@ -218,8 +238,10 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
if resp.status_code == 200:
|
||||
meta = resp.json()
|
||||
req.language = meta.get("language", req.language)
|
||||
if not req.text.strip():
|
||||
req.text = meta.get("text", req.text)
|
||||
# 无条件用配音元数据覆盖文案,确保字幕与配音语言一致
|
||||
meta_text = meta.get("text", "")
|
||||
if meta_text:
|
||||
req.text = meta_text
|
||||
except Exception as e:
|
||||
logger.warning(f"读取配音元数据失败: {e}")
|
||||
|
||||
@@ -238,13 +260,13 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
)
|
||||
await _download_material(ref_audio_url, ref_audio_local)
|
||||
|
||||
_update_task(task_id, message="正在克隆声音 (Qwen3-TTS)...")
|
||||
_update_task(task_id, message="正在克隆声音...")
|
||||
await voice_clone_service.generate_audio(
|
||||
text=req.text,
|
||||
ref_audio_path=str(ref_audio_local),
|
||||
ref_text=req.ref_text,
|
||||
output_path=str(audio_path),
|
||||
language=_locale_to_qwen_lang(req.language)
|
||||
language=_locale_to_tts_lang(req.language)
|
||||
)
|
||||
else:
|
||||
_update_task(task_id, message="正在生成语音 (EdgeTTS)...")
|
||||
@@ -258,7 +280,6 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
lipsync_video_path = temp_dir / f"{task_id}_lipsync.mp4"
|
||||
temp_files.append(lipsync_video_path)
|
||||
|
||||
video = VideoService()
|
||||
captions_path = None
|
||||
|
||||
if is_multi:
|
||||
@@ -267,7 +288,7 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
# ══════════════════════════════════════
|
||||
_update_task(task_id, progress=12, message="正在分配素材...")
|
||||
|
||||
if req.custom_assignments:
|
||||
if req.custom_assignments and len(req.custom_assignments) == len(material_paths):
|
||||
# 用户自定义分配,跳过 Whisper 均分
|
||||
assignments = [
|
||||
{
|
||||
@@ -275,6 +296,7 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
"start": a.start,
|
||||
"end": a.end,
|
||||
"source_start": a.source_start,
|
||||
"source_end": a.source_end,
|
||||
"index": i,
|
||||
}
|
||||
for i, a in enumerate(req.custom_assignments)
|
||||
@@ -290,6 +312,7 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
text=req.text,
|
||||
output_path=str(captions_path),
|
||||
language=_locale_to_whisper_lang(req.language),
|
||||
original_text=req.text,
|
||||
)
|
||||
print(f"[Pipeline] Whisper alignment completed (custom assignments)")
|
||||
except Exception as e:
|
||||
@@ -297,6 +320,49 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
captions_path = None
|
||||
else:
|
||||
captions_path = None
|
||||
elif req.custom_assignments:
|
||||
logger.warning(
|
||||
f"[MultiMat] custom_assignments 数量({len(req.custom_assignments)})"
|
||||
f" 与素材数量({len(material_paths)})不一致,回退自动分配"
|
||||
)
|
||||
|
||||
# 原有逻辑:Whisper → _split_equal
|
||||
_update_task(task_id, message="正在生成字幕 (Whisper)...")
|
||||
|
||||
captions_path = temp_dir / f"{task_id}_captions.json"
|
||||
temp_files.append(captions_path)
|
||||
|
||||
try:
|
||||
captions_data = await whisper_service.align(
|
||||
audio_path=str(audio_path),
|
||||
text=req.text,
|
||||
output_path=str(captions_path),
|
||||
language=_locale_to_whisper_lang(req.language),
|
||||
original_text=req.text,
|
||||
)
|
||||
print(f"[Pipeline] Whisper alignment completed (multi-material)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Whisper alignment failed: {e}")
|
||||
captions_data = None
|
||||
captions_path = None
|
||||
|
||||
_update_task(task_id, progress=15, message="正在分配素材...")
|
||||
|
||||
if captions_data and captions_data.get("segments"):
|
||||
assignments = _split_equal(captions_data["segments"], material_paths)
|
||||
else:
|
||||
# Whisper 失败 → 按时长均分(不依赖字符对齐)
|
||||
logger.warning("[MultiMat] Whisper 无数据,按时长均分")
|
||||
audio_dur = video._get_duration(str(audio_path))
|
||||
if audio_dur <= 0:
|
||||
audio_dur = 30.0 # 安全兜底
|
||||
seg_dur = audio_dur / len(material_paths)
|
||||
assignments = [
|
||||
{"material_path": material_paths[i], "start": i * seg_dur,
|
||||
"end": (i + 1) * seg_dur, "index": i}
|
||||
for i in range(len(material_paths))
|
||||
]
|
||||
|
||||
else:
|
||||
# 原有逻辑:Whisper → _split_equal
|
||||
_update_task(task_id, message="正在生成字幕 (Whisper)...")
|
||||
@@ -310,6 +376,7 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
text=req.text,
|
||||
output_path=str(captions_path),
|
||||
language=_locale_to_whisper_lang(req.language),
|
||||
original_text=req.text,
|
||||
)
|
||||
print(f"[Pipeline] Whisper alignment completed (multi-material)")
|
||||
except Exception as e:
|
||||
@@ -356,12 +423,23 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
material_local = temp_dir / f"{task_id}_material_{i}.mp4"
|
||||
temp_files.append(material_local)
|
||||
await _download_material(assignment["material_path"], material_local)
|
||||
|
||||
# 归一化旋转元数据,确保分辨率判断与后续推理一致
|
||||
normalized_material = temp_dir / f"{task_id}_material_{i}_norm.mp4"
|
||||
normalized_result = video.normalize_orientation(
|
||||
str(material_local),
|
||||
str(normalized_material),
|
||||
)
|
||||
if normalized_result != str(material_local):
|
||||
temp_files.append(normalized_material)
|
||||
material_local = normalized_material
|
||||
|
||||
material_locals.append(material_local)
|
||||
resolutions.append(video.get_resolution(str(material_local)))
|
||||
|
||||
# 分辨率不一致时,统一到第一个素材的分辨率
|
||||
base_res = resolutions[0] if resolutions else (0, 0)
|
||||
need_scale = any(r != base_res for r in resolutions) and base_res[0] > 0
|
||||
# 按用户选择的画面比例统一分辨率
|
||||
base_res = target_resolution
|
||||
need_scale = any(r != base_res for r in resolutions)
|
||||
if need_scale:
|
||||
logger.info(f"[MultiMat] 素材分辨率不一致,统一到 {base_res[0]}x{base_res[1]}")
|
||||
|
||||
@@ -381,8 +459,11 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
temp_files.append(prepared_path)
|
||||
video.prepare_segment(
|
||||
str(material_locals[i]), seg_dur, str(prepared_path),
|
||||
target_resolution=base_res if need_scale else None,
|
||||
# 多素材拼接前统一重编码为同分辨率/同编码,避免 concat 仅保留首段
|
||||
target_resolution=base_res,
|
||||
source_start=assignment.get("source_start", 0.0),
|
||||
source_end=assignment.get("source_end"),
|
||||
target_fps=25,
|
||||
)
|
||||
prepared_segments.append(prepared_path)
|
||||
|
||||
@@ -392,7 +473,8 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
temp_files.append(concat_path)
|
||||
video.concat_videos(
|
||||
[str(p) for p in prepared_segments],
|
||||
str(concat_path)
|
||||
str(concat_path),
|
||||
target_fps=25,
|
||||
)
|
||||
|
||||
# ── 第三步:一次 LatentSync 推理 ──
|
||||
@@ -425,23 +507,31 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
# 单素材流水线(原有逻辑)
|
||||
# ══════════════════════════════════════
|
||||
|
||||
# 单素材 + source_start:先截取片段
|
||||
if input_material_path is None:
|
||||
raise RuntimeError("单素材流程缺少输入素材")
|
||||
|
||||
# 单素材:按用户选择画面比例统一到目标分辨率,并应用 source_start
|
||||
single_source_start = 0.0
|
||||
single_source_end = None
|
||||
if req.custom_assignments and len(req.custom_assignments) == 1:
|
||||
single_source_start = req.custom_assignments[0].source_start
|
||||
single_source_end = req.custom_assignments[0].source_end
|
||||
|
||||
if single_source_start > 0:
|
||||
_update_task(task_id, progress=20, message="正在截取素材片段...")
|
||||
audio_dur = video._get_duration(str(audio_path))
|
||||
if audio_dur <= 0:
|
||||
audio_dur = 30.0
|
||||
trimmed_path = temp_dir / f"{task_id}_trimmed.mp4"
|
||||
temp_files.append(trimmed_path)
|
||||
video.prepare_segment(
|
||||
str(input_material_path), audio_dur, str(trimmed_path),
|
||||
source_start=single_source_start,
|
||||
)
|
||||
input_material_path = trimmed_path
|
||||
_update_task(task_id, progress=20, message="正在准备素材片段...")
|
||||
audio_dur = video._get_duration(str(audio_path))
|
||||
if audio_dur <= 0:
|
||||
audio_dur = 30.0
|
||||
prepared_single_path = temp_dir / f"{task_id}_prepared_single.mp4"
|
||||
temp_files.append(prepared_single_path)
|
||||
video.prepare_segment(
|
||||
str(input_material_path),
|
||||
audio_dur,
|
||||
str(prepared_single_path),
|
||||
target_resolution=target_resolution,
|
||||
source_start=single_source_start,
|
||||
source_end=single_source_end,
|
||||
)
|
||||
input_material_path = prepared_single_path
|
||||
|
||||
_update_task(task_id, progress=25)
|
||||
_update_task(task_id, message="正在合成唇形 (LatentSync)...", progress=30)
|
||||
@@ -476,6 +566,7 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
|
||||
text=req.text,
|
||||
output_path=str(captions_path),
|
||||
language=_locale_to_whisper_lang(req.language),
|
||||
original_text=req.text,
|
||||
)
|
||||
print(f"[Pipeline] Whisper alignment completed")
|
||||
except Exception as e:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, List, Optional, cast
|
||||
|
||||
from app.core.supabase import get_supabase
|
||||
@@ -37,3 +38,33 @@ def update_user(user_id: str, payload: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
supabase = get_supabase()
|
||||
result = supabase.table("users").update(payload).eq("id", user_id).execute()
|
||||
return cast(List[Dict[str, Any]], result.data or [])
|
||||
|
||||
|
||||
def _parse_expires_at(expires_at: Any) -> Optional[datetime]:
|
||||
try:
|
||||
expires_at_dt = datetime.fromisoformat(str(expires_at).replace("Z", "+00:00"))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
if expires_at_dt.tzinfo is None:
|
||||
expires_at_dt = expires_at_dt.replace(tzinfo=timezone.utc)
|
||||
return expires_at_dt.astimezone(timezone.utc)
|
||||
|
||||
|
||||
def deactivate_user_if_expired(user: Dict[str, Any]) -> bool:
|
||||
expires_at = user.get("expires_at")
|
||||
if not expires_at:
|
||||
return False
|
||||
|
||||
expires_at_dt = _parse_expires_at(expires_at)
|
||||
if not expires_at_dt:
|
||||
return False
|
||||
|
||||
if datetime.now(timezone.utc) <= expires_at_dt:
|
||||
return False
|
||||
|
||||
user_id = user.get("id")
|
||||
if user.get("is_active") and user_id:
|
||||
update_user(cast(str, user_id), {"is_active": False})
|
||||
|
||||
return True
|
||||
|
||||
@@ -9,9 +9,110 @@ from pathlib import Path
|
||||
from loguru import logger
|
||||
from typing import Optional
|
||||
|
||||
class VideoService:
|
||||
def __init__(self):
|
||||
pass
|
||||
class VideoService:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def get_video_metadata(self, file_path: str) -> dict:
|
||||
"""获取视频元信息(含旋转角与有效显示分辨率)"""
|
||||
cmd = [
|
||||
"ffprobe", "-v", "error",
|
||||
"-select_streams", "v:0",
|
||||
"-show_entries", "stream=width,height:stream_side_data=rotation",
|
||||
"-of", "json",
|
||||
file_path,
|
||||
]
|
||||
default_info = {
|
||||
"width": 0,
|
||||
"height": 0,
|
||||
"rotation": 0,
|
||||
"effective_width": 0,
|
||||
"effective_height": 0,
|
||||
}
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
|
||||
if result.returncode != 0:
|
||||
return default_info
|
||||
|
||||
payload = json.loads(result.stdout or "{}")
|
||||
streams = payload.get("streams") or []
|
||||
if not streams:
|
||||
return default_info
|
||||
|
||||
stream = streams[0]
|
||||
width = int(stream.get("width") or 0)
|
||||
height = int(stream.get("height") or 0)
|
||||
|
||||
rotation = 0
|
||||
for side_data in stream.get("side_data_list") or []:
|
||||
if not isinstance(side_data, dict):
|
||||
continue
|
||||
raw_rotation = side_data.get("rotation")
|
||||
if raw_rotation is None:
|
||||
continue
|
||||
try:
|
||||
rotation = int(round(float(str(raw_rotation))))
|
||||
except Exception:
|
||||
rotation = 0
|
||||
break
|
||||
|
||||
norm_rotation = rotation % 360
|
||||
if norm_rotation > 180:
|
||||
norm_rotation -= 360
|
||||
swap_wh = abs(norm_rotation) == 90
|
||||
|
||||
effective_width = height if swap_wh else width
|
||||
effective_height = width if swap_wh else height
|
||||
|
||||
return {
|
||||
"width": width,
|
||||
"height": height,
|
||||
"rotation": norm_rotation,
|
||||
"effective_width": effective_width,
|
||||
"effective_height": effective_height,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning(f"获取视频元信息失败: {e}")
|
||||
return default_info
|
||||
|
||||
def normalize_orientation(self, video_path: str, output_path: str) -> str:
|
||||
"""将带旋转元数据的视频转为物理方向,避免后续流程忽略 rotation。"""
|
||||
info = self.get_video_metadata(video_path)
|
||||
rotation = int(info.get("rotation") or 0)
|
||||
if rotation == 0:
|
||||
return video_path
|
||||
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
logger.info(
|
||||
f"检测到旋转元数据 rotation={rotation},归一化方向: "
|
||||
f"{info.get('effective_width', 0)}x{info.get('effective_height', 0)}"
|
||||
)
|
||||
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-i", video_path,
|
||||
"-map", "0:v:0",
|
||||
"-map", "0:a?",
|
||||
"-c:v", "libx264",
|
||||
"-preset", "fast",
|
||||
"-crf", "18",
|
||||
"-c:a", "copy",
|
||||
"-movflags", "+faststart",
|
||||
output_path,
|
||||
]
|
||||
|
||||
if self._run_ffmpeg(cmd):
|
||||
normalized = self.get_video_metadata(output_path)
|
||||
logger.info(
|
||||
"视频方向归一化完成: "
|
||||
f"coded={normalized.get('width', 0)}x{normalized.get('height', 0)}, "
|
||||
f"rotation={normalized.get('rotation', 0)}"
|
||||
)
|
||||
return output_path
|
||||
|
||||
logger.warning("视频方向归一化失败,回退使用原视频")
|
||||
return video_path
|
||||
|
||||
def _run_ffmpeg(self, cmd: list) -> bool:
|
||||
cmd_str = ' '.join(shlex.quote(str(c)) for c in cmd)
|
||||
@@ -139,8 +240,8 @@ class VideoService:
|
||||
else:
|
||||
raise RuntimeError("FFmpeg composition failed")
|
||||
|
||||
def concat_videos(self, video_paths: list, output_path: str) -> str:
|
||||
"""使用 FFmpeg concat demuxer 拼接多个视频片段"""
|
||||
def concat_videos(self, video_paths: list, output_path: str, target_fps: int = 25) -> str:
|
||||
"""使用 FFmpeg concat demuxer 拼接多个视频片段"""
|
||||
if not video_paths:
|
||||
raise ValueError("No video segments to concat")
|
||||
|
||||
@@ -152,14 +253,22 @@ class VideoService:
|
||||
for vp in video_paths:
|
||||
f.write(f"file '{vp}'\n")
|
||||
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-f", "concat",
|
||||
"-safe", "0",
|
||||
"-i", str(list_path),
|
||||
"-c", "copy",
|
||||
output_path,
|
||||
]
|
||||
cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-f", "concat",
|
||||
"-safe", "0",
|
||||
"-fflags", "+genpts",
|
||||
"-i", str(list_path),
|
||||
"-an",
|
||||
"-vsync", "cfr",
|
||||
"-r", str(target_fps),
|
||||
"-c:v", "libx264",
|
||||
"-preset", "fast",
|
||||
"-crf", "18",
|
||||
"-pix_fmt", "yuv420p",
|
||||
"-movflags", "+faststart",
|
||||
output_path,
|
||||
]
|
||||
|
||||
try:
|
||||
if self._run_ffmpeg(cmd):
|
||||
@@ -193,54 +302,60 @@ class VideoService:
|
||||
return output_path
|
||||
raise RuntimeError(f"FFmpeg audio split failed: {start}-{end}")
|
||||
|
||||
def get_resolution(self, file_path: str) -> tuple:
|
||||
"""获取视频分辨率,返回 (width, height)"""
|
||||
cmd = [
|
||||
'ffprobe', '-v', 'error',
|
||||
'-select_streams', 'v:0',
|
||||
'-show_entries', 'stream=width,height',
|
||||
'-of', 'csv=p=0',
|
||||
file_path
|
||||
]
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
|
||||
parts = result.stdout.strip().split(',')
|
||||
return (int(parts[0]), int(parts[1]))
|
||||
except Exception:
|
||||
return (0, 0)
|
||||
def get_resolution(self, file_path: str) -> tuple[int, int]:
|
||||
"""获取视频有效显示分辨率(考虑旋转元数据)。"""
|
||||
info = self.get_video_metadata(file_path)
|
||||
return (
|
||||
int(info.get("effective_width") or 0),
|
||||
int(info.get("effective_height") or 0),
|
||||
)
|
||||
|
||||
def prepare_segment(self, video_path: str, target_duration: float, output_path: str,
|
||||
target_resolution: tuple = None, source_start: float = 0.0) -> str:
|
||||
"""将素材视频裁剪或循环到指定时长(无音频)。
|
||||
target_resolution: (width, height) 如需统一分辨率则传入,否则保持原分辨率。
|
||||
source_start: 源视频截取起点(秒),默认 0。
|
||||
"""
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
video_dur = self._get_duration(video_path)
|
||||
if video_dur <= 0:
|
||||
video_dur = target_duration
|
||||
|
||||
# 可用时长 = 从 source_start 到视频结尾
|
||||
available = max(video_dur - source_start, 0.1)
|
||||
needs_loop = target_duration > available
|
||||
needs_scale = target_resolution is not None
|
||||
|
||||
# 当需要循环且有 source_start 时,先裁剪出片段,再循环裁剪后的文件
|
||||
# 避免 stream_loop 循环整个视频(而不是从 source_start 开始的片段)
|
||||
actual_input = video_path
|
||||
trim_temp = None
|
||||
if needs_loop and source_start > 0:
|
||||
trim_temp = str(Path(output_path).parent / (Path(output_path).stem + "_trim_tmp.mp4"))
|
||||
trim_cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-ss", str(source_start),
|
||||
"-i", video_path,
|
||||
"-t", str(available),
|
||||
"-an",
|
||||
"-c:v", "libx264", "-preset", "fast", "-crf", "18",
|
||||
trim_temp,
|
||||
]
|
||||
def prepare_segment(self, video_path: str, target_duration: float, output_path: str,
|
||||
target_resolution: Optional[tuple] = None, source_start: float = 0.0,
|
||||
source_end: Optional[float] = None, target_fps: Optional[int] = None) -> str:
|
||||
"""将素材视频裁剪或循环到指定时长(无音频)。
|
||||
target_resolution: (width, height) 如需统一分辨率则传入,否则保持原分辨率。
|
||||
source_start: 源视频截取起点(秒),默认 0。
|
||||
source_end: 源视频截取终点(秒),默认到素材结尾。
|
||||
target_fps: 输出帧率(可选),用于多素材拼接前统一时间基。
|
||||
"""
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
video_dur = self._get_duration(video_path)
|
||||
if video_dur <= 0:
|
||||
video_dur = target_duration
|
||||
|
||||
clip_end = video_dur
|
||||
if source_end is not None:
|
||||
try:
|
||||
source_end_value = float(source_end)
|
||||
if source_end_value > source_start:
|
||||
clip_end = min(source_end_value, video_dur)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 可用时长 = 从 source_start 到视频结尾
|
||||
available = max(clip_end - source_start, 0.1)
|
||||
needs_loop = target_duration > available
|
||||
needs_scale = target_resolution is not None
|
||||
needs_fps = bool(target_fps and target_fps > 0)
|
||||
has_source_end = clip_end < video_dur
|
||||
|
||||
# 当需要循环且存在截取范围时,先裁剪出片段,再循环裁剪后的文件
|
||||
# 避免 stream_loop 循环整个视频(而不是截取后的片段)
|
||||
actual_input = video_path
|
||||
trim_temp = None
|
||||
if needs_loop and (source_start > 0 or has_source_end):
|
||||
trim_temp = str(Path(output_path).parent / (Path(output_path).stem + "_trim_tmp.mp4"))
|
||||
trim_cmd = [
|
||||
"ffmpeg", "-y",
|
||||
"-ss", str(source_start),
|
||||
"-i", video_path,
|
||||
"-t", str(available),
|
||||
"-an",
|
||||
"-c:v", "libx264", "-preset", "fast", "-crf", "18",
|
||||
trim_temp,
|
||||
]
|
||||
if not self._run_ffmpeg(trim_cmd):
|
||||
raise RuntimeError(f"FFmpeg trim for loop failed: {video_path}")
|
||||
actual_input = trim_temp
|
||||
@@ -253,19 +368,27 @@ class VideoService:
|
||||
cmd = ["ffmpeg", "-y"]
|
||||
if needs_loop:
|
||||
cmd.extend(["-stream_loop", str(loop_count)])
|
||||
if source_start > 0:
|
||||
cmd.extend(["-ss", str(source_start)])
|
||||
cmd.extend(["-i", actual_input, "-t", str(target_duration), "-an"])
|
||||
|
||||
if needs_scale:
|
||||
w, h = target_resolution
|
||||
cmd.extend(["-vf", f"scale={w}:{h}:force_original_aspect_ratio=decrease,pad={w}:{h}:(ow-iw)/2:(oh-ih)/2"])
|
||||
|
||||
# 需要循环、缩放或指定起点时必须重编码,否则用 stream copy 保持原画质
|
||||
if needs_loop or needs_scale or source_start > 0:
|
||||
cmd.extend(["-c:v", "libx264", "-preset", "fast", "-crf", "18"])
|
||||
else:
|
||||
cmd.extend(["-c:v", "copy"])
|
||||
if source_start > 0:
|
||||
cmd.extend(["-ss", str(source_start)])
|
||||
cmd.extend(["-i", actual_input, "-t", str(target_duration), "-an"])
|
||||
|
||||
filters = []
|
||||
if needs_fps:
|
||||
filters.append(f"fps={int(target_fps)}")
|
||||
if needs_scale:
|
||||
w, h = target_resolution
|
||||
filters.append(f"scale={w}:{h}:force_original_aspect_ratio=decrease,pad={w}:{h}:(ow-iw)/2:(oh-ih)/2")
|
||||
|
||||
if filters:
|
||||
cmd.extend(["-vf", ",".join(filters)])
|
||||
if needs_fps:
|
||||
cmd.extend(["-vsync", "cfr", "-r", str(int(target_fps))])
|
||||
|
||||
# 需要循环、缩放或指定起点时必须重编码,否则用 stream copy 保持原画质
|
||||
if needs_loop or needs_scale or source_start > 0 or has_source_end or needs_fps:
|
||||
cmd.extend(["-c:v", "libx264", "-preset", "fast", "-crf", "18"])
|
||||
else:
|
||||
cmd.extend(["-c:v", "copy"])
|
||||
|
||||
cmd.append(output_path)
|
||||
|
||||
|
||||
@@ -1,37 +1,104 @@
|
||||
"""
|
||||
声音克隆服务
|
||||
通过 HTTP 调用 Qwen3-TTS 独立服务 (端口 8009)
|
||||
通过 HTTP 调用 CosyVoice 3.0 独立服务 (端口 8010)
|
||||
"""
|
||||
import httpx
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
# Qwen3-TTS 服务地址
|
||||
QWEN_TTS_URL = "http://localhost:8009"
|
||||
# CosyVoice 3.0 服务地址
|
||||
VOICE_CLONE_URL = "http://localhost:8010"
|
||||
|
||||
|
||||
class VoiceCloneService:
|
||||
"""声音克隆服务 - 调用 Qwen3-TTS HTTP API"""
|
||||
"""声音克隆服务 - 调用 CosyVoice 3.0 HTTP API"""
|
||||
|
||||
def __init__(self):
|
||||
self.base_url = QWEN_TTS_URL
|
||||
self.base_url = VOICE_CLONE_URL
|
||||
# 健康状态缓存
|
||||
self._health_cache: Optional[dict] = None
|
||||
self._health_cache_time: float = 0
|
||||
# GPU 并发锁 (Serial Queue)
|
||||
self._lock = asyncio.Lock()
|
||||
|
||||
async def _generate_once(
|
||||
self,
|
||||
*,
|
||||
text: str,
|
||||
ref_audio_data: bytes,
|
||||
ref_text: str,
|
||||
language: str,
|
||||
speed: float = 1.0,
|
||||
max_retries: int = 4,
|
||||
) -> bytes:
|
||||
timeout = httpx.Timeout(240.0)
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||
response = await client.post(
|
||||
f"{self.base_url}/generate",
|
||||
files={"ref_audio": ("ref.wav", ref_audio_data, "audio/wav")},
|
||||
data={
|
||||
"text": text,
|
||||
"ref_text": ref_text,
|
||||
"language": language,
|
||||
"speed": str(speed),
|
||||
},
|
||||
)
|
||||
|
||||
retryable = False
|
||||
reason = ""
|
||||
|
||||
if response.status_code in (429, 502, 503, 504):
|
||||
retryable = True
|
||||
reason = f"HTTP {response.status_code}"
|
||||
elif response.status_code == 500 and (
|
||||
"生成超时" in response.text or "timeout" in response.text.lower()
|
||||
):
|
||||
retryable = True
|
||||
reason = "upstream timeout"
|
||||
|
||||
if retryable and attempt < max_retries - 1:
|
||||
wait = 8 * (attempt + 1)
|
||||
logger.warning(
|
||||
f"Voice clone retryable error ({reason}), retrying in {wait}s "
|
||||
f"(attempt {attempt + 1}/{max_retries})"
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
|
||||
response.raise_for_status()
|
||||
return response.content
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Voice clone API error: {e.response.status_code} - {e.response.text}")
|
||||
raise RuntimeError(f"声音克隆服务错误: {e.response.text}")
|
||||
except httpx.RequestError as e:
|
||||
if attempt < max_retries - 1:
|
||||
wait = 6 * (attempt + 1)
|
||||
logger.warning(
|
||||
f"Voice clone connection error: {e}; retrying in {wait}s "
|
||||
f"(attempt {attempt + 1}/{max_retries})"
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
logger.error(f"Voice clone connection error: {e}")
|
||||
raise RuntimeError("无法连接声音克隆服务,请检查服务是否启动")
|
||||
|
||||
raise RuntimeError("声音克隆服务繁忙,请稍后重试")
|
||||
|
||||
async def generate_audio(
|
||||
self,
|
||||
text: str,
|
||||
ref_audio_path: str,
|
||||
ref_text: str,
|
||||
output_path: str,
|
||||
language: str = "Chinese"
|
||||
language: str = "Chinese",
|
||||
speed: float = 1.0,
|
||||
) -> str:
|
||||
"""
|
||||
使用声音克隆生成语音
|
||||
@@ -51,60 +118,49 @@ class VoiceCloneService:
|
||||
logger.info(f"🎤 Voice Clone: {text[:30]}... (language={language})")
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 读取参考音频
|
||||
text = text.strip()
|
||||
if not text:
|
||||
raise RuntimeError("文本为空,无法生成语音")
|
||||
|
||||
with open(ref_audio_path, "rb") as f:
|
||||
ref_audio_data = f.read()
|
||||
|
||||
# 调用 Qwen3-TTS 服务
|
||||
timeout = httpx.Timeout(300.0) # 5分钟超时
|
||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||
try:
|
||||
response = await client.post(
|
||||
f"{self.base_url}/generate",
|
||||
files={"ref_audio": ("ref.wav", ref_audio_data, "audio/wav")},
|
||||
data={
|
||||
"text": text,
|
||||
"ref_text": ref_text,
|
||||
"language": language
|
||||
}
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# 保存返回的音频
|
||||
with open(output_path, "wb") as f:
|
||||
f.write(response.content)
|
||||
|
||||
logger.info(f"✅ Voice clone saved: {output_path}")
|
||||
return output_path
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Qwen3-TTS API error: {e.response.status_code} - {e.response.text}")
|
||||
raise RuntimeError(f"声音克隆服务错误: {e.response.text}")
|
||||
except httpx.RequestError as e:
|
||||
logger.error(f"Qwen3-TTS connection error: {e}")
|
||||
raise RuntimeError("无法连接声音克隆服务,请检查服务是否启动")
|
||||
# CosyVoice 内部自带 text_normalize 分段,无需客户端切分
|
||||
audio_bytes = await self._generate_once(
|
||||
text=text,
|
||||
ref_audio_data=ref_audio_data,
|
||||
ref_text=ref_text,
|
||||
language=language,
|
||||
speed=speed,
|
||||
)
|
||||
with open(output_path, "wb") as f:
|
||||
f.write(audio_bytes)
|
||||
logger.info(f"✅ Voice clone saved: {output_path}")
|
||||
return output_path
|
||||
|
||||
async def check_health(self) -> dict:
|
||||
"""健康检查"""
|
||||
import time
|
||||
|
||||
# 5分钟缓存
|
||||
# 30秒缓存
|
||||
now = time.time()
|
||||
if self._health_cache and (now - self._health_cache_time) < 300:
|
||||
return self._health_cache
|
||||
cached = self._health_cache
|
||||
if cached is not None and (now - self._health_cache_time) < 30:
|
||||
return cached
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
response = await client.get(f"{self.base_url}/health")
|
||||
response.raise_for_status()
|
||||
self._health_cache = response.json()
|
||||
payload = response.json()
|
||||
self._health_cache = payload
|
||||
self._health_cache_time = now
|
||||
return self._health_cache
|
||||
return payload
|
||||
except Exception as e:
|
||||
logger.warning(f"Qwen3-TTS health check failed: {e}")
|
||||
logger.warning(f"Voice clone health check failed: {e}")
|
||||
return {
|
||||
"service": "Qwen3-TTS Voice Clone",
|
||||
"model": "0.6B-Base",
|
||||
"service": "CosyVoice 3.0 Voice Clone",
|
||||
"model": "unknown",
|
||||
"ready": False,
|
||||
"gpu_id": 0,
|
||||
"error": str(e)
|
||||
|
||||
@@ -39,12 +39,22 @@ def split_word_to_chars(word: str, start: float, end: float) -> list:
|
||||
|
||||
tokens = []
|
||||
ascii_buffer = ""
|
||||
pending_space = False # 记录是否有待处理的空格(用于英文单词间距)
|
||||
|
||||
for char in word:
|
||||
if not char.strip():
|
||||
# 空格:flush ascii_buffer,标记下一个 token 需要前导空格
|
||||
if ascii_buffer:
|
||||
tokens.append(ascii_buffer)
|
||||
ascii_buffer = ""
|
||||
if tokens: # 仅在已有 token 时标记(避免开头重复空格)
|
||||
pending_space = True
|
||||
continue
|
||||
|
||||
if char.isascii() and char.isalnum():
|
||||
if pending_space and not ascii_buffer:
|
||||
ascii_buffer = " " # 将空格前置到新英文单词
|
||||
pending_space = False
|
||||
ascii_buffer += char
|
||||
continue
|
||||
|
||||
@@ -52,7 +62,9 @@ def split_word_to_chars(word: str, start: float, end: float) -> list:
|
||||
tokens.append(ascii_buffer)
|
||||
ascii_buffer = ""
|
||||
|
||||
tokens.append(char)
|
||||
prefix = " " if pending_space else ""
|
||||
pending_space = False
|
||||
tokens.append(prefix + char)
|
||||
|
||||
if ascii_buffer:
|
||||
tokens.append(ascii_buffer)
|
||||
@@ -175,6 +187,7 @@ class WhisperService:
|
||||
text: str,
|
||||
output_path: Optional[str] = None,
|
||||
language: str = "zh",
|
||||
original_text: Optional[str] = None,
|
||||
) -> dict:
|
||||
"""
|
||||
对音频进行转录,生成字级别时间戳
|
||||
@@ -184,6 +197,8 @@ class WhisperService:
|
||||
text: 原始文本(用于参考,但实际使用 whisper 转录结果)
|
||||
output_path: 可选,输出 JSON 文件路径
|
||||
language: 语言代码 (zh/en 等)
|
||||
original_text: 原始文案。非空时,Whisper 仅用于检测总时间范围,
|
||||
字幕文字用此原文替换(解决语言不匹配问题)
|
||||
|
||||
Returns:
|
||||
包含字级别时间戳的字典
|
||||
@@ -208,16 +223,19 @@ class WhisperService:
|
||||
|
||||
logger.info(f"Detected language: {info.language} (prob: {info.language_probability:.2f})")
|
||||
|
||||
# 收集 Whisper 转录结果(始终需要,用于获取时间范围)
|
||||
all_segments = []
|
||||
whisper_first_start = None
|
||||
whisper_last_end = None
|
||||
for segment in segments_iter:
|
||||
# 提取每个字的时间戳,并拆分成单字
|
||||
all_words = []
|
||||
if segment.words:
|
||||
for word_info in segment.words:
|
||||
word_text = word_info.word
|
||||
if word_text.strip():
|
||||
# 将词拆分成单字,时间戳线性插值
|
||||
# 保留前导空格用于英文词间距
|
||||
if whisper_first_start is None:
|
||||
whisper_first_start = word_info.start
|
||||
whisper_last_end = word_info.end
|
||||
chars = split_word_to_chars(
|
||||
word_text,
|
||||
word_info.start,
|
||||
@@ -225,11 +243,24 @@ class WhisperService:
|
||||
)
|
||||
all_words.extend(chars)
|
||||
|
||||
# 将长段落按标点和字数拆分成多行
|
||||
if all_words:
|
||||
line_segments = split_segment_to_lines(all_words, max_chars)
|
||||
all_segments.extend(line_segments)
|
||||
|
||||
# 如果提供了 original_text,用原文替换 Whisper 转录文字
|
||||
if original_text and original_text.strip() and whisper_first_start is not None:
|
||||
logger.info(f"Using original_text for subtitles (len={len(original_text)}), "
|
||||
f"Whisper time range: {whisper_first_start:.2f}-{whisper_last_end:.2f}s")
|
||||
# 用 split_word_to_chars 拆分原文
|
||||
orig_chars = split_word_to_chars(
|
||||
original_text.strip(),
|
||||
whisper_first_start,
|
||||
whisper_last_end
|
||||
)
|
||||
if orig_chars:
|
||||
all_segments = split_segment_to_lines(orig_chars, max_chars)
|
||||
logger.info(f"Rebuilt {len(all_segments)} subtitle segments from original text")
|
||||
|
||||
logger.info(f"Generated {len(all_segments)} subtitle segments")
|
||||
return {"segments": all_segments}
|
||||
|
||||
@@ -247,12 +278,13 @@ class WhisperService:
|
||||
|
||||
return result
|
||||
|
||||
async def transcribe(self, audio_path: str) -> str:
|
||||
async def transcribe(self, audio_path: str, language: str | None = None) -> str:
|
||||
"""
|
||||
仅转录文本(用于提取文案)
|
||||
|
||||
Args:
|
||||
audio_path: 音频/视频文件路径
|
||||
language: 语言代码,None 表示自动检测
|
||||
|
||||
Returns:
|
||||
纯文本内容
|
||||
@@ -266,7 +298,7 @@ class WhisperService:
|
||||
# 转录 (无需字级时间戳)
|
||||
segments_iter, _ = model.transcribe(
|
||||
audio_path,
|
||||
language="zh",
|
||||
language=language,
|
||||
word_timestamps=False,
|
||||
vad_filter=True,
|
||||
)
|
||||
|
||||
@@ -20,14 +20,14 @@ logger = logging.getLogger("Watchdog")
|
||||
# 服务配置
|
||||
SERVICES = [
|
||||
{
|
||||
"name": "vigent2-qwen-tts",
|
||||
"url": "http://localhost:8009/health",
|
||||
"name": "vigent2-cosyvoice",
|
||||
"url": "http://localhost:8010/health",
|
||||
"failures": 0,
|
||||
"threshold": 5, # 连续5次失败才重启(5×30s = 2.5分钟容忍期)
|
||||
"threshold": 3, # 连续3次失败才重启(3×15s ≈ 45秒容忍期)
|
||||
"timeout": 10.0,
|
||||
"restart_cmd": ["pm2", "restart", "vigent2-qwen-tts"],
|
||||
"restart_cmd": ["pm2", "restart", "vigent2-cosyvoice"],
|
||||
"cooldown_until": 0, # 重启后的冷却截止时间戳
|
||||
"cooldown_sec": 120, # 重启后等待120秒再开始检查
|
||||
"cooldown_sec": 45, # 重启后等待45秒再开始检查
|
||||
}
|
||||
]
|
||||
|
||||
@@ -45,10 +45,20 @@ async def check_service(service):
|
||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||
response = await client.get(service["url"])
|
||||
if response.status_code == 200:
|
||||
if service["failures"] > 0:
|
||||
logger.info(f"✅ 服务 {service['name']} 已恢复正常")
|
||||
service["failures"] = 0
|
||||
return True
|
||||
ready = True
|
||||
try:
|
||||
payload = response.json()
|
||||
ready = bool(payload.get("ready", True))
|
||||
except Exception:
|
||||
payload = {}
|
||||
|
||||
if ready:
|
||||
if service["failures"] > 0:
|
||||
logger.info(f"✅ 服务 {service['name']} 已恢复正常")
|
||||
service["failures"] = 0
|
||||
return True
|
||||
|
||||
logger.warning(f"⚠️ 服务 {service['name']} ready=false,健康检查未通过: {payload}")
|
||||
else:
|
||||
logger.warning(f"⚠️ 服务 {service['name']} 返回状态码 {response.status_code}")
|
||||
except Exception as e:
|
||||
@@ -83,8 +93,8 @@ async def main():
|
||||
for service in SERVICES:
|
||||
await check_service(service)
|
||||
|
||||
# 每 30 秒检查一次
|
||||
await asyncio.sleep(30)
|
||||
# 每 15 秒检查一次
|
||||
await asyncio.sleep(15)
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user