Files
ViGent2/backend/app/modules/generated_audios/service.py
Kevin Wong 96a298e51c 更新
2026-02-11 13:48:45 +08:00

265 lines
9.7 KiB
Python

"""生成配音 - 业务逻辑"""
import re
import json
import time
import asyncio
import subprocess
import tempfile
import os
from pathlib import Path
from typing import Optional
import httpx
from loguru import logger
from app.services.storage import storage_service
from app.services.tts_service import TTSService
from app.services.voice_clone_service import voice_clone_service
from app.modules.videos.task_store import task_store
from app.modules.generated_audios.schemas import (
GenerateAudioRequest,
GeneratedAudioItem,
GeneratedAudioListResponse,
)
# Storage bucket that holds the generated WAV files and their JSON metadata sidecars.
BUCKET = "generated-audios"
def _locale_to_tts_lang(locale: str) -> str:
mapping = {"zh": "Chinese", "en": "English"}
return mapping.get(locale.split("-")[0], "Auto")
def _get_audio_duration(file_path: str) -> float:
    """Return the duration of an audio file in seconds, as reported by ffprobe.

    Any failure (ffprobe cannot be started, the 10 second timeout expires,
    or the output is not a number) is logged as a warning and reported as
    ``0.0`` instead of being raised.

    Args:
        file_path: Path of the audio file to probe.

    Returns:
        The value ffprobe prints for ``format=duration``, or ``0.0`` on failure.
    """
    probe_cmd = [
        'ffprobe', '-v', 'quiet',
        '-show_entries', 'format=duration',
        '-of', 'csv=p=0',
        file_path,
    ]
    try:
        completed = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=10)
        seconds = float(completed.stdout.strip())
    except Exception as e:
        logger.warning(f"获取音频时长失败: {e}")
        return 0.0
    return seconds
async def generate_audio_task(task_id: str, req: GenerateAudioRequest, user_id: str):
    """Background task: synthesize a voice-over, upload it and record the outcome.

    Progress and the final result are reported through ``task_store`` under
    ``task_id``. Exceptions raised while generating are caught, written to the
    task record (status ``"failed"`` plus the traceback) and logged.

    Steps:
        1. Synthesize ``req.text`` into a temporary WAV file, either with
           ``voice_clone_service`` (``req.tts_mode == "voiceclone"``, using a
           reference clip downloaded from the ``ref-audios`` bucket) or with
           ``TTSService`` for every other mode.
        2. Probe the duration of the result with ``_get_audio_duration``.
        3. Upload the WAV and a JSON metadata sidecar to ``BUCKET`` as
           ``{user_id}/{timestamp}_audio.wav`` and ``..._audio.json``.
        4. Mark the task completed, including a signed URL for the audio.

    Args:
        task_id: Key of the task record to update in ``task_store``.
        req: The generation request (text, mode, voice / reference audio,
            language, speed).
        user_id: Owner of the result; used as the storage folder name.
    """
    try:
        task_store.update(task_id, {"status": "processing", "progress": 10, "message": "正在生成配音..."})

        # Reserve a temp path for the synthesized audio. delete=False keeps the
        # file after the `with` so it can be handed to the TTS call as its
        # output path; the `finally` below removes it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            audio_path = tmp.name

        try:
            if req.tts_mode == "voiceclone":
                if not req.ref_audio_id or not req.ref_text:
                    raise ValueError("声音克隆模式需要提供参考音频和参考文字")

                task_store.update(task_id, {"progress": 20, "message": "正在下载参考音频..."})

                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_ref:
                    ref_local = tmp_ref.name
                try:
                    ref_url = await storage_service.get_signed_url(
                        bucket="ref-audios", path=req.ref_audio_id
                    )
                    # NOTE(review): all HTTP timeouts are disabled here, so a
                    # stalled download leaves the task in "processing" -- confirm
                    # this is intended.
                    timeout = httpx.Timeout(None)
                    async with httpx.AsyncClient(timeout=timeout) as client:
                        async with client.stream("GET", ref_url) as resp:
                            resp.raise_for_status()
                            with open(ref_local, "wb") as f:
                                async for chunk in resp.aiter_bytes():
                                    f.write(chunk)

                    task_store.update(task_id, {"progress": 40, "message": "正在克隆声音..."})
                    await voice_clone_service.generate_audio(
                        text=req.text,
                        ref_audio_path=ref_local,
                        ref_text=req.ref_text,
                        output_path=audio_path,
                        language=_locale_to_tts_lang(req.language),
                        speed=req.speed,
                    )
                finally:
                    # The reference clip is only needed during cloning.
                    if os.path.exists(ref_local):
                        os.unlink(ref_local)
            else:
                task_store.update(task_id, {"progress": 30, "message": "正在生成语音..."})
                tts = TTSService()
                await tts.generate_audio(req.text, req.voice, audio_path)

            task_store.update(task_id, {"progress": 70, "message": "正在上传配音..."})

            loop = asyncio.get_running_loop()
            # Probing spawns a blocking subprocess; run it in the default
            # executor so the event loop is not stalled while it runs.
            duration = await loop.run_in_executor(None, _get_audio_duration, audio_path)

            # NOTE(review): object names use a second-resolution timestamp, so
            # two tasks of one user finishing within the same second would
            # write to the same paths.
            timestamp = int(time.time())
            audio_id = f"{user_id}/{timestamp}_audio.wav"
            meta_id = f"{user_id}/{timestamp}_audio.json"

            # Default display name, e.g. "配音_20260211_1348" (local time).
            now = time.strftime("%Y%m%d_%H%M", time.localtime(timestamp))
            display_name = f"配音_{now}"

            # Disk read is blocking as well; keep it off the event loop.
            wav_data = await loop.run_in_executor(None, Path(audio_path).read_bytes)
            await storage_service.upload_file(
                bucket=BUCKET, path=audio_id,
                file_data=wav_data, content_type="audio/wav",
            )

            metadata = {
                "display_name": display_name,
                "text": req.text,
                "tts_mode": req.tts_mode,
                "voice": req.voice if req.tts_mode == "edgetts" else None,
                "ref_audio_id": req.ref_audio_id,
                "language": req.language,
                "duration_sec": duration,
                "created_at": timestamp,
            }
            await storage_service.upload_file(
                bucket=BUCKET, path=meta_id,
                file_data=json.dumps(metadata, ensure_ascii=False).encode("utf-8"),
                content_type="application/json",
            )

            signed_url = await storage_service.get_signed_url(BUCKET, audio_id)
            task_store.update(task_id, {
                "status": "completed",
                "progress": 100,
                "message": f"配音生成完成 ({duration:.1f}s)",
                "output": {
                    "audio_id": audio_id,
                    "name": display_name,
                    "path": signed_url,
                    "duration_sec": duration,
                    "text": req.text,
                    "tts_mode": req.tts_mode,
                    "language": req.language,
                    "created_at": timestamp,
                },
            })
        finally:
            # Always remove the local WAV, whether or not the upload succeeded.
            if os.path.exists(audio_path):
                os.unlink(audio_path)

    except Exception as e:
        import traceback
        task_store.update(task_id, {
            "status": "failed",
            "message": f"配音生成失败: {str(e)}",
            "error": traceback.format_exc(),
        })
        logger.error(f"Generate audio failed: {e}")
async def list_generated_audios(user_id: str) -> dict:
    """List all generated voice-overs of a user, newest first.

    Every ``*_audio.wav`` object in the user's folder of ``BUCKET`` becomes one
    item. Details are read from the matching ``*_audio.json`` sidecar, fetched
    through a signed URL with a 5 second timeout. When the sidecar cannot be
    read, defaults are used and ``created_at`` falls back to the timestamp
    encoded in the file name.

    Args:
        user_id: Owner whose folder is listed.

    Returns:
        ``GeneratedAudioListResponse(...).model_dump()`` with the items sorted
        by ``created_at`` in descending order.
    """
    files = await storage_service.list_files(BUCKET, user_id)
    wav_files = [f for f in files if f.get("name", "").endswith("_audio.wav")]
    if not wav_files:
        return GeneratedAudioListResponse(items=[]).model_dump()

    async def fetch_info(f):
        """Build the list item for one WAV entry of the storage listing."""
        name = f.get("name", "")
        storage_path = f"{user_id}/{name}"
        meta_name = name.replace("_audio.wav", "_audio.json")
        meta_path = f"{user_id}/{meta_name}"

        # Objects are stored as "<unix_ts>_audio.wav"; that timestamp is used
        # whenever the sidecar does not supply one (fetch error, non-200
        # response or missing key), so such items still sort correctly.
        try:
            fallback_created_at = int(name.split("_")[0])
        except ValueError:
            fallback_created_at = 0

        display_name = name
        text = ""
        tts_mode = "edgetts"
        language = "zh-CN"
        duration_sec = 0.0
        created_at = fallback_created_at

        try:
            meta_url = await storage_service.get_signed_url(BUCKET, meta_path)
            async with httpx.AsyncClient(timeout=5.0) as client:
                resp = await client.get(meta_url)
                if resp.status_code == 200:
                    meta = resp.json()
                    display_name = meta.get("display_name", name)
                    text = meta.get("text", "")
                    tts_mode = meta.get("tts_mode", "edgetts")
                    language = meta.get("language", "zh-CN")
                    duration_sec = meta.get("duration_sec", 0.0)
                    created_at = meta.get("created_at") or fallback_created_at
        except Exception as e:
            # Metadata is optional: keep the defaults assigned above.
            logger.debug(f"读取配音 metadata 失败: {e}")

        signed_url = await storage_service.get_signed_url(BUCKET, storage_path)
        return GeneratedAudioItem(
            id=storage_path,
            name=display_name,
            path=signed_url,
            duration_sec=duration_sec,
            text=text,
            tts_mode=tts_mode,
            language=language,
            created_at=created_at,
        )

    # Sidecars are fetched concurrently, one request per file.
    items = await asyncio.gather(*[fetch_info(f) for f in wav_files])
    items = sorted(items, key=lambda x: x.created_at, reverse=True)
    return GeneratedAudioListResponse(items=items).model_dump()
async def delete_generated_audio(audio_id: str, user_id: str) -> None:
    """Delete a generated voice-over and, best effort, its metadata sidecar.

    Args:
        audio_id: Storage path of the audio inside ``BUCKET``; must lie in the
            caller's folder, i.e. start with ``"{user_id}/"``.
        user_id: ID of the requesting user.

    Raises:
        PermissionError: ``audio_id`` is not inside the user's folder, or it
            contains a ``..`` path segment.
    """
    # A prefix check alone would accept "<user>/../<other>/x", so parent
    # directory segments are rejected as well.
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权删除此文件")

    await storage_service.delete_file(BUCKET, audio_id)

    meta_path = audio_id.replace("_audio.wav", "_audio.json")
    if meta_path == audio_id:
        # Not a "*_audio.wav" path: there is no separate sidecar to remove.
        return
    try:
        await storage_service.delete_file(BUCKET, meta_path)
    except Exception as e:
        # The audio itself is already gone; a failed sidecar delete must not
        # fail the request, but it should be visible in the logs.
        logger.debug(f"删除配音 metadata 失败: {e}")
async def rename_generated_audio(audio_id: str, new_name: str, user_id: str) -> dict:
    """Change the display name stored in a voice-over's metadata sidecar.

    The audio object itself is not touched; only ``display_name`` in the
    ``*_audio.json`` sidecar is rewritten. If the sidecar cannot be read, a
    fresh one with default fields is created.

    Args:
        audio_id: Storage path of the audio inside ``BUCKET``; must lie in the
            caller's folder, i.e. start with ``"{user_id}/"``.
        new_name: New display name; surrounding whitespace is stripped.
        user_id: ID of the requesting user.

    Returns:
        ``{"name": <stripped new name>}``.

    Raises:
        PermissionError: ``audio_id`` is not inside the user's folder, or it
            contains a ``..`` path segment.
        ValueError: ``new_name`` is empty after stripping.
    """
    # A prefix check alone would accept "<user>/../<other>/x", so parent
    # directory segments are rejected as well.
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权修改此文件")

    new_name = new_name.strip()
    if not new_name:
        raise ValueError("新名称不能为空")

    meta_path = audio_id.replace("_audio.wav", "_audio.json")
    try:
        meta_url = await storage_service.get_signed_url(BUCKET, meta_path)
        async with httpx.AsyncClient() as client:
            resp = await client.get(meta_url)
        if resp.status_code != 200:
            raise Exception(f"Failed to fetch metadata: {resp.status_code}")
        metadata = resp.json()
        if not isinstance(metadata, dict):
            # A non-object payload cannot take the display_name key below.
            raise ValueError("metadata is not a JSON object")
    except Exception as e:
        # NOTE(review): any read failure, including a transient network error,
        # ends here and the sidecar is rebuilt with default fields.
        logger.warning(f"无法读取配音元数据: {e}, 将创建新的")
        # Prefer the timestamp encoded in "<unix_ts>_audio.wav" so the item
        # keeps its position in lists sorted by created_at.
        file_name = audio_id.rsplit("/", 1)[-1]
        try:
            created_at = int(file_name.split("_")[0])
        except ValueError:
            created_at = int(time.time())
        metadata = {
            "display_name": new_name,
            "text": "",
            "tts_mode": "edgetts",
            "language": "zh-CN",
            "duration_sec": 0.0,
            "created_at": created_at,
        }

    metadata["display_name"] = new_name
    await storage_service.upload_file(
        bucket=BUCKET,
        path=meta_path,
        file_data=json.dumps(metadata, ensure_ascii=False).encode("utf-8"),
        content_type="application/json",
    )
    return {"name": new_name}