290 lines
11 KiB
Python
290 lines
11 KiB
Python
"""生成配音 - 业务逻辑"""
|
||
import re
|
||
import json
|
||
import time
|
||
import asyncio
|
||
import subprocess
|
||
import tempfile
|
||
import os
|
||
from pathlib import Path
|
||
from typing import Optional
|
||
|
||
import httpx
|
||
from loguru import logger
|
||
|
||
from app.services.storage import storage_service
|
||
from app.services.tts_service import TTSService
|
||
from app.services.voice_clone_service import voice_clone_service
|
||
from app.modules.videos.task_store import task_store
|
||
from app.modules.generated_audios.schemas import (
|
||
GenerateAudioRequest,
|
||
GeneratedAudioItem,
|
||
GeneratedAudioListResponse,
|
||
)
|
||
|
||
# Storage bucket used by this module for generated audio (*_audio.wav) and
# the matching JSON metadata objects (*_audio.json).
BUCKET = "generated-audios"
|
||
|
||
|
||
def _locale_to_tts_lang(locale: str) -> str:
    """Map a locale tag to the language name passed to the voice-clone service.

    Only the primary language subtag is considered. Both ``-`` and ``_``
    separators are accepted and matching is case-insensitive, so ``zh-CN``,
    ``zh_CN`` and ``ZH`` all resolve to ``"Chinese"``.

    Args:
        locale: Locale tag such as ``"zh-CN"`` or ``"en"``. Empty or ``None``
            is tolerated.

    Returns:
        ``"Chinese"``, ``"English"``, or ``"Auto"`` for anything else.
    """
    mapping = {"zh": "Chinese", "en": "English"}
    if not locale:
        return "Auto"
    primary = locale.strip().replace("_", "-").split("-", 1)[0].lower()
    return mapping.get(primary, "Auto")
|
||
|
||
|
||
def _get_audio_duration(file_path: str) -> float:
    """Return the duration of an audio file in seconds, as printed by ffprobe.

    This is best-effort: if ffprobe cannot be run, exceeds the 10 second
    timeout, or prints something that is not a number, a warning is logged
    and ``0.0`` is returned instead of raising.
    """
    probe_cmd = [
        'ffprobe', '-v', 'quiet',
        '-show_entries', 'format=duration',
        '-of', 'csv=p=0',
        file_path,
    ]
    try:
        completed = subprocess.run(
            probe_cmd, capture_output=True, text=True, timeout=10
        )
        seconds = float(completed.stdout.strip())
    except Exception as e:
        logger.warning(f"获取音频时长失败: {e}")
        seconds = 0.0
    return seconds
|
||
|
||
|
||
async def generate_audio_task(task_id: str, req: GenerateAudioRequest, user_id: str):
    """Background task: synthesize a voice-over, upload it and record the outcome.

    Progress, the final result and any failure are reported through
    ``task_store.update``; failures are logged and not re-raised.

    Args:
        task_id: Key of the task entry updated in ``task_store``.
        req: Generation request. The fields read here are ``tts_mode``,
            ``text``, ``voice``, ``language``, ``speed``, ``ref_audio_id``,
            ``ref_text`` and ``instruct_text``.
        user_id: Used as the path prefix of the uploaded audio and metadata.
    """
    try:
        task_store.update(task_id, {"status": "processing", "progress": 10, "message": "正在生成配音..."})

        # Reserve a path for the synthesized audio. The file is removed in the
        # ``finally`` below whether or not generation succeeds.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            audio_path = tmp.name

        try:
            if req.tts_mode == "voiceclone":
                if not req.ref_audio_id or not req.ref_text:
                    raise ValueError("声音克隆模式需要提供参考音频和参考文字")

                task_store.update(task_id, {"progress": 20, "message": "正在下载参考音频..."})

                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_ref:
                    ref_local = tmp_ref.name

                try:
                    # NOTE(review): ref_audio_id is used as-is as an object path
                    # in the "ref-audios" bucket; no ownership check is visible
                    # here - confirm it is validated upstream.
                    ref_url = await storage_service.get_signed_url(
                        bucket="ref-audios", path=req.ref_audio_id
                    )
                    # NOTE(review): every httpx timeout is disabled, so a stalled
                    # download leaves the task in "processing" - confirm intended.
                    timeout = httpx.Timeout(None)
                    async with httpx.AsyncClient(timeout=timeout) as client:
                        async with client.stream("GET", ref_url) as resp:
                            resp.raise_for_status()
                            with open(ref_local, "wb") as f:
                                async for chunk in resp.aiter_bytes():
                                    f.write(chunk)

                    task_store.update(task_id, {"progress": 40, "message": "正在克隆声音..."})
                    await voice_clone_service.generate_audio(
                        text=req.text,
                        ref_audio_path=ref_local,
                        ref_text=req.ref_text,
                        output_path=audio_path,
                        language=_locale_to_tts_lang(req.language),
                        speed=req.speed,
                        instruct_text=req.instruct_text or "",
                    )
                finally:
                    if os.path.exists(ref_local):
                        os.unlink(ref_local)
            else:
                task_store.update(task_id, {"progress": 30, "message": "正在生成语音..."})
                tts = TTSService()
                await tts.generate_audio(req.text, req.voice, audio_path)

            task_store.update(task_id, {"progress": 70, "message": "正在上传配音..."})

            # ffprobe runs as a blocking subprocess (up to 10 s); run it in a
            # worker thread so the event loop keeps serving other tasks.
            duration = await asyncio.to_thread(_get_audio_duration, audio_path)
            timestamp = int(time.time())
            audio_id = f"{user_id}/{timestamp}_audio.wav"
            meta_id = f"{user_id}/{timestamp}_audio.json"

            # Default display name derived from the local creation time.
            now = time.strftime("%Y%m%d_%H%M", time.localtime(timestamp))
            display_name = f"配音_{now}"

            # Read the whole file off the event loop as well.
            wav_data = await asyncio.to_thread(Path(audio_path).read_bytes)

            await storage_service.upload_file(
                bucket=BUCKET, path=audio_id,
                file_data=wav_data, content_type="audio/wav",
            )

            metadata = {
                "display_name": display_name,
                "text": req.text,
                "tts_mode": req.tts_mode,
                "voice": req.voice if req.tts_mode == "edgetts" else None,
                "ref_audio_id": req.ref_audio_id,
                "language": req.language,
                "duration_sec": duration,
                "created_at": timestamp,
            }
            await storage_service.upload_file(
                bucket=BUCKET, path=meta_id,
                file_data=json.dumps(metadata, ensure_ascii=False).encode("utf-8"),
                content_type="application/json",
            )

            signed_url = await storage_service.get_signed_url(BUCKET, audio_id)

            task_store.update(task_id, {
                "status": "completed",
                "progress": 100,
                "message": f"配音生成完成 ({duration:.1f}s)",
                "output": {
                    "audio_id": audio_id,
                    "name": display_name,
                    "path": signed_url,
                    "duration_sec": duration,
                    "text": req.text,
                    "tts_mode": req.tts_mode,
                    "language": req.language,
                    "created_at": timestamp,
                },
            })
        finally:
            if os.path.exists(audio_path):
                os.unlink(audio_path)

    except Exception as e:
        import traceback
        # Log first so the traceback is kept even if the store update fails.
        logger.error(f"Generate audio failed: {e}\n{traceback.format_exc()}")
        task_store.update(task_id, {
            "status": "failed",
            "message": f"配音生成失败: {str(e)}",
            "error": str(e),
        })
|
||
|
||
|
||
async def list_generated_audios(user_id: str) -> dict:
    """List all generated voice-overs of a user, newest first.

    Every ``*_audio.wav`` object under the user's prefix becomes one item.
    Details come from the sibling ``*_audio.json`` metadata object when it can
    be fetched; otherwise defaults are used and the creation time is taken
    from the leading number of the file name.

    Args:
        user_id: Prefix under which the user's files are listed.

    Returns:
        ``GeneratedAudioListResponse(...).model_dump()`` with items sorted by
        ``created_at`` in descending order.
    """
    files = await storage_service.list_files(BUCKET, user_id)
    wav_files = [f for f in files if f.get("name", "").endswith("_audio.wav")]

    if not wav_files:
        return GeneratedAudioListResponse(items=[]).model_dump()

    async def fetch_info(f, client):
        """Build the list item for one .wav entry, reading its metadata if present."""
        name = f.get("name", "")
        storage_path = f"{user_id}/{name}"
        # Only the trailing suffix is swapped; the name is known to end with it.
        meta_path = f"{user_id}/{name.removesuffix('_audio.wav')}_audio.json"

        display_name = name
        text = ""
        tts_mode = "edgetts"
        language = "zh-CN"
        duration_sec = 0.0
        created_at = None  # None means the metadata supplied no value

        try:
            meta_url = await storage_service.get_signed_url(BUCKET, meta_path)
            resp = await client.get(meta_url)
            if resp.status_code == 200:
                meta = resp.json()
                display_name = meta.get("display_name", name)
                text = meta.get("text", "")
                tts_mode = meta.get("tts_mode", "edgetts")
                language = meta.get("language", "zh-CN")
                duration_sec = meta.get("duration_sec", 0.0)
                created_at = meta.get("created_at", 0)
        except Exception as e:
            logger.debug(f"读取配音 metadata 失败: {e}")

        if created_at is None:
            # Metadata missing or unreadable (including non-200 responses):
            # fall back to the "<timestamp>_audio.wav" naming scheme.
            try:
                created_at = int(name.split("_")[0])
            except ValueError:
                created_at = 0

        signed_url = await storage_service.get_signed_url(BUCKET, storage_path)

        return GeneratedAudioItem(
            id=storage_path,
            name=display_name,
            path=signed_url,
            duration_sec=duration_sec,
            text=text,
            tts_mode=tts_mode,
            language=language,
            created_at=created_at,
        )

    # One shared client for all metadata requests instead of one per file.
    # The pool timeout is disabled so requests queued behind the connection
    # limit wait for a slot rather than failing.
    timeout = httpx.Timeout(5.0, pool=None)
    async with httpx.AsyncClient(timeout=timeout) as client:
        items = await asyncio.gather(*(fetch_info(f, client) for f in wav_files))

    items = sorted(items, key=lambda x: x.created_at, reverse=True)
    return GeneratedAudioListResponse(items=items).model_dump()
|
||
|
||
|
||
async def delete_all_generated_audios(user_id: str) -> tuple[int, int]:
    """Delete every generated voice-over (.wav and .json) of a user.

    Returns:
        ``(deleted_count, failed_count)``. If listing the files or the loop
        itself fails, the error is logged and ``(0, 1)`` is returned.
    """
    removed = 0
    failures = 0
    try:
        listing = await storage_service.list_files(BUCKET, user_id, strict=True)
        for entry in listing:
            filename = entry.get("name", "")
            # Skip nameless entries and the folder placeholder object.
            if not filename or filename == ".emptyFolderPlaceholder":
                continue
            if not filename.endswith(("_audio.wav", "_audio.json")):
                continue

            full_path = f"{user_id}/{filename}"
            try:
                await storage_service.delete_file(BUCKET, full_path)
            except Exception as e:
                failures += 1
                logger.warning(f"Delete audio file failed: {full_path}, {e}")
            else:
                removed += 1
    except Exception as e:
        logger.error(f"Delete all generated audios failed: {e}")
        return 0, 1
    return removed, failures
|
||
|
||
|
||
async def delete_generated_audio(audio_id: str, user_id: str) -> None:
    """Delete one generated voice-over and, best-effort, its metadata object.

    Args:
        audio_id: Storage path of the audio object; must lie under
            ``"{user_id}/"``.
        user_id: Owner whose prefix ``audio_id`` has to start with.

    Raises:
        PermissionError: If ``audio_id`` is outside the user's prefix or
            contains a ``..`` path segment.
    """
    # A plain prefix check would accept "<user>/../<other>/..."; reject any
    # parent-directory segment as well.
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权删除此文件")

    await storage_service.delete_file(BUCKET, audio_id)

    suffix = "_audio.wav"
    if not audio_id.endswith(suffix):
        # No sibling metadata path can be derived from this id.
        return

    meta_path = f"{audio_id[:-len(suffix)]}_audio.json"
    try:
        await storage_service.delete_file(BUCKET, meta_path)
    except Exception as e:
        # Metadata removal stays best-effort, but the failure is now visible.
        logger.warning(f"Delete audio metadata failed: {meta_path}, {e}")
|
||
|
||
|
||
async def rename_generated_audio(audio_id: str, new_name: str, user_id: str) -> dict:
    """Change the display name stored in a voice-over's JSON metadata.

    The existing metadata object is fetched, its ``display_name`` replaced and
    the result uploaded again. If the metadata cannot be read, a fresh record
    with default values is written instead.

    Args:
        audio_id: Storage path of the audio object (``.../<ts>_audio.wav``);
            must lie under ``"{user_id}/"``.
        new_name: New display name; surrounding whitespace is stripped.
        user_id: Owner whose prefix ``audio_id`` has to start with.

    Returns:
        ``{"name": new_name}`` with the stripped name.

    Raises:
        PermissionError: If ``audio_id`` is outside the user's prefix or
            contains a ``..`` path segment.
        ValueError: If the name is empty after stripping, or ``audio_id``
            does not end with ``_audio.wav``.
    """
    # A plain prefix check would accept "<user>/../<other>/..."; reject any
    # parent-directory segment as well.
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权修改此文件")

    new_name = new_name.strip()
    if not new_name:
        raise ValueError("新名称不能为空")

    # Without the expected suffix the derived metadata path would equal
    # audio_id, and the upload below would overwrite the audio with JSON.
    suffix = "_audio.wav"
    if not audio_id.endswith(suffix):
        raise ValueError("无效的配音 ID")
    meta_path = f"{audio_id[:-len(suffix)]}_audio.json"

    try:
        meta_url = await storage_service.get_signed_url(BUCKET, meta_path)
        async with httpx.AsyncClient() as client:
            resp = await client.get(meta_url)
            if resp.status_code != 200:
                raise RuntimeError(f"Failed to fetch metadata: {resp.status_code}")
            metadata = resp.json()
        if not isinstance(metadata, dict):
            raise TypeError(f"Unexpected metadata type: {type(metadata).__name__}")
    except Exception as e:
        logger.warning(f"无法读取配音元数据: {e}, 将创建新的")
        # Keep the original creation time when the file name carries it.
        file_name = audio_id.rsplit("/", 1)[-1]
        try:
            created_at = int(file_name.split("_")[0])
        except ValueError:
            created_at = int(time.time())
        metadata = {
            "display_name": new_name,
            "text": "",
            "tts_mode": "edgetts",
            "language": "zh-CN",
            "duration_sec": 0.0,
            "created_at": created_at,
        }

    metadata["display_name"] = new_name
    await storage_service.upload_file(
        bucket=BUCKET,
        path=meta_path,
        file_data=json.dumps(metadata, ensure_ascii=False).encode("utf-8"),
        content_type="application/json",
    )
    return {"name": new_name}
|