265 lines
9.7 KiB
Python
265 lines
9.7 KiB
Python
"""生成配音 - 业务逻辑"""
|
|
import re
|
|
import json
|
|
import time
|
|
import asyncio
|
|
import subprocess
|
|
import tempfile
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import httpx
|
|
from loguru import logger
|
|
|
|
from app.services.storage import storage_service
|
|
from app.services.tts_service import TTSService
|
|
from app.services.voice_clone_service import voice_clone_service
|
|
from app.modules.videos.task_store import task_store
|
|
from app.modules.generated_audios.schemas import (
|
|
GenerateAudioRequest,
|
|
GeneratedAudioItem,
|
|
GeneratedAudioListResponse,
|
|
)
|
|
|
|
# Storage bucket that holds the generated voice-over WAV files and their
# JSON metadata sidecars ("<user_id>/<timestamp>_audio.wav" / "..._audio.json").
BUCKET = "generated-audios"
|
|
|
|
|
|
def _locale_to_tts_lang(locale: str) -> str:
    """Map a locale tag to the language name passed to the TTS backend.

    Only the primary language subtag is considered, so ``"zh-CN"`` and ``"zh"``
    both resolve to ``"Chinese"``. Matching is case-insensitive and accepts
    ``_`` as well as ``-`` as the subtag separator (``"en_US"``, ``"ZH-cn"``).

    Args:
        locale: Locale tag such as ``"zh-CN"`` or ``"en"``.

    Returns:
        ``"Chinese"`` or ``"English"`` for known languages, otherwise ``"Auto"``
        (also for empty or ``None`` input).
    """
    if not locale:
        return "Auto"
    mapping = {"zh": "Chinese", "en": "English"}
    # Normalise separator and case before extracting the primary subtag.
    primary = locale.strip().replace("_", "-").split("-", 1)[0].lower()
    return mapping.get(primary, "Auto")
|
|
|
|
|
|
def _get_audio_duration(file_path: str) -> float:
    """Return the duration of an audio file in seconds, as reported by ffprobe.

    This is best-effort: any failure (ffprobe missing, the 10-second timeout
    expiring, output that cannot be parsed as a float) is logged as a warning
    and reported as ``0.0`` instead of raising.

    Args:
        file_path: Path of the audio file to probe.

    Returns:
        The duration in seconds, or ``0.0`` when it could not be determined.
    """
    probe_cmd = [
        'ffprobe', '-v', 'quiet',
        '-show_entries', 'format=duration',
        '-of', 'csv=p=0',
        file_path,
    ]
    try:
        completed = subprocess.run(
            probe_cmd, capture_output=True, text=True, timeout=10
        )
        seconds = float(completed.stdout.strip())
    except Exception as e:
        logger.warning(f"获取音频时长失败: {e}")
        return 0.0
    return seconds
|
|
|
|
|
|
async def _download_ref_audio(ref_audio_id: str, dest_path: str) -> None:
    """Stream a reference audio object from the ``ref-audios`` bucket to ``dest_path``."""
    ref_url = await storage_service.get_signed_url(
        bucket="ref-audios", path=ref_audio_id
    )
    # Bounded per-operation timeouts: the read timeout applies to each network
    # read, not to the whole transfer, so large files still download while a
    # stalled connection can no longer hang the task forever.
    timeout = httpx.Timeout(30.0, connect=10.0)
    async with httpx.AsyncClient(timeout=timeout) as client:
        async with client.stream("GET", ref_url) as resp:
            resp.raise_for_status()
            with open(dest_path, "wb") as f:
                async for chunk in resp.aiter_bytes():
                    f.write(chunk)


def _read_file_bytes(path: str) -> bytes:
    """Read a whole file into memory (run in an executor from async code)."""
    with open(path, "rb") as f:
        return f.read()


async def generate_audio_task(task_id: str, req: GenerateAudioRequest, user_id: str):
    """Background task: synthesize a voice-over, upload it, and report progress.

    Steps:
      1. Synthesize ``req.text`` into a temporary WAV file. In ``"voiceclone"``
         mode the reference audio is downloaded first and passed to
         ``voice_clone_service``; any other mode uses ``TTSService`` with
         ``req.voice``.
      2. Probe the duration and upload the WAV plus a JSON metadata sidecar to
         ``BUCKET`` under ``<user_id>/<timestamp>_audio.{wav,json}``.
      3. Mark the task ``completed`` in ``task_store`` with a signed URL.

    Failures are never raised to the caller: they are recorded in
    ``task_store`` as ``status="failed"`` together with the traceback, and
    logged. Temporary files are removed on every path.

    Args:
        task_id: Key of the task entry in ``task_store`` to update.
        req: Generation request (text, mode, voice / reference audio, language, speed).
        user_id: Owner of the result; used as the storage path prefix.
    """
    try:
        task_store.update(task_id, {"status": "processing", "progress": 10, "message": "正在生成配音..."})

        # Only the path is needed; the file is (re)written by the TTS backend.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            audio_path = tmp.name

        try:
            if req.tts_mode == "voiceclone":
                if not req.ref_audio_id or not req.ref_text:
                    raise ValueError("声音克隆模式需要提供参考音频和参考文字")

                task_store.update(task_id, {"progress": 20, "message": "正在下载参考音频..."})

                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_ref:
                    ref_local = tmp_ref.name

                try:
                    await _download_ref_audio(req.ref_audio_id, ref_local)

                    task_store.update(task_id, {"progress": 40, "message": "正在克隆声音..."})
                    await voice_clone_service.generate_audio(
                        text=req.text,
                        ref_audio_path=ref_local,
                        ref_text=req.ref_text,
                        output_path=audio_path,
                        language=_locale_to_tts_lang(req.language),
                        speed=req.speed,
                    )
                finally:
                    if os.path.exists(ref_local):
                        os.unlink(ref_local)
            else:
                task_store.update(task_id, {"progress": 30, "message": "正在生成语音..."})
                tts = TTSService()
                await tts.generate_audio(req.text, req.voice, audio_path)

            task_store.update(task_id, {"progress": 70, "message": "正在上传配音..."})

            # ffprobe (up to 10 s) and the file read are blocking; run them in
            # the default executor so the event loop stays responsive.
            loop = asyncio.get_running_loop()
            duration = await loop.run_in_executor(None, _get_audio_duration, audio_path)
            timestamp = int(time.time())
            audio_id = f"{user_id}/{timestamp}_audio.wav"
            meta_id = f"{user_id}/{timestamp}_audio.json"

            # Build the default display name from the creation time.
            now = time.strftime("%Y%m%d_%H%M", time.localtime(timestamp))
            display_name = f"配音_{now}"

            wav_data = await loop.run_in_executor(None, _read_file_bytes, audio_path)

            await storage_service.upload_file(
                bucket=BUCKET, path=audio_id,
                file_data=wav_data, content_type="audio/wav",
            )

            metadata = {
                "display_name": display_name,
                "text": req.text,
                "tts_mode": req.tts_mode,
                "voice": req.voice if req.tts_mode == "edgetts" else None,
                "ref_audio_id": req.ref_audio_id,
                "language": req.language,
                "duration_sec": duration,
                "created_at": timestamp,
            }
            await storage_service.upload_file(
                bucket=BUCKET, path=meta_id,
                file_data=json.dumps(metadata, ensure_ascii=False).encode("utf-8"),
                content_type="application/json",
            )

            signed_url = await storage_service.get_signed_url(BUCKET, audio_id)

            task_store.update(task_id, {
                "status": "completed",
                "progress": 100,
                "message": f"配音生成完成 ({duration:.1f}s)",
                "output": {
                    "audio_id": audio_id,
                    "name": display_name,
                    "path": signed_url,
                    "duration_sec": duration,
                    "text": req.text,
                    "tts_mode": req.tts_mode,
                    "language": req.language,
                    "created_at": timestamp,
                },
            })
        finally:
            if os.path.exists(audio_path):
                os.unlink(audio_path)

    except Exception as e:
        # Task boundary: record the failure for the client instead of raising.
        import traceback
        task_store.update(task_id, {
            "status": "failed",
            "message": f"配音生成失败: {str(e)}",
            "error": traceback.format_exc(),
        })
        logger.error(f"Generate audio failed: {e}")
|
|
|
|
|
|
async def list_generated_audios(user_id: str) -> dict:
    """List every generated voice-over of a user, newest first.

    For each ``*_audio.wav`` object under ``<user_id>/`` in ``BUCKET`` the
    matching ``*_audio.json`` sidecar is fetched to fill in display name, text,
    mode, language, duration and creation time. When the sidecar cannot be
    loaded (request error, non-200 response, or a payload that is not a JSON
    object) defaults are used and ``created_at`` falls back to the timestamp
    prefix of the file name.

    Args:
        user_id: Owner whose storage prefix is listed.

    Returns:
        ``GeneratedAudioListResponse(...).model_dump()`` with the items sorted
        by ``created_at`` in descending order.
    """
    files = await storage_service.list_files(BUCKET, user_id)
    wav_files = [f for f in files if f.get("name", "").endswith("_audio.wav")]

    if not wav_files:
        return GeneratedAudioListResponse(items=[]).model_dump()

    wav_suffix = "_audio.wav"

    async def fetch_info(f, client):
        name = f.get("name", "")
        storage_path = f"{user_id}/{name}"
        # Swap only the trailing suffix; the filter above guarantees it is there.
        meta_name = name[: -len(wav_suffix)] + "_audio.json"
        meta_path = f"{user_id}/{meta_name}"

        meta = None
        try:
            meta_url = await storage_service.get_signed_url(BUCKET, meta_path)
            resp = await client.get(meta_url)
            if resp.status_code == 200:
                meta = resp.json()
        except Exception as e:
            # Best effort: a missing or broken sidecar must not break the listing.
            logger.debug(f"读取配音 metadata 失败: {e}")

        if isinstance(meta, dict):
            display_name = meta.get("display_name", name)
            text = meta.get("text", "")
            tts_mode = meta.get("tts_mode", "edgetts")
            language = meta.get("language", "zh-CN")
            duration_sec = meta.get("duration_sec", 0.0)
            created_at = meta.get("created_at", 0)
        else:
            display_name = name
            text = ""
            tts_mode = "edgetts"
            language = "zh-CN"
            duration_sec = 0.0
            # File names look like "<timestamp>_audio.wav"; recover the
            # timestamp so items without metadata still sort sensibly.
            try:
                created_at = int(name.split("_")[0])
            except ValueError:
                created_at = 0

        signed_url = await storage_service.get_signed_url(BUCKET, storage_path)

        return GeneratedAudioItem(
            id=storage_path,
            name=display_name,
            path=signed_url,
            duration_sec=duration_sec,
            text=text,
            tts_mode=tts_mode,
            language=language,
            created_at=created_at,
        )

    # One shared client (and connection pool) for all sidecar requests.
    async with httpx.AsyncClient(timeout=5.0) as client:
        items = await asyncio.gather(*(fetch_info(f, client) for f in wav_files))

    items = sorted(items, key=lambda x: x.created_at, reverse=True)
    return GeneratedAudioListResponse(items=items).model_dump()
|
|
|
|
|
|
async def delete_generated_audio(audio_id: str, user_id: str) -> None:
    """Delete a generated voice-over and, best-effort, its metadata sidecar.

    Args:
        audio_id: Storage path of the audio, ``<user_id>/<timestamp>_audio.wav``.
        user_id: Requesting user; must own ``audio_id``.

    Raises:
        PermissionError: If ``audio_id`` is not under ``<user_id>/`` or contains
            a ``..`` path segment.
    """
    # A prefix check alone would accept "<user_id>/../<other>/..."; reject
    # parent-directory segments so the path cannot escape the user's folder.
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权删除此文件")

    await storage_service.delete_file(BUCKET, audio_id)

    wav_suffix = "_audio.wav"
    if not audio_id.endswith(wav_suffix):
        # No sidecar can be derived; avoid deleting the same object twice.
        return

    meta_path = audio_id[: -len(wav_suffix)] + "_audio.json"
    try:
        await storage_service.delete_file(BUCKET, meta_path)
    except Exception as e:
        # The sidecar may not exist; the audio itself is already gone.
        logger.debug(f"Failed to delete metadata sidecar {meta_path}: {e}")
|
|
|
|
|
|
async def rename_generated_audio(audio_id: str, new_name: str, user_id: str) -> dict:
    """Change the display name stored in a voice-over's metadata sidecar.

    The existing sidecar is fetched, its ``display_name`` replaced, and the
    result uploaded back. If the sidecar cannot be read, a fresh one with
    default fields is created instead.

    Args:
        audio_id: Storage path of the audio, ``<user_id>/<timestamp>_audio.wav``.
        new_name: New display name; surrounding whitespace is stripped.
        user_id: Requesting user; must own ``audio_id``.

    Returns:
        ``{"name": <stripped new name>}``.

    Raises:
        PermissionError: If ``audio_id`` is not under ``<user_id>/`` or contains
            a ``..`` path segment.
        ValueError: If the new name is empty, or ``audio_id`` does not end with
            ``_audio.wav``.
    """
    # A prefix check alone would accept "<user_id>/../<other>/..."; reject
    # parent-directory segments so the path cannot escape the user's folder.
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权修改此文件")

    new_name = new_name.strip()
    if not new_name:
        raise ValueError("新名称不能为空")

    wav_suffix = "_audio.wav"
    if not audio_id.endswith(wav_suffix):
        # Otherwise the derived sidecar path would equal audio_id and the
        # upload below would overwrite that object with JSON.
        raise ValueError("无效的配音 ID")
    meta_path = audio_id[: -len(wav_suffix)] + "_audio.json"

    try:
        meta_url = await storage_service.get_signed_url(BUCKET, meta_path)
        async with httpx.AsyncClient() as client:
            resp = await client.get(meta_url)
        if resp.status_code != 200:
            raise RuntimeError(f"Failed to fetch metadata: {resp.status_code}")
        metadata = resp.json()
        if not isinstance(metadata, dict):
            raise TypeError("metadata is not a JSON object")
    except Exception as e:
        logger.warning(f"无法读取配音元数据: {e}, 将创建新的")
        # Prefer the timestamp embedded in the file name so the item keeps its
        # position when listings are sorted by creation time.
        file_name = audio_id.rsplit("/", 1)[-1]
        try:
            created_at = int(file_name.split("_")[0])
        except ValueError:
            created_at = int(time.time())
        metadata = {
            "display_name": new_name,
            "text": "",
            "tts_mode": "edgetts",
            "language": "zh-CN",
            "duration_sec": 0.0,
            "created_at": created_at,
        }

    metadata["display_name"] = new_name
    await storage_service.upload_file(
        bucket=BUCKET,
        path=meta_path,
        file_data=json.dumps(metadata, ensure_ascii=False).encode("utf-8"),
        content_type="application/json",
    )
    return {"name": new_name}
|