更新
This commit is contained in:
0
backend/app/modules/generated_audios/__init__.py
Normal file
0
backend/app/modules/generated_audios/__init__.py
Normal file
77
backend/app/modules/generated_audios/router.py
Normal file
77
backend/app/modules/generated_audios/router.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""生成配音 API"""
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
|
||||
import uuid
|
||||
from loguru import logger
|
||||
|
||||
from app.core.deps import get_current_user
|
||||
from app.core.response import success_response
|
||||
from app.modules.videos.task_store import create_task, get_task
|
||||
from app.modules.generated_audios.schemas import GenerateAudioRequest, RenameAudioRequest
|
||||
from app.modules.generated_audios import service
|
||||
|
||||
# Router collecting the generated-audio endpoints defined below.
# NOTE(review): the URL prefix is presumably applied where this router is included — not visible here.
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/generate")
async def generate_audio(
    req: GenerateAudioRequest,
    background_tasks: BackgroundTasks,
    user: dict = Depends(get_current_user),
):
    """Start an asynchronous voice-over generation job and return its task id."""
    new_task_id = str(uuid.uuid4())
    owner_id = user["id"]

    # Register the task first, then hand the actual work to the background runner.
    create_task(new_task_id, owner_id)
    background_tasks.add_task(
        service.generate_audio_task,
        new_task_id,
        req,
        owner_id,
    )

    payload = {"task_id": new_task_id}
    return success_response(payload)
|
||||
|
||||
|
||||
@router.get("/tasks/{task_id}")
async def get_audio_task(task_id: str, user: dict = Depends(get_current_user)):
    """Poll the progress of a voice-over generation task."""
    record = get_task(task_id)

    # An unknown task is passed through exactly as the task store reports it.
    if record.get("status") == "not_found":
        return success_response(record)

    # The caller owns the task: return the full record.
    if record.get("user_id") == user["id"]:
        return success_response(record)

    # The task exists but belongs to another user: answer as if it did not exist.
    return success_response({"status": "not_found"})
|
||||
|
||||
|
||||
@router.get("")
async def list_audios(user: dict = Depends(get_current_user)):
    """List every generated voice-over that belongs to the current user.

    Raises:
        HTTPException: 500 when the service layer fails for any reason.
    """
    try:
        result = await service.list_generated_audios(user["id"])
        return success_response(result)
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception(f"列出配音失败: {e}")
        # NOTE(review): the raw exception text is returned to the client — confirm this is acceptable.
        raise HTTPException(status_code=500, detail=f"获取列表失败: {str(e)}") from e
|
||||
|
||||
|
||||
@router.delete("/{audio_id:path}")
async def delete_audio(audio_id: str, user: dict = Depends(get_current_user)):
    """Delete one generated voice-over.

    ``audio_id`` uses the ``path`` converter, so it may contain slashes.

    Raises:
        HTTPException: 403 when the service raises ``PermissionError``,
            500 for any other failure.
    """
    try:
        await service.delete_generated_audio(audio_id, user["id"])
        return success_response(message="删除成功")
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e)) from e
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception(f"删除配音失败: {e}")
        raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}") from e
|
||||
|
||||
|
||||
@router.put("/{audio_id:path}")
async def rename_audio(
    audio_id: str,
    request: RenameAudioRequest,
    user: dict = Depends(get_current_user),
):
    """Rename one generated voice-over.

    ``audio_id`` uses the ``path`` converter, so it may contain slashes.

    Raises:
        HTTPException: 403 when the service raises ``PermissionError``,
            400 when it raises ``ValueError``, 500 for any other failure.
    """
    try:
        result = await service.rename_generated_audio(audio_id, request.new_name, user["id"])
        return success_response(result, message="重命名成功")
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e)) from e
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception(f"重命名配音失败: {e}")
        raise HTTPException(status_code=500, detail=f"重命名失败: {str(e)}") from e
|
||||
30
backend/app/modules/generated_audios/schemas.py
Normal file
30
backend/app/modules/generated_audios/schemas.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List
|
||||
|
||||
|
||||
class GenerateAudioRequest(BaseModel):
    """Request body for starting a voice-over generation job."""

    # Text to be spoken.
    text: str

    # Synthesis backend; the service treats "voiceclone" specially and
    # sends every other value down the EdgeTTS path.
    tts_mode: str = "edgetts"

    # Voice name handed to the EdgeTTS path.
    voice: str = "zh-CN-YunxiNeural"

    # Reference clip id and its transcript; the service requires both
    # when tts_mode is "voiceclone".
    ref_audio_id: Optional[str] = None
    ref_text: Optional[str] = None

    # Locale tag of the text.
    language: str = "zh-CN"
|
||||
|
||||
|
||||
class RenameAudioRequest(BaseModel):
    """Request body for renaming a generated voice-over."""

    # New display name.
    new_name: str
|
||||
|
||||
|
||||
class GeneratedAudioItem(BaseModel):
    """One generated voice-over as returned by the listing endpoint."""

    # Storage path of the audio object.
    id: str

    # Display name shown to the user.
    name: str

    # URL of the audio; the service fills this with a signed URL.
    path: str

    # Length of the audio in seconds.
    duration_sec: float

    # Text that was synthesized.
    text: str

    # Synthesis backend that produced the audio.
    tts_mode: str

    # Locale tag of the text.
    language: str

    # Creation time as an integer timestamp.
    created_at: int
|
||||
|
||||
|
||||
class GeneratedAudioListResponse(BaseModel):
    """Envelope for a list of generated voice-overs."""

    # Items in the order chosen by the caller that builds the response.
    items: List[GeneratedAudioItem]
|
||||
263
backend/app/modules/generated_audios/service.py
Normal file
263
backend/app/modules/generated_audios/service.py
Normal file
@@ -0,0 +1,263 @@
|
||||
"""生成配音 - 业务逻辑"""
|
||||
import re
|
||||
import json
|
||||
import time
|
||||
import asyncio
|
||||
import subprocess
|
||||
import tempfile
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
from app.services.storage import storage_service
|
||||
from app.services.tts_service import TTSService
|
||||
from app.services.voice_clone_service import voice_clone_service
|
||||
from app.modules.videos.task_store import task_store
|
||||
from app.modules.generated_audios.schemas import (
|
||||
GenerateAudioRequest,
|
||||
GeneratedAudioItem,
|
||||
GeneratedAudioListResponse,
|
||||
)
|
||||
|
||||
# Storage bucket that holds the generated audio files and their JSON metadata files.
BUCKET = "generated-audios"
|
||||
|
||||
|
||||
def _locale_to_qwen_lang(locale: str) -> str:
|
||||
mapping = {"zh": "Chinese", "en": "English"}
|
||||
return mapping.get(locale.split("-")[0], "Auto")
|
||||
|
||||
|
||||
def _get_audio_duration(file_path: str) -> float:
    """Return the duration of an audio file in seconds, as reported by ffprobe.

    Any failure (ffprobe unavailable, timeout after 10 seconds, output that
    is not a number) is logged as a warning and reported as ``0.0``.
    """
    probe_cmd = [
        'ffprobe', '-v', 'quiet', '-show_entries', 'format=duration',
        '-of', 'csv=p=0', file_path,
    ]
    try:
        completed = subprocess.run(
            probe_cmd,
            capture_output=True,
            text=True,
            timeout=10,
        )
        seconds = float(completed.stdout.strip())
    except Exception as e:
        logger.warning(f"获取音频时长失败: {e}")
        return 0.0
    return seconds
|
||||
|
||||
|
||||
async def _download_ref_audio(ref_audio_id: str, dest_path: str) -> None:
    """Stream a reference clip from the "ref-audios" bucket into ``dest_path``."""
    ref_url = await storage_service.get_signed_url(
        bucket="ref-audios", path=ref_audio_id
    )
    # NOTE(review): all timeouts are disabled, so a stalled download leaves the
    # task in "processing" indefinitely — confirm this is intended.
    timeout = httpx.Timeout(None)
    async with httpx.AsyncClient(timeout=timeout) as client:
        async with client.stream("GET", ref_url) as resp:
            resp.raise_for_status()
            with open(dest_path, "wb") as f:
                async for chunk in resp.aiter_bytes():
                    f.write(chunk)


async def generate_audio_task(task_id: str, req: GenerateAudioRequest, user_id: str):
    """Background task: generate a voice-over, upload it and record progress.

    Progress updates, the final output and any failure are written to
    ``task_store`` under ``task_id``. Exceptions raised during generation are
    caught and recorded as a failed task instead of being propagated.

    Args:
        task_id: Id of the task entry to update.
        req: Generation parameters (text, mode, voice, reference clip, language).
        user_id: Owner of the result; used as the storage path prefix.
    """
    try:
        task_store.update(task_id, {"status": "processing", "progress": 10, "message": "正在生成配音..."})

        # Reserve a temp path only; the handle is closed right away and the
        # TTS backends write to the file by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            audio_path = tmp.name

        try:
            if req.tts_mode == "voiceclone":
                if not req.ref_audio_id or not req.ref_text:
                    raise ValueError("声音克隆模式需要提供参考音频和参考文字")

                task_store.update(task_id, {"progress": 20, "message": "正在下载参考音频..."})

                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_ref:
                    ref_local = tmp_ref.name

                try:
                    # NOTE(review): ref_audio_id is not checked against user_id here
                    # (delete/rename do check a user prefix) — confirm whether
                    # "ref-audios" paths need the same guard.
                    await _download_ref_audio(req.ref_audio_id, ref_local)

                    task_store.update(task_id, {"progress": 40, "message": "正在克隆声音 (Qwen3-TTS)..."})
                    await voice_clone_service.generate_audio(
                        text=req.text,
                        ref_audio_path=ref_local,
                        ref_text=req.ref_text,
                        output_path=audio_path,
                        language=_locale_to_qwen_lang(req.language),
                    )
                finally:
                    if os.path.exists(ref_local):
                        os.unlink(ref_local)
            else:
                task_store.update(task_id, {"progress": 30, "message": "正在生成语音 (EdgeTTS)..."})
                tts = TTSService()
                await tts.generate_audio(req.text, req.voice, audio_path)

            task_store.update(task_id, {"progress": 70, "message": "正在上传配音..."})

            # ffprobe (up to 10 s) and the file read are blocking calls; run them
            # in the default executor so the event loop keeps serving requests.
            loop = asyncio.get_running_loop()
            duration = await loop.run_in_executor(None, _get_audio_duration, audio_path)

            # NOTE(review): ids have one-second resolution, so two jobs finishing
            # for the same user within the same second target the same path.
            timestamp = int(time.time())
            audio_id = f"{user_id}/{timestamp}_audio.wav"
            meta_id = f"{user_id}/{timestamp}_audio.json"

            # Build the default display name from the local time.
            now = time.strftime("%Y%m%d_%H%M", time.localtime(timestamp))
            display_name = f"配音_{now}"

            wav_data = await loop.run_in_executor(None, Path(audio_path).read_bytes)

            await storage_service.upload_file(
                bucket=BUCKET, path=audio_id,
                file_data=wav_data, content_type="audio/wav",
            )

            # Metadata is stored next to the audio under the same name with a .json suffix.
            metadata = {
                "display_name": display_name,
                "text": req.text,
                "tts_mode": req.tts_mode,
                "voice": req.voice if req.tts_mode == "edgetts" else None,
                "ref_audio_id": req.ref_audio_id,
                "language": req.language,
                "duration_sec": duration,
                "created_at": timestamp,
            }
            await storage_service.upload_file(
                bucket=BUCKET, path=meta_id,
                file_data=json.dumps(metadata, ensure_ascii=False).encode("utf-8"),
                content_type="application/json",
            )

            signed_url = await storage_service.get_signed_url(BUCKET, audio_id)

            task_store.update(task_id, {
                "status": "completed",
                "progress": 100,
                "message": f"配音生成完成 ({duration:.1f}s)",
                "output": {
                    "audio_id": audio_id,
                    "name": display_name,
                    "path": signed_url,
                    "duration_sec": duration,
                    "text": req.text,
                    "tts_mode": req.tts_mode,
                    "language": req.language,
                    "created_at": timestamp,
                },
            })
        finally:
            # Always remove the local temp file, whether or not the upload succeeded.
            if os.path.exists(audio_path):
                os.unlink(audio_path)

    except Exception as e:
        import traceback
        # NOTE(review): the full traceback is stored in the task record, which the
        # polling endpoint returns to the client — confirm this exposure is intended.
        task_store.update(task_id, {
            "status": "failed",
            "message": f"配音生成失败: {str(e)}",
            "error": traceback.format_exc(),
        })
        # logger.exception keeps the traceback in the server log as well.
        logger.exception(f"Generate audio failed: {e}")
|
||||
|
||||
|
||||
async def list_generated_audios(user_id: str) -> dict:
    """List all generated voice-overs of a user, newest first.

    Every ``*_audio.wav`` object under the user's prefix becomes one item.
    Its details come from the matching ``*_audio.json`` metadata object; when
    that cannot be read, defaults are used and ``created_at`` is taken from the
    leading timestamp of the file name when that parses as an integer.

    Args:
        user_id: Owner whose storage prefix is listed.

    Returns:
        A ``GeneratedAudioListResponse`` dumped to a plain dict.
    """
    files = await storage_service.list_files(BUCKET, user_id)
    wav_files = [f for f in files if f.get("name", "").endswith("_audio.wav")]

    if not wav_files:
        return GeneratedAudioListResponse(items=[]).model_dump()

    async def fetch_info(client, f):
        """Build the list item for one storage entry."""
        name = f.get("name", "")
        storage_path = f"{user_id}/{name}"
        meta_name = name.replace("_audio.wav", "_audio.json")
        meta_path = f"{user_id}/{meta_name}"

        # Best effort: a missing or unreadable metadata file must not hide the audio.
        meta = {}
        try:
            meta_url = await storage_service.get_signed_url(BUCKET, meta_path)
            resp = await client.get(meta_url)
            if resp.status_code == 200:
                payload = resp.json()
                if isinstance(payload, dict):
                    meta = payload
        except Exception as e:
            logger.debug(f"读取配音 metadata 失败: {e}")

        # Fall back to the file-name timestamp whenever metadata gave no
        # created_at, not only when reading it raised.
        created_at = meta.get("created_at")
        if created_at is None:
            try:
                created_at = int(name.split("_")[0])
            except ValueError:
                created_at = 0

        signed_url = await storage_service.get_signed_url(BUCKET, storage_path)

        return GeneratedAudioItem(
            id=storage_path,
            name=meta.get("display_name", name),
            path=signed_url,
            duration_sec=meta.get("duration_sec", 0.0),
            text=meta.get("text", ""),
            tts_mode=meta.get("tts_mode", "edgetts"),
            language=meta.get("language", "zh-CN"),
            created_at=created_at,
        )

    # One shared client for all metadata requests instead of one per file.
    async with httpx.AsyncClient(timeout=5.0) as client:
        items = await asyncio.gather(*[fetch_info(client, f) for f in wav_files])

    items = sorted(items, key=lambda x: x.created_at, reverse=True)
    return GeneratedAudioListResponse(items=items).model_dump()
|
||||
|
||||
|
||||
async def delete_generated_audio(audio_id: str, user_id: str) -> None:
    """Delete a generated voice-over and, best effort, its metadata file.

    Args:
        audio_id: Storage path of the audio; must lie under ``"<user_id>/"``.
        user_id: Id of the requesting user.

    Raises:
        PermissionError: If ``audio_id`` is outside the user's prefix or
            contains a ``..`` path segment.
    """
    # audio_id comes from a URL path parameter, so also reject ".." segments
    # that would satisfy the prefix check but point outside the user's folder.
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权删除此文件")

    await storage_service.delete_file(BUCKET, audio_id)

    meta_path = audio_id.replace("_audio.wav", "_audio.json")
    if meta_path == audio_id:
        # The id does not follow the "_audio.wav" naming, so there is no
        # separate metadata object to remove.
        return

    try:
        await storage_service.delete_file(BUCKET, meta_path)
    except Exception as e:
        # Metadata cleanup stays best effort, but the failure is now logged
        # and task cancellation is no longer swallowed by a bare except.
        logger.warning(f"Failed to delete audio metadata {meta_path}: {e}")
|
||||
|
||||
|
||||
async def rename_generated_audio(audio_id: str, new_name: str, user_id: str) -> dict:
    """Change the display name stored in a voice-over's metadata file.

    The audio object itself is not touched; only the matching
    ``*_audio.json`` object is rewritten. If the existing metadata cannot be
    read, a fresh metadata record with default values is written instead.

    Args:
        audio_id: Storage path of the audio; must lie under ``"<user_id>/"``
            and end in ``"_audio.wav"``.
        new_name: New display name; surrounding whitespace is stripped.
        user_id: Id of the requesting user.

    Returns:
        ``{"name": <new display name>}``.

    Raises:
        PermissionError: If ``audio_id`` is outside the user's prefix or
            contains a ``..`` path segment.
        ValueError: If the new name is blank or ``audio_id`` does not end in
            ``"_audio.wav"``.
    """
    # audio_id comes from a URL path parameter, so also reject ".." segments
    # that would satisfy the prefix check but point outside the user's folder.
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权修改此文件")

    new_name = new_name.strip()
    if not new_name:
        raise ValueError("新名称不能为空")

    # Without this suffix the replace() below is a no-op and the JSON upload
    # would overwrite the object addressed by audio_id itself.
    if not audio_id.endswith("_audio.wav"):
        raise ValueError("无效的配音文件 ID")

    meta_path = audio_id.replace("_audio.wav", "_audio.json")
    try:
        meta_url = await storage_service.get_signed_url(BUCKET, meta_path)
        async with httpx.AsyncClient() as client:
            resp = await client.get(meta_url)
        if resp.status_code != 200:
            raise RuntimeError(f"Failed to fetch metadata: {resp.status_code}")
        metadata = resp.json()
        if not isinstance(metadata, dict):
            raise ValueError("metadata is not a JSON object")
    except Exception as e:
        # NOTE(review): a transient read failure also lands here and replaces the
        # stored text/duration with defaults — confirm this trade-off is intended.
        logger.warning(f"无法读取配音元数据: {e}, 将创建新的")
        metadata = {
            "display_name": new_name,
            "text": "",
            "tts_mode": "edgetts",
            "language": "zh-CN",
            "duration_sec": 0.0,
            "created_at": int(time.time()),
        }

    metadata["display_name"] = new_name
    await storage_service.upload_file(
        bucket=BUCKET,
        path=meta_path,
        file_data=json.dumps(metadata, ensure_ascii=False).encode("utf-8"),
        content_type="application/json",
    )
    return {"name": new_name}
|
||||
Reference in New Issue
Block a user