116 lines
3.8 KiB
Python
116 lines
3.8 KiB
Python
"""
|
|
声音克隆服务
|
|
通过 HTTP 调用 Qwen3-TTS 独立服务 (端口 8009)
|
|
"""
|
|
import httpx
|
|
import asyncio
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
from loguru import logger
|
|
|
|
from app.core.config import settings
|
|
|
|
# Base URL of the Qwen3-TTS service; VoiceCloneService uses it as its base_url.
|
|
QWEN_TTS_URL = "http://localhost:8009"
|
|
|
|
|
|
class VoiceCloneService:
    """Voice-cloning client that calls the Qwen3-TTS HTTP API.

    Generation requests are serialized through an ``asyncio.Lock`` so that
    only one ``/generate`` call is in flight at a time (the service runs on
    a GPU and concurrent requests risk exhausting its memory). Successful
    health-check responses are cached for ``HEALTH_CACHE_TTL`` seconds.
    """

    # Seconds a successful /health response is reused before re-querying.
    HEALTH_CACHE_TTL = 300.0
    # Overall timeout, in seconds, for a single /generate request.
    GENERATE_TIMEOUT = 300.0
    # Overall timeout, in seconds, for a single /health request.
    HEALTH_TIMEOUT = 5.0

    def __init__(self, base_url: Optional[str] = None):
        """Create a client.

        Args:
            base_url: Root URL of the Qwen3-TTS service. Defaults to the
                module-level ``QWEN_TTS_URL`` when omitted.
        """
        if base_url is None:
            base_url = QWEN_TTS_URL
        # Endpoints are appended as "/generate" and "/health", so drop any
        # trailing slash to avoid producing "//" in the request URL.
        self.base_url = base_url.rstrip("/")
        # Last successful health response and the monotonic time it was taken.
        self._health_cache: Optional[dict] = None
        self._health_cache_time: float = 0.0
        # Serial queue for GPU work: one generation request at a time.
        self._lock = asyncio.Lock()

    async def generate_audio(
        self,
        text: str,
        ref_audio_path: str,
        ref_text: str,
        output_path: str,
        language: str = "Chinese",
    ) -> str:
        """Synthesize speech for ``text`` in the voice of a reference clip.

        Args:
            text: Text to synthesize.
            ref_audio_path: Local path of the reference audio file.
            ref_text: Transcript of the reference audio.
            output_path: Path the returned audio is written to; missing
                parent directories are created.
            language: Language passed to the service
                (Chinese/English/Auto).

        Returns:
            ``output_path``, once the audio has been written.

        Raises:
            OSError: If the reference audio cannot be read (for example
                ``FileNotFoundError``) or the output cannot be written.
            RuntimeError: If the service answers with an error status or
                cannot be reached; the underlying ``httpx`` exception is
                attached as ``__cause__``.
        """
        # Read the reference clip first: a bad path fails immediately, before
        # anything is created on disk and without occupying the GPU queue.
        ref_audio_data = Path(ref_audio_path).read_bytes()

        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)

        # Hold the lock only for the remote call so requests run serially and
        # the GPU does not run out of memory.
        async with self._lock:
            logger.info(f"🎤 Voice Clone: {text[:30]}...")

            timeout = httpx.Timeout(self.GENERATE_TIMEOUT)
            async with httpx.AsyncClient(timeout=timeout) as client:
                try:
                    response = await client.post(
                        f"{self.base_url}/generate",
                        files={"ref_audio": ("ref.wav", ref_audio_data, "audio/wav")},
                        data={
                            "text": text,
                            "ref_text": ref_text,
                            "language": language,
                        },
                    )
                    response.raise_for_status()
                except httpx.HTTPStatusError as e:
                    logger.error(f"Qwen3-TTS API error: {e.response.status_code} - {e.response.text}")
                    raise RuntimeError(f"声音克隆服务错误: {e.response.text}") from e
                except httpx.RequestError as e:
                    logger.error(f"Qwen3-TTS connection error: {e}")
                    raise RuntimeError("无法连接声音克隆服务,请检查服务是否启动") from e

        # Persist the audio returned by the service.
        destination.write_bytes(response.content)
        logger.info(f"✅ Voice clone saved: {output_path}")
        return output_path

    async def check_health(self) -> dict:
        """Return the service's health status.

        A successful response is cached and reused for ``HEALTH_CACHE_TTL``
        seconds. Failures are never cached, so the next call retries.

        Returns:
            The JSON body of ``GET /health`` on success; otherwise a fallback
            dict with ``"ready": False`` and the error text under ``"error"``.
        """
        import time

        # Monotonic clock: the cache age must not be affected by wall-clock
        # adjustments (NTP corrections, manual changes).
        now = time.monotonic()
        cache_age = now - self._health_cache_time
        # Compare against None so that an empty JSON object is still cached.
        if self._health_cache is not None and cache_age < self.HEALTH_CACHE_TTL:
            return self._health_cache

        try:
            async with httpx.AsyncClient(timeout=self.HEALTH_TIMEOUT) as client:
                response = await client.get(f"{self.base_url}/health")
                response.raise_for_status()
                self._health_cache = response.json()
                self._health_cache_time = now
                return self._health_cache
        except Exception as e:
            # Best-effort probe: report "not ready" instead of raising.
            logger.warning(f"Qwen3-TTS health check failed: {e}")
            return {
                "service": "Qwen3-TTS Voice Clone",
                "model": "0.6B-Base",
                "ready": False,
                "gpu_id": 0,
                "error": str(e),
            }
|
|
|
|
|
|
# Module-level singleton instance of VoiceCloneService.
|
|
voice_clone_service = VoiceCloneService()
|