172 lines
5.7 KiB
Python
172 lines
5.7 KiB
Python
"""
Voice cloning service.

Calls the standalone CosyVoice 3.0 service over HTTP (port 8010).
"""
|
|
import asyncio
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import httpx
|
|
from loguru import logger
|
|
|
|
# CosyVoice 3.0 service address; VoiceCloneService.__init__ copies it into base_url.
|
|
VOICE_CLONE_URL = "http://localhost:8010"
|
|
|
|
|
|
class VoiceCloneService:
    """Voice cloning client that calls the CosyVoice 3.0 HTTP API.

    Synthesis requests made through :meth:`generate_audio` are serialized with
    an ``asyncio.Lock`` so only one ``/generate`` call is in flight per
    instance, and successful ``/health`` payloads are cached for 30 seconds.
    """

    def __init__(self):
        self.base_url = VOICE_CLONE_URL
        # Health-status cache: last successful /health payload and when it was stored.
        self._health_cache: Optional[dict] = None
        self._health_cache_time: float = 0
        # GPU concurrency lock (serial queue)
        self._lock = asyncio.Lock()

    async def _generate_once(
        self,
        *,
        text: str,
        ref_audio_data: bytes,
        ref_text: str,
        language: str,
        speed: float = 1.0,
        max_retries: int = 4,
    ) -> bytes:
        """POST one synthesis request to ``/generate`` and return the audio bytes.

        Responses with status 429/502/503/504, or a 500 whose body mentions a
        timeout, are retried with a linearly growing delay (8s, 16s, ...).
        Transport-level failures (``httpx.RequestError``) are retried with a
        6s-step delay.

        Args:
            text: Text to synthesize.
            ref_audio_data: Raw bytes of the reference audio, uploaded as ``ref.wav``.
            ref_text: Transcript of the reference audio.
            language: Language value forwarded to the service.
            speed: Speed value forwarded to the service as a string form field.
            max_retries: Total number of attempts; values below 1 are treated
                as 1 so that a request is always sent.

        Returns:
            The response body of the successful request.

        Raises:
            RuntimeError: If the service answers with an error status that is
                not (or no longer) retried, or if it cannot be reached.
        """
        timeout = httpx.Timeout(240.0)
        # Guarantee at least one attempt; otherwise the loop would be skipped
        # and a misleading "service busy" error raised without any request.
        max_retries = max(1, max_retries)

        for attempt in range(max_retries):
            has_retry_left = attempt < max_retries - 1
            try:
                async with httpx.AsyncClient(timeout=timeout) as client:
                    response = await client.post(
                        f"{self.base_url}/generate",
                        files={"ref_audio": ("ref.wav", ref_audio_data, "audio/wav")},
                        data={
                            "text": text,
                            "ref_text": ref_text,
                            "language": language,
                            "speed": str(speed),
                        },
                    )

                # A non-empty reason marks the response as retryable.
                reason = ""
                if response.status_code in (429, 502, 503, 504):
                    reason = f"HTTP {response.status_code}"
                elif response.status_code == 500 and (
                    "生成超时" in response.text or "timeout" in response.text.lower()
                ):
                    reason = "upstream timeout"

                if reason and has_retry_left:
                    wait = 8 * (attempt + 1)
                    logger.warning(
                        f"Voice clone retryable error ({reason}), retrying in {wait}s "
                        f"(attempt {attempt + 1}/{max_retries})"
                    )
                    await asyncio.sleep(wait)
                    continue

                # On the last attempt a retryable status falls through to here
                # and is reported like any other HTTP error.
                response.raise_for_status()
                return response.content

            except httpx.HTTPStatusError as e:
                logger.error(f"Voice clone API error: {e.response.status_code} - {e.response.text}")
                raise RuntimeError(f"声音克隆服务错误: {e.response.text}") from e
            except httpx.RequestError as e:
                if has_retry_left:
                    wait = 6 * (attempt + 1)
                    logger.warning(
                        f"Voice clone connection error: {e}; retrying in {wait}s "
                        f"(attempt {attempt + 1}/{max_retries})"
                    )
                    await asyncio.sleep(wait)
                    continue
                logger.error(f"Voice clone connection error: {e}")
                raise RuntimeError("无法连接声音克隆服务,请检查服务是否启动") from e

        # Defensive fallback: every loop iteration above returns, raises or retries.
        raise RuntimeError("声音克隆服务繁忙,请稍后重试")

    async def generate_audio(
        self,
        text: str,
        ref_audio_path: str,
        ref_text: str,
        output_path: str,
        language: str = "Chinese",
        speed: float = 1.0,
    ) -> str:
        """Generate speech with the cloned voice and write it to ``output_path``.

        Args:
            text: Text to synthesize; surrounding whitespace is stripped.
            ref_audio_path: Local path of the reference audio file.
            ref_text: Transcript of the reference audio.
            output_path: Destination path of the generated wav file; missing
                parent directories are created.
            language: Language (Chinese/English/Auto).
            speed: Speed value forwarded to the service.

        Returns:
            The output file path.

        Raises:
            RuntimeError: If ``text`` is empty after stripping, or if the
                voice clone service fails.
        """
        # Validate before queueing on the lock so an invalid request fails
        # immediately and leaves no directories behind.
        text = text.strip()
        if not text:
            raise RuntimeError("文本为空,无法生成语音")

        # Use the lock to run requests serially and avoid exhausting GPU memory.
        async with self._lock:
            logger.info(f"🎤 Voice Clone: {text[:30]}... (language={language})")
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            with open(ref_audio_path, "rb") as f:
                ref_audio_data = f.read()

            # CosyVoice performs its own text_normalize segmentation, so no
            # client-side splitting is needed.
            audio_bytes = await self._generate_once(
                text=text,
                ref_audio_data=ref_audio_data,
                ref_text=ref_text,
                language=language,
                speed=speed,
            )
            with open(output_path, "wb") as f:
                f.write(audio_bytes)
            logger.info(f"✅ Voice clone saved: {output_path}")
            return output_path

    async def check_health(self) -> dict:
        """Return the service health payload, cached for 30 seconds.

        A successful ``/health`` response is cached and returned as-is for
        subsequent calls within 30 seconds. Failures are not cached; they are
        logged and reported as a fallback dict with ``"ready": False`` and the
        error text.
        """
        import time

        # 30-second cache. A monotonic clock keeps the TTL unaffected by
        # wall-clock adjustments.
        now = time.monotonic()
        cached = self._health_cache
        if cached is not None and (now - self._health_cache_time) < 30:
            return cached

        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get(f"{self.base_url}/health")
                response.raise_for_status()
                payload = response.json()
                self._health_cache = payload
                self._health_cache_time = now
                return payload
        except Exception as e:
            # Deliberate best-effort: a health probe must never raise.
            logger.warning(f"Voice clone health check failed: {e}")
            return {
                "service": "CosyVoice 3.0 Voice Clone",
                "model": "unknown",
                "ready": False,
                "gpu_id": 0,
                "error": str(e),
            }
|
|
|
|
|
|
# Singleton: module-level shared instance, created at import time
|
|
voice_clone_service = VoiceCloneService()
|