""" 声音克隆服务 通过 HTTP 调用 CosyVoice 3.0 独立服务 (端口 8010) """ import asyncio from pathlib import Path from typing import Optional import httpx from loguru import logger # CosyVoice 3.0 服务地址 VOICE_CLONE_URL = "http://localhost:8010" class VoiceCloneService: """声音克隆服务 - 调用 CosyVoice 3.0 HTTP API""" def __init__(self): self.base_url = VOICE_CLONE_URL # 健康状态缓存 self._health_cache: Optional[dict] = None self._health_cache_time: float = 0 # GPU 并发锁 (Serial Queue) self._lock = asyncio.Lock() async def _generate_once( self, *, text: str, ref_audio_data: bytes, ref_text: str, language: str, speed: float = 1.0, max_retries: int = 4, ) -> bytes: timeout = httpx.Timeout(240.0) for attempt in range(max_retries): try: async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post( f"{self.base_url}/generate", files={"ref_audio": ("ref.wav", ref_audio_data, "audio/wav")}, data={ "text": text, "ref_text": ref_text, "language": language, "speed": str(speed), }, ) retryable = False reason = "" if response.status_code in (429, 502, 503, 504): retryable = True reason = f"HTTP {response.status_code}" elif response.status_code == 500 and ( "生成超时" in response.text or "timeout" in response.text.lower() ): retryable = True reason = "upstream timeout" if retryable and attempt < max_retries - 1: wait = 8 * (attempt + 1) logger.warning( f"Voice clone retryable error ({reason}), retrying in {wait}s " f"(attempt {attempt + 1}/{max_retries})" ) await asyncio.sleep(wait) continue response.raise_for_status() return response.content except httpx.HTTPStatusError as e: logger.error(f"Voice clone API error: {e.response.status_code} - {e.response.text}") raise RuntimeError(f"声音克隆服务错误: {e.response.text}") except httpx.RequestError as e: if attempt < max_retries - 1: wait = 6 * (attempt + 1) logger.warning( f"Voice clone connection error: {e}; retrying in {wait}s " f"(attempt {attempt + 1}/{max_retries})" ) await asyncio.sleep(wait) continue logger.error(f"Voice clone connection error: {e}") raise RuntimeError("无法连接声音克隆服务,请检查服务是否启动") raise RuntimeError("声音克隆服务繁忙,请稍后重试") async def generate_audio( self, text: str, ref_audio_path: str, ref_text: str, output_path: str, language: str = "Chinese", speed: float = 1.0, ) -> str: """ 使用声音克隆生成语音 Args: text: 要合成的文本 ref_audio_path: 参考音频本地路径 ref_text: 参考音频的转写文字 output_path: 输出 wav 路径 language: 语言 (Chinese/English/Auto) Returns: 输出文件路径 """ # 使用锁确保串行执行,避免 GPU 显存溢出 async with self._lock: logger.info(f"🎤 Voice Clone: {text[:30]}... (language={language})") Path(output_path).parent.mkdir(parents=True, exist_ok=True) text = text.strip() if not text: raise RuntimeError("文本为空,无法生成语音") with open(ref_audio_path, "rb") as f: ref_audio_data = f.read() # CosyVoice 内部自带 text_normalize 分段,无需客户端切分 audio_bytes = await self._generate_once( text=text, ref_audio_data=ref_audio_data, ref_text=ref_text, language=language, speed=speed, ) with open(output_path, "wb") as f: f.write(audio_bytes) logger.info(f"✅ Voice clone saved: {output_path}") return output_path async def check_health(self) -> dict: """健康检查""" import time # 30秒缓存 now = time.time() cached = self._health_cache if cached is not None and (now - self._health_cache_time) < 30: return cached try: async with httpx.AsyncClient(timeout=5.0) as client: response = await client.get(f"{self.base_url}/health") response.raise_for_status() payload = response.json() self._health_cache = payload self._health_cache_time = now return payload except Exception as e: logger.warning(f"Voice clone health check failed: {e}") return { "service": "CosyVoice 3.0 Voice Clone", "model": "unknown", "ready": False, "gpu_id": 0, "error": str(e) } # 单例 voice_clone_service = VoiceCloneService()