Init: 导入NaviGlassServer源码

2025-12-31 15:42:30 +08:00
parent 5baf812ed3
commit 2b6dd49a59
233 changed files with 20236 additions and 178 deletions
--- a/edge_tts_client.py
+++ b/edge_tts_client.py
@@ -0,0 +1,202 @@
+# edge_tts_client.py
+# -*- coding: utf-8 -*-
+"""
+EdgeTTS streaming speech-synthesis client - Day 21
+
+Features:
+- Completely free
+- Streaming output (playback can start while synthesis is in progress)
+- Low latency
+"""
+
+import os
+import asyncio
+import edge_tts
+from typing import AsyncGenerator, Optional
+
+# Default voice; overridable through the EDGE_TTS_VOICE environment variable.
+DEFAULT_VOICE = os.getenv("EDGE_TTS_VOICE", "zh-CN-XiaoxiaoNeural")
+
+# Speaking-rate adjustment ("+0%", "+10%", "-10%", ...); overridable through
+# the EDGE_TTS_RATE environment variable.
+DEFAULT_RATE = os.getenv("EDGE_TTS_RATE", "+0%")
+
+# Volume adjustment; overridable through the EDGE_TTS_VOLUME environment variable.
+DEFAULT_VOLUME = os.getenv("EDGE_TTS_VOLUME", "+0%")
+
+
+async def text_to_speech_stream(
+    text: str,
+    voice: str = DEFAULT_VOICE,
+    rate: str = DEFAULT_RATE,
+    volume: str = DEFAULT_VOLUME,
+) -> AsyncGenerator[bytes, None]:
+    """
+    流式文本转语音
+    
+    Args:
+        text: 要合成的文本
+        voice: 语音名称
+        rate: 语速
+        volume: 音量
+    
+    Yields:
+        MP3 音频数据块
+    """
+    if not text or not text.strip():
+        return
+    
+    try:
+        communicate = edge_tts.Communicate(
+            text=text,
+            voice=voice,
+            rate=rate,
+            volume=volume,
+        )
+        
+        async for chunk in communicate.stream():
+            if chunk["type"] == "audio":
+                yield chunk["data"]
+                
+    except Exception as e:
+        print(f"[EdgeTTS] 合成失败: {e}")
+
+
+async def text_to_speech(
+    text: str,
+    voice: str = DEFAULT_VOICE,
+    rate: str = DEFAULT_RATE,
+    volume: str = DEFAULT_VOLUME,
+) -> bytes:
+    """
+    完整文本转语音（返回完整音频）
+    
+    Args:
+        text: 要合成的文本
+        voice: 语音名称
+        rate: 语速
+        volume: 音量
+    
+    Returns:
+        MP3 音频数据
+    """
+    audio_chunks = []
+    async for chunk in text_to_speech_stream(text, voice, rate, volume):
+        audio_chunks.append(chunk)
+    return b"".join(audio_chunks)
+
+
+async def text_to_speech_pcm(
+    text: str,
+    voice: str = DEFAULT_VOICE,
+    rate: str = DEFAULT_RATE,
+    target_sample_rate: int = 16000,
+) -> bytes:
+    """
+    文本转 PCM16 音频（用于直接播放）
+    
+    Args:
+        text: 要合成的文本
+        voice: 语音名称
+        rate: 语速
+        target_sample_rate: 目标采样率
+    
+    Returns:
+        PCM16 音频数据
+    """
+    import io
+    from pydub import AudioSegment
+    
+    # 获取 MP3 数据
+    mp3_data = await text_to_speech(text, voice, rate)
+    
+    if not mp3_data:
+        return b""
+    
+    try:
+        # MP3 -> PCM 转换
+        audio = AudioSegment.from_mp3(io.BytesIO(mp3_data))
+        
+        # 设置采样率和通道
+        audio = audio.set_frame_rate(target_sample_rate)
+        audio = audio.set_channels(1)  # 单声道
+        audio = audio.set_sample_width(2)  # 16-bit
+        
+        return audio.raw_data
+        
+    except Exception as e:
+        print(f"[EdgeTTS] PCM 转换失败: {e}")
+        return b""
+
+
+async def text_to_speech_pcm_stream(
+    text: str,
+    voice: str = DEFAULT_VOICE,
+    rate: str = DEFAULT_RATE,
+    target_sample_rate: int = 16000,
+) -> AsyncGenerator[bytes, None]:
+    """
+    流式文本转 PCM16 音频
+    
+    注意：由于需要解码 MP3，这里采用分段合成的方式
+    每遇到标点符号就合成一段
+    
+    Args:
+        text: 要合成的文本
+        voice: 语音名称
+        rate: 语速
+        target_sample_rate: 目标采样率
+    
+    Yields:
+        PCM16 音频数据块
+    """
+    import io
+    from pydub import AudioSegment
+    
+    # 按标点分割文本
+    punctuation = "。，！？；：,.!?;:"
+    segments = []
+    current = ""
+    
+    for char in text:
+        current += char
+        if char in punctuation:
+            segments.append(current.strip())
+            current = ""
+    
+    if current.strip():
+        segments.append(current.strip())
+    
+    # 逐段合成
+    for segment in segments:
+        if not segment:
+            continue
+            
+        try:
+            mp3_data = await text_to_speech(segment, voice, rate)
+            
+            if mp3_data:
+                audio = AudioSegment.from_mp3(io.BytesIO(mp3_data))
+                audio = audio.set_frame_rate(target_sample_rate)
+                audio = audio.set_channels(1)
+                audio = audio.set_sample_width(2)
+                
+                yield audio.raw_data
+                
+        except Exception as e:
+            print(f"[EdgeTTS] 分段合成失败: {e}")
+
+
+# Voice list (commonly used Chinese voices)
+CHINESE_VOICES = [
+    "zh-CN-XiaoxiaoNeural",    # female, natural
+    "zh-CN-YunxiNeural",       # male, natural
+    "zh-CN-XiaoyiNeural",      # female, lively
+    "zh-CN-YunjianNeural",     # male, broadcast style
+    "zh-CN-XiaochenNeural",    # female, gentle
+]
+
+
+async def list_voices() -> list:
+    """列出所有可用语音"""
+    voices = await edge_tts.list_voices()
+    return [v for v in voices if v["Locale"].startswith("zh")]