Init: 导入NaviGlassServer源码

This commit is contained in:
Kevin Wong
2025-12-31 15:42:30 +08:00
parent 5baf812ed3
commit 2b6dd49a59
233 changed files with 20236 additions and 178 deletions

202
edge_tts_client.py Normal file
View File

@@ -0,0 +1,202 @@
# edge_tts_client.py
# -*- coding: utf-8 -*-
"""
EdgeTTS 流式语音合成客户端 - Day 21
特点:
- 完全免费
- 流式输出(边合成边播放)
- 低延迟
"""
import os
import asyncio
import edge_tts
from typing import AsyncGenerator, Optional
# Default voice name; override with the EDGE_TTS_VOICE environment variable.
DEFAULT_VOICE = os.environ.get("EDGE_TTS_VOICE", "zh-CN-XiaoxiaoNeural")

# Default speaking-rate adjustment ("+0%", "+10%", "-10%", ...);
# override with EDGE_TTS_RATE.
DEFAULT_RATE = os.environ.get("EDGE_TTS_RATE", "+0%")

# Default volume adjustment; override with EDGE_TTS_VOLUME.
DEFAULT_VOLUME = os.environ.get("EDGE_TTS_VOLUME", "+0%")
async def text_to_speech_stream(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    volume: str = DEFAULT_VOLUME,
) -> AsyncGenerator[bytes, None]:
    """Synthesize *text* and yield the audio incrementally.

    Nothing is yielded when *text* is empty or whitespace-only. Any
    exception raised during synthesis is caught and printed, so the stream
    simply ends early instead of propagating the error to the consumer.

    Args:
        text: Text to synthesize.
        voice: Voice name passed to ``edge_tts.Communicate``.
        rate: Speaking-rate adjustment passed to ``edge_tts.Communicate``.
        volume: Volume adjustment passed to ``edge_tts.Communicate``.

    Yields:
        Audio data chunks (MP3) in the order the service delivers them.
    """
    has_content = bool(text) and bool(text.strip())
    if not has_content:
        return

    try:
        synthesizer = edge_tts.Communicate(
            text=text,
            voice=voice,
            rate=rate,
            volume=volume,
        )
        async for message in synthesizer.stream():
            # The stream interleaves audio with other message types;
            # only the audio payloads are forwarded.
            if message["type"] != "audio":
                continue
            yield message["data"]
    except Exception as e:
        print(f"[EdgeTTS] 合成失败: {e}")
async def text_to_speech(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    volume: str = DEFAULT_VOLUME,
) -> bytes:
    """Synthesize *text* and return the whole result as one bytes object.

    Drains ``text_to_speech_stream`` and concatenates every chunk it yields.

    Args:
        text: Text to synthesize.
        voice: Voice name.
        rate: Speaking-rate adjustment.
        volume: Volume adjustment.

    Returns:
        The concatenated audio data (MP3); ``b""`` when the stream yields
        nothing.
    """
    pieces = [
        piece
        async for piece in text_to_speech_stream(text, voice, rate, volume)
    ]
    return b"".join(pieces)
async def text_to_speech_pcm(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    target_sample_rate: int = 16000,
    volume: str = DEFAULT_VOLUME,
) -> bytes:
    """Synthesize *text* and return it as mono 16-bit PCM.

    The text is first synthesized to MP3 via ``text_to_speech`` and then
    decoded with pydub. Decoding is synchronous and can be slow, so it runs
    in the event loop's default executor to keep other coroutines responsive.

    Args:
        text: Text to synthesize.
        voice: Voice name.
        rate: Speaking-rate adjustment.
        target_sample_rate: Sample rate (Hz) of the returned PCM data.
        volume: Volume adjustment. Appended after the original parameters
            so existing positional callers are unaffected.

    Returns:
        Raw PCM16 mono audio, or ``b""`` when synthesis produced no audio
        or the MP3 -> PCM conversion failed (the failure is printed).
    """
    import io
    from pydub import AudioSegment

    mp3_data = await text_to_speech(text, voice, rate, volume)
    if not mp3_data:
        return b""

    def _decode() -> bytes:
        # MP3 -> PCM: resample, downmix to mono, force 16-bit samples.
        audio = AudioSegment.from_mp3(io.BytesIO(mp3_data))
        audio = audio.set_frame_rate(target_sample_rate)
        audio = audio.set_channels(1)
        audio = audio.set_sample_width(2)
        return audio.raw_data

    try:
        loop = asyncio.get_running_loop()
        # Run the blocking decode off the event-loop thread.
        return await loop.run_in_executor(None, _decode)
    except Exception as e:
        print(f"[EdgeTTS] PCM 转换失败: {e}")
        return b""
# Characters after which the text may be cut into separately synthesized
# segments (full-width and ASCII punctuation).
_SEGMENT_BREAKS = "。,!?;:,.!?;:"


def _split_into_segments(text: str) -> list:
    """Cut *text* into stripped segments that each end at a punctuation break.

    Consecutive break characters ("?!", "...") stay attached to the segment
    they close, ASCII ".", "," and ":" flanked by digits ("3.14", "1,000",
    "12:30") do not split, and segments made only of break characters and
    whitespace are dropped.
    """
    pieces = []
    start = 0
    last_index = len(text) - 1
    for index, char in enumerate(text):
        if char not in _SEGMENT_BREAKS:
            continue
        if index < last_index:
            following = text[index + 1]
            # Keep a run of punctuation together instead of emitting
            # punctuation-only segments.
            if following in _SEGMENT_BREAKS:
                continue
            # Do not cut numbers, thousands groups or clock times in half.
            if (
                char in ".,:"
                and index > 0
                and text[index - 1].isdigit()
                and following.isdigit()
            ):
                continue
        pieces.append(text[start:index + 1].strip())
        start = index + 1
    pieces.append(text[start:].strip())

    # Drop segments with nothing to pronounce so they do not cost a
    # synthesis round trip.
    return [
        piece
        for piece in pieces
        if any(ch not in _SEGMENT_BREAKS and not ch.isspace() for ch in piece)
    ]


async def text_to_speech_pcm_stream(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    target_sample_rate: int = 16000,
    volume: str = DEFAULT_VOLUME,
) -> AsyncGenerator[bytes, None]:
    """Synthesize *text* segment by segment, yielding mono 16-bit PCM.

    Because the MP3 output has to be decoded before it can be played as PCM,
    the text is split at punctuation and each segment is synthesized and
    decoded on its own, so the first audio is available before the whole
    text has been processed. A segment that fails is reported with a printed
    message and skipped; the remaining segments are still processed.

    Args:
        text: Text to synthesize. Empty, whitespace-only or ``None`` text
            yields nothing.
        voice: Voice name.
        rate: Speaking-rate adjustment.
        target_sample_rate: Sample rate (Hz) of the yielded PCM data.
        volume: Volume adjustment. Appended after the original parameters
            so existing positional callers are unaffected.

    Yields:
        Raw PCM16 mono audio, one bytes object per synthesized segment.
    """
    import io
    from pydub import AudioSegment

    if not text or not text.strip():
        return

    def _decode(mp3_data: bytes) -> bytes:
        # MP3 -> PCM: resample, downmix to mono, force 16-bit samples.
        audio = AudioSegment.from_mp3(io.BytesIO(mp3_data))
        audio = audio.set_frame_rate(target_sample_rate)
        audio = audio.set_channels(1)
        audio = audio.set_sample_width(2)
        return audio.raw_data

    loop = asyncio.get_running_loop()
    for segment in _split_into_segments(text):
        try:
            mp3_data = await text_to_speech(segment, voice, rate, volume)
            if mp3_data:
                # Decoding is synchronous; run it off the event-loop thread.
                yield await loop.run_in_executor(None, _decode, mp3_data)
        except Exception as e:
            print(f"[EdgeTTS] 分段合成失败: {e}")
# Commonly used Chinese voices.
CHINESE_VOICES = [
    "zh-CN-XiaoxiaoNeural",  # female, natural
    "zh-CN-YunxiNeural",  # male, natural
    "zh-CN-XiaoyiNeural",  # female, lively
    "zh-CN-YunjianNeural",  # male, broadcast style
    "zh-CN-XiaochenNeural",  # female, gentle
]
async def list_voices() -> list:
    """Return the available voices whose locale is Chinese.

    Fetches the voice catalogue with ``edge_tts.list_voices()`` and keeps
    only the entries whose ``"Locale"`` value starts with ``"zh"``.
    """
    catalogue = await edge_tts.list_voices()
    chinese_voices = []
    for entry in catalogue:
        if entry["Locale"].startswith("zh"):
            chinese_voices.append(entry)
    return chinese_voices