Init: 导入NaviGlassServer源码
This commit is contained in:
202
edge_tts_client.py
Normal file
202
edge_tts_client.py
Normal file
@@ -0,0 +1,202 @@
|
||||
# edge_tts_client.py
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
EdgeTTS 流式语音合成客户端 - Day 21
|
||||
|
||||
特点:
|
||||
- 完全免费
|
||||
- 流式输出(边合成边播放)
|
||||
- 低延迟
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
import edge_tts
|
||||
from typing import AsyncGenerator, Optional
|
||||
|
||||
# Default voice name; override with the EDGE_TTS_VOICE environment variable.
DEFAULT_VOICE = os.environ.get("EDGE_TTS_VOICE", "zh-CN-XiaoxiaoNeural")

# Default speaking-rate adjustment ("+0%", "+10%", "-10%", ...);
# override with the EDGE_TTS_RATE environment variable.
DEFAULT_RATE = os.environ.get("EDGE_TTS_RATE", "+0%")

# Default volume adjustment; override with the EDGE_TTS_VOLUME environment variable.
DEFAULT_VOLUME = os.environ.get("EDGE_TTS_VOLUME", "+0%")
|
||||
|
||||
|
||||
async def text_to_speech_stream(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    volume: str = DEFAULT_VOLUME,
) -> AsyncGenerator[bytes, None]:
    """Synthesize *text* with Edge TTS and yield the audio as it arrives.

    Args:
        text: Text to synthesize. Empty or whitespace-only text yields nothing.
        voice: Voice name passed to ``edge_tts.Communicate``.
        rate: Speaking-rate adjustment (e.g. "+0%").
        volume: Volume adjustment (e.g. "+0%").

    Yields:
        MP3 audio data chunks.

    Note:
        This is best-effort: any ``Exception`` raised during synthesis is
        printed and ends the stream instead of propagating to the caller.
    """
    # Nothing to synthesize for missing or blank text.
    if not (text and text.strip()):
        return

    try:
        synthesizer = edge_tts.Communicate(
            text=text,
            voice=voice,
            rate=rate,
            volume=volume,
        )
        async for event in synthesizer.stream():
            # Only "audio" events are forwarded; every other event type is ignored.
            if event["type"] != "audio":
                continue
            yield event["data"]
    except Exception as exc:
        print(f"[EdgeTTS] 合成失败: {exc}")
|
||||
|
||||
|
||||
async def text_to_speech(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    volume: str = DEFAULT_VOLUME,
) -> bytes:
    """Synthesize *text* and return the whole audio clip at once.

    Drains ``text_to_speech_stream`` and concatenates everything it yields.

    Args:
        text: Text to synthesize.
        voice: Voice name.
        rate: Speaking-rate adjustment.
        volume: Volume adjustment.

    Returns:
        The MP3 audio data; ``b""`` when the stream yields nothing.
    """
    pieces = [
        piece
        async for piece in text_to_speech_stream(text, voice, rate, volume)
    ]
    return b"".join(pieces)
|
||||
|
||||
|
||||
async def text_to_speech_pcm(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    target_sample_rate: int = 16000,
    volume: str = DEFAULT_VOLUME,
) -> bytes:
    """Synthesize *text* and return it as mono 16-bit PCM (for direct playback).

    Args:
        text: Text to synthesize.
        voice: Voice name.
        rate: Speaking-rate adjustment.
        target_sample_rate: Sample rate of the returned PCM, in Hz.
        volume: Volume adjustment. Added last so existing positional callers
            are unaffected; mirrors the parameter of ``text_to_speech``.

    Returns:
        Raw PCM16 mono audio data, or ``b""`` when synthesis produced no
        audio or the MP3 -> PCM conversion failed (the failure is printed).
    """
    import io

    from pydub import AudioSegment

    # Fetch the complete MP3 clip first; the decoder needs the whole stream.
    mp3_data = await text_to_speech(text, voice, rate, volume)
    if not mp3_data:
        return b""

    def _decode() -> bytes:
        """Decode the MP3 clip and normalize it to mono 16-bit PCM."""
        audio = AudioSegment.from_mp3(io.BytesIO(mp3_data))
        audio = audio.set_frame_rate(target_sample_rate)
        audio = audio.set_channels(1)  # mono
        audio = audio.set_sample_width(2)  # 16-bit
        return audio.raw_data

    try:
        # Decoding is synchronous work; run it in the default executor so the
        # event loop keeps serving other tasks in the meantime.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _decode)
    except Exception as e:
        print(f"[EdgeTTS] PCM 转换失败: {e}")
        return b""
|
||||
|
||||
|
||||
def _split_into_segments(text: str, punctuation: str) -> list:
    """Split *text* after punctuation marks into stripped, speakable segments."""
    segments = []
    buffer = []
    last_index = len(text) - 1

    for index, char in enumerate(text):
        buffer.append(char)
        if char not in punctuation:
            continue

        has_next = index < last_index
        # Keep numbers such as "3.14", "1,000" or "10:30" in one segment;
        # cutting inside them would change how they are read aloud.
        if (
            char in ".,:"
            and index > 0
            and has_next
            and text[index - 1].isdigit()
            and text[index + 1].isdigit()
        ):
            continue
        # Keep runs of punctuation ("?!", "...") attached to the text before
        # them instead of emitting punctuation-only segments.
        if has_next and text[index + 1] in punctuation:
            continue

        segments.append("".join(buffer).strip())
        buffer = []

    tail = "".join(buffer).strip()
    if tail:
        segments.append(tail)

    # Drop segments that contain nothing but punctuation/whitespace: there is
    # nothing to pronounce, so synthesizing them only wastes a request.
    return [seg for seg in segments if seg.strip(punctuation).strip()]


async def text_to_speech_pcm_stream(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    target_sample_rate: int = 16000,
    volume: str = DEFAULT_VOLUME,
) -> AsyncGenerator[bytes, None]:
    """Stream *text* as mono 16-bit PCM, one punctuation-delimited segment at a time.

    Because MP3 has to be decoded before it can be emitted as PCM, the text
    is split after punctuation marks and each segment is synthesized and
    decoded on its own.

    Args:
        text: Text to synthesize. Empty or ``None`` text yields nothing.
        voice: Voice name.
        rate: Speaking-rate adjustment.
        target_sample_rate: Sample rate of the emitted PCM, in Hz.
        volume: Volume adjustment. Added last so existing positional callers
            are unaffected; mirrors the parameter of ``text_to_speech``.

    Yields:
        Raw PCM16 mono audio data, one chunk per text segment. A segment
        whose synthesis or decoding fails is printed and skipped.
    """
    import io

    from pydub import AudioSegment

    if not text:
        return

    def _decode(mp3_data: bytes) -> bytes:
        """Decode one MP3 clip and normalize it to mono 16-bit PCM."""
        audio = AudioSegment.from_mp3(io.BytesIO(mp3_data))
        audio = audio.set_frame_rate(target_sample_rate)
        audio = audio.set_channels(1)
        audio = audio.set_sample_width(2)
        return audio.raw_data

    loop = asyncio.get_running_loop()

    # Full-width (Chinese) and ASCII sentence/clause punctuation.
    for segment in _split_into_segments(text, "。,!?;:,.!?;:"):
        try:
            mp3_data = await text_to_speech(segment, voice, rate, volume)
            if not mp3_data:
                continue
            # Decoding is synchronous work; run it in the default executor so
            # the event loop keeps serving other tasks in the meantime.
            pcm_data = await loop.run_in_executor(None, _decode, mp3_data)
        except Exception as e:
            print(f"[EdgeTTS] 分段合成失败: {e}")
            continue

        # Yield outside the try block so exceptions thrown into the generator
        # by the consumer are not mistaken for synthesis failures.
        yield pcm_data
|
||||
|
||||
|
||||
# Commonly used Chinese (zh-CN) voices.
CHINESE_VOICES = [
    # Female voices
    "zh-CN-XiaoxiaoNeural",  # natural
    # Male voices
    "zh-CN-YunxiNeural",  # natural
    # Female voices
    "zh-CN-XiaoyiNeural",  # lively
    # Male voices
    "zh-CN-YunjianNeural",  # announcer style
    # Female voices
    "zh-CN-XiaochenNeural",  # gentle
]
|
||||
|
||||
|
||||
async def list_voices() -> list:
    """Return the Edge TTS voices whose ``Locale`` starts with ``"zh"``.

    Fetches the voice catalog via ``edge_tts.list_voices()`` and keeps only
    the Chinese-locale entries.
    """
    catalog = await edge_tts.list_voices()
    return [
        entry
        for entry in catalog
        if entry["Locale"].startswith("zh")
    ]
|
||||
Reference in New Issue
Block a user