Init: 导入NaviGlassServer源码

2025-12-31 15:42:30 +08:00
parent 5baf812ed3
commit 2b6dd49a59
233 changed files with 20236 additions and 178 deletions
--- a/audio_compressor.py
+++ b/audio_compressor.py
@@ -0,0 +1,439 @@
+# audio_compressor.py
+# -*- coding: utf-8 -*-
+"""
+音频压缩工具 - 用于减少网络带宽占用
+支持将16kHz 16bit PCM压缩为更小的格式
+"""
+import os
+import wave
+import struct
+import numpy as np
+from typing import Optional, Tuple
+import logging
+
+logger = logging.getLogger(__name__)
+
+class AudioCompressor:
+    """Stateless PCM codecs used to shrink audio before it goes over the network.
+
+    Every public method is a ``staticmethod`` that takes and returns raw bytes.
+    PCM input/output is 16-bit signed, read and written through ``np.int16``.
+    Two codecs are provided:
+
+    * G.711 mu-law: 16 bit -> 8 bit per sample (half the size).
+    * IMA-style ADPCM: 16 bit -> 4 bit per sample (about a quarter of the
+      size), prefixed by a 3-byte ``<hB`` header holding the decoder's
+      start state (predictor, step index).
+    """
+
+    # G.711 mu-law: bias added before the segment search, and the largest
+    # magnitude that can take that bias without exceeding 0x7FFF.
+    _ULAW_BIAS = 0x84
+    _ULAW_CLIP = 32635
+
+    # IMA ADPCM quantizer step sizes (89 entries, indices 0..88).
+    _ADPCM_STEP_TABLE = (
+        7, 8, 9, 10, 11, 12, 13, 14, 16, 17,
+        19, 21, 23, 25, 28, 31, 34, 37, 41, 45,
+        50, 55, 60, 66, 73, 80, 88, 97, 107, 118,
+        130, 143, 157, 173, 190, 209, 230, 253, 279, 307,
+        337, 371, 408, 449, 494, 544, 598, 658, 724, 796,
+        876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066,
+        2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358,
+        5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899,
+        15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767,
+    )
+    # Step-index adjustment, looked up with the 3 magnitude bits of a code.
+    _ADPCM_INDEX_TABLE = (-1, -1, -1, -1, 2, 4, 6, 8)
+    _ADPCM_MAX_INDEX = 88
+
+    @staticmethod
+    def pcm16_to_ulaw(pcm_data: bytes) -> bytes:
+        """Encode 16-bit PCM as 8-bit mu-law (one output byte per sample).
+
+        Args:
+            pcm_data: raw 16-bit signed PCM; its length must be even.
+
+        Returns:
+            The mu-law bytes, half as long as ``pcm_data``.
+
+        Raises:
+            ValueError: if ``len(pcm_data)`` is not a multiple of 2.
+        """
+        # tolist() yields plain Python ints, so the arithmetic in
+        # _linear_to_ulaw cannot overflow the way np.int16 scalars do.
+        samples = np.frombuffer(pcm_data, dtype=np.int16).tolist()
+        return bytes(map(AudioCompressor._linear_to_ulaw, samples))
+
+    @staticmethod
+    def ulaw_to_pcm16(ulaw_data: bytes) -> bytes:
+        """Decode 8-bit mu-law back to 16-bit PCM (two output bytes per code).
+
+        Args:
+            ulaw_data: mu-law codes, one per byte.
+
+        Returns:
+            Raw 16-bit signed PCM, twice as long as ``ulaw_data``.
+        """
+        # Only 256 codes exist: decode each once, then map the whole
+        # buffer through the table in a single vectorized lookup.
+        table = np.array(
+            [AudioCompressor._ulaw_to_linear(code) for code in range(256)],
+            dtype=np.int16,
+        )
+        codes = np.frombuffer(bytes(ulaw_data), dtype=np.uint8)
+        return table[codes].tobytes()
+
+    @staticmethod
+    def _linear_to_ulaw(sample: int) -> int:
+        """Encode one 16-bit linear sample as a G.711 mu-law byte.
+
+        Args:
+            sample: linear sample; values outside [-32768, 32767] are clamped.
+
+        Returns:
+            The mu-law code in the range 0..255 (all bits inverted, as
+            G.711 transmits them), e.g. 0 -> 0xFF and 32767 -> 0x80.
+        """
+        sample = max(-32768, min(32767, int(sample)))
+
+        sign = 0x80 if sample < 0 else 0x00
+        magnitude = -sample if sign else sample
+
+        # Clip first so that adding the bias cannot exceed 0x7FFF.
+        magnitude = min(magnitude, AudioCompressor._ULAW_CLIP)
+        magnitude += AudioCompressor._ULAW_BIAS
+
+        # The exponent is the position of the highest set bit: bit 14 maps
+        # to exponent 7, bit 7 (always set thanks to the bias) to exponent 0.
+        exponent = 7
+        mask = 0x4000
+        while exponent > 0 and not (magnitude & mask):
+            exponent -= 1
+            mask >>= 1
+
+        mantissa = (magnitude >> (exponent + 3)) & 0x0F
+        return ~(sign | (exponent << 4) | mantissa) & 0xFF
+
+    @staticmethod
+    def _ulaw_to_linear(ulawbyte: int) -> int:
+        """Decode one G.711 mu-law byte to a 16-bit linear sample.
+
+        Args:
+            ulawbyte: mu-law code; only the low 8 bits are used.
+
+        Returns:
+            The linear sample, within [-32124, 32124].
+        """
+        code = ~int(ulawbyte) & 0xFF
+        exponent = (code >> 4) & 0x07
+        mantissa = code & 0x0F
+
+        # Rebuild the biased magnitude, then remove the encoder's bias.
+        bias = AudioCompressor._ULAW_BIAS
+        magnitude = (((mantissa << 3) + bias) << exponent) - bias
+
+        return -magnitude if code & 0x80 else magnitude
+
+    @staticmethod
+    def _adpcm_advance(code: int, predicted: int, step_index: int) -> Tuple[int, int]:
+        """Apply one 4-bit ADPCM code to the codec state.
+
+        Encoder and decoder both call this, so they stay in lock-step.
+
+        Args:
+            code: 4-bit code (bit 3 = sign, bits 0-2 = magnitude).
+            predicted: current predictor value.
+            step_index: current index into the step table (0..88).
+
+        Returns:
+            ``(predicted, step_index)`` after the code has been applied,
+            with the predictor clamped to int16 and the index to 0..88.
+        """
+        step = AudioCompressor._ADPCM_STEP_TABLE[step_index]
+
+        # delta = step/8 + the fractions of step selected by the magnitude bits
+        delta = step >> 3
+        if code & 4:
+            delta += step
+        if code & 2:
+            delta += step >> 1
+        if code & 1:
+            delta += step >> 2
+
+        predicted = predicted - delta if code & 8 else predicted + delta
+        predicted = max(-32768, min(32767, predicted))
+
+        step_index += AudioCompressor._ADPCM_INDEX_TABLE[code & 7]
+        step_index = max(0, min(AudioCompressor._ADPCM_MAX_INDEX, step_index))
+        return predicted, step_index
+
+    @staticmethod
+    def pcm16_to_adpcm(pcm_data: bytes) -> bytes:
+        """Encode 16-bit PCM as 4-bit ADPCM (two samples per byte).
+
+        Layout of the result: a 3-byte ``<hB`` header with the state the
+        decoder must start from (predictor, step index), followed by the
+        packed codes. The first sample of each pair goes in the low nibble,
+        the second in the high nibble; with an odd number of samples the
+        last high nibble is left at 0.
+
+        Args:
+            pcm_data: raw 16-bit signed PCM; its length must be even.
+
+        Returns:
+            Header plus packed ADPCM codes.
+
+        Raises:
+            ValueError: if ``len(pcm_data)`` is not a multiple of 2.
+        """
+        # Plain Python ints: sample - predicted can need 17 bits.
+        samples = np.frombuffer(pcm_data, dtype=np.int16).tolist()
+        step_table = AudioCompressor._ADPCM_STEP_TABLE
+        advance = AudioCompressor._adpcm_advance
+
+        predicted = 0
+        step_index = 0
+        # The header must describe the state *before* the first sample,
+        # because that is where adpcm_to_pcm16 resumes from.
+        header = struct.pack('<hB', predicted, step_index)
+
+        packed = bytearray((len(samples) + 1) // 2)
+        for position, sample in enumerate(samples):
+            diff = sample - predicted
+
+            # Quantize |diff| against step, step/2 and step/4.
+            code = 0
+            if diff < 0:
+                code = 8
+                diff = -diff
+
+            step = step_table[step_index]
+            if diff >= step:
+                code |= 4
+                diff -= step
+            step >>= 1
+            if diff >= step:
+                code |= 2
+                diff -= step
+            step >>= 1
+            if diff >= step:
+                code |= 1
+
+            # Track exactly what the decoder will reconstruct.
+            predicted, step_index = advance(code, predicted, step_index)
+
+            packed[position >> 1] |= (code << 4) if position & 1 else code
+
+        return header + bytes(packed)
+
+    @staticmethod
+    def adpcm_to_pcm16(adpcm_data: bytes) -> bytes:
+        """Decode the output of :meth:`pcm16_to_adpcm` back to 16-bit PCM.
+
+        Every payload byte yields two samples, so a stream encoded from an
+        odd number of samples decodes with one extra trailing sample.
+
+        Args:
+            adpcm_data: 3-byte ``<hB`` header followed by packed codes.
+
+        Returns:
+            Raw 16-bit signed PCM, or ``b''`` when the input is shorter
+            than the header.
+        """
+        if len(adpcm_data) < 3:
+            return b''
+
+        predicted, step_index = struct.unpack_from('<hB', adpcm_data)
+        # The header byte can hold up to 255; keep it inside the step table.
+        step_index = min(step_index, AudioCompressor._ADPCM_MAX_INDEX)
+
+        advance = AudioCompressor._adpcm_advance
+        pcm_samples = []
+        for byte in adpcm_data[3:]:
+            # Low nibble first, matching the encoder's packing order.
+            for code in (byte & 0x0F, byte >> 4):
+                predicted, step_index = advance(code, predicted, step_index)
+                pcm_samples.append(predicted)
+
+        return np.array(pcm_samples, dtype=np.int16).tobytes()
+
+    @staticmethod
+    def downsample_pcm16(pcm_data: bytes, from_rate: int = 16000, to_rate: int = 8000) -> bytes:
+        """Resample 16-bit PCM from ``from_rate`` to ``to_rate`` (optional step).
+
+        16 kHz -> 8 kHz keeps every other sample; any other pair of rates
+        uses linear interpolation. No low-pass filter is applied in either
+        path.
+
+        Args:
+            pcm_data: raw 16-bit signed PCM; its length must be even.
+            from_rate: sample rate of ``pcm_data`` in Hz.
+            to_rate: desired sample rate in Hz.
+
+        Returns:
+            The resampled PCM bytes (``pcm_data`` itself when the rates are
+            equal, ``b''`` when no output sample would be produced).
+        """
+        if from_rate == to_rate:
+            return pcm_data
+
+        samples = np.frombuffer(pcm_data, dtype=np.int16)
+
+        if from_rate == 16000 and to_rate == 8000:
+            # Plain decimation by two.
+            return samples[::2].tobytes()
+
+        new_length = int(len(samples) * (to_rate / from_rate))
+        if new_length <= 0:
+            # np.interp rejects empty inputs; there is nothing to emit anyway.
+            return b''
+
+        positions = np.linspace(0, len(samples) - 1, new_length)
+        resampled = np.interp(positions, np.arange(len(samples)), samples)
+        return resampled.astype(np.int16).tobytes()
+
+
+class CompressedAudioCache:
+    """In-memory cache of WAV files converted to 16 kHz mono PCM and compressed.
+
+    Compressed blobs start with a 5-byte header: a one-byte codec tag
+    (0x01 = mu-law, 0x02 = ADPCM) followed by the big-endian uint32 length,
+    in bytes, of the PCM that was compressed. With ``compression_type``
+    "none" the PCM is stored as-is, without any header.
+    """
+
+    # Sample rate every file is converted to before compression.
+    _TARGET_RATE = 16000
+    # Codec tags written as the first byte of a compressed blob.
+    _TAG_ULAW = 0x01
+    _TAG_ADPCM = 0x02
+    # Blob header: tag byte + big-endian uint32 PCM length (5 bytes).
+    _HEADER_FORMAT = '!BI'
+    _HEADER_SIZE = 5
+
+    def __init__(self, compression_type: str = "adpcm", use_downsample: bool = False):
+        """Create an empty cache.
+
+        Args:
+            compression_type: "none", "ulaw" or "adpcm"; any other value
+                behaves like "none" (the PCM is stored uncompressed).
+            use_downsample: stored on the instance; no method of this
+                class reads it.
+        """
+        self.compression_type = compression_type
+        self.use_downsample = use_downsample
+        self._cache = {}  # {filepath: compressed_data}
+        self._original_sizes = {}  # {filepath: PCM size in bytes before compression}
+
+    @staticmethod
+    def _first_channel(frames: bytes, channels: int, sampwidth: int) -> bytes:
+        """Keep only the first channel of interleaved PCM frames.
+
+        For stereo input this is the left channel. Works on raw bytes, so
+        it is independent of the sample width.
+        """
+        if not frames:
+            return b''
+        frame_width = channels * sampwidth
+        raw = np.frombuffer(frames, dtype=np.uint8)
+        # Ignore a trailing partial frame, should the data end with one.
+        usable = len(raw) - len(raw) % frame_width
+        return raw[:usable].reshape(-1, frame_width)[:, :sampwidth].tobytes()
+
+    @staticmethod
+    def _resample(frames: bytes, sampwidth: int, from_rate: int, to_rate: int) -> bytes:
+        """Convert mono PCM from ``from_rate`` to ``to_rate``.
+
+        Uses ``audioop.ratecv`` when the module is importable. ``audioop``
+        was removed from the standard library in Python 3.13, so without it
+        16-bit data falls back to ``AudioCompressor.downsample_pcm16``.
+
+        Raises:
+            RuntimeError: if ``audioop`` is unavailable and the data is not
+                16-bit.
+        """
+        try:
+            import audioop
+        except ImportError:
+            audioop = None
+
+        if audioop is not None:
+            converted, _ = audioop.ratecv(frames, sampwidth, 1, from_rate, to_rate, None)
+            return converted
+
+        if sampwidth != 2:
+            raise RuntimeError(
+                f"cannot resample {sampwidth * 8}-bit audio without the audioop module"
+            )
+        return AudioCompressor.downsample_pcm16(frames, from_rate, to_rate)
+
+    def load_and_compress(self, filepath: str) -> Optional[bytes]:
+        """Load a WAV file, normalize it to 16 kHz mono and compress it.
+
+        Results are cached per ``filepath``; later calls return the cached
+        blob without touching the file again.
+
+        Args:
+            filepath: path of the WAV file to load.
+
+        Returns:
+            The compressed blob (raw PCM when the codec is "none"), or
+            ``None`` if the file could not be read or compressed; the
+            failure is logged.
+        """
+        cached = self._cache.get(filepath)
+        if cached is not None:
+            return cached
+
+        try:
+            with wave.open(filepath, 'rb') as wav:
+                channels = wav.getnchannels()
+                sampwidth = wav.getsampwidth()
+                framerate = wav.getframerate()
+                frames = wav.readframes(wav.getnframes())
+
+            if channels != 1:
+                logger.warning(f"{filepath} 不是单声道")
+            if sampwidth != 2:
+                # The codecs read the data as 16-bit regardless.
+                logger.warning(f"{filepath} 不是16位音频")
+
+            # Multi-channel input: keep the first (left) channel only.
+            if channels > 1:
+                frames = self._first_channel(frames, channels, sampwidth)
+
+            # Always convert to 16 kHz so the data matches the client player.
+            if framerate != self._TARGET_RATE:
+                frames = self._resample(frames, sampwidth, framerate, self._TARGET_RATE)
+
+            # Size after channel/rate conversion, before compression.
+            original_size = len(frames)
+
+            if self.compression_type == "ulaw":
+                header = struct.pack(self._HEADER_FORMAT, self._TAG_ULAW, original_size)
+                compressed = header + AudioCompressor.pcm16_to_ulaw(frames)
+            elif self.compression_type == "adpcm":
+                header = struct.pack(self._HEADER_FORMAT, self._TAG_ADPCM, original_size)
+                compressed = header + AudioCompressor.pcm16_to_adpcm(frames)
+            else:
+                compressed = frames
+
+            # Record both entries only once compression has succeeded, so
+            # the statistics never count a file that is not in the cache.
+            self._original_sizes[filepath] = original_size
+            self._cache[filepath] = compressed
+
+            # An empty file has no meaningful ratio; avoid dividing by zero.
+            compression_ratio = len(compressed) / original_size if original_size else 0.0
+            logger.info(f"[压缩] {os.path.basename(filepath)}: "
+                      f"{original_size} -> {len(compressed)} bytes "
+                      f"({compression_ratio:.1%})")
+
+            return compressed
+
+        except Exception as e:
+            logger.error(f"压缩音频失败 {filepath}: {e}")
+            return None
+
+    def decompress(self, compressed_data: bytes) -> Optional[bytes]:
+        """Turn a blob produced by :meth:`load_and_compress` back into PCM.
+
+        The codec is chosen from the first byte of the blob. Data that is
+        shorter than the header, or whose first byte is not a known tag,
+        is returned unchanged.
+
+        NOTE(review): uncompressed PCM carries no header, so raw PCM whose
+        first byte happens to be 0x01 or 0x02 is decoded as if it were
+        compressed. Callers storing "none" data should not pass it here.
+
+        Args:
+            compressed_data: blob to decode.
+
+        Returns:
+            16-bit PCM bytes, trimmed to the length recorded in the header;
+            the input itself if it is not recognized or decoding fails.
+        """
+        if not compressed_data or len(compressed_data) < self._HEADER_SIZE:
+            return compressed_data
+
+        try:
+            tag, original_length = struct.unpack_from(self._HEADER_FORMAT, compressed_data)
+            payload = compressed_data[self._HEADER_SIZE:]
+
+            if tag == self._TAG_ULAW:
+                pcm_data = AudioCompressor.ulaw_to_pcm16(payload)
+            elif tag == self._TAG_ADPCM:
+                pcm_data = AudioCompressor.adpcm_to_pcm16(payload)
+            else:
+                # Unknown tag: treat the data as uncompressed.
+                return compressed_data
+
+            # ADPCM packs two samples per byte, so an odd sample count
+            # decodes with one padding sample; the header length drops it.
+            return pcm_data[:original_length]
+
+        except Exception as e:
+            logger.error(f"解压音频失败: {e}")
+            return compressed_data
+
+    def get_compression_stats(self) -> dict:
+        """Summarize the cache contents.
+
+        Returns:
+            A dict with ``files_cached``, ``total_original_size``,
+            ``total_compressed_size``, ``compression_ratio`` (compressed /
+            original, 0 when nothing is cached) and ``bytes_saved``.
+        """
+        total_original = sum(self._original_sizes.values())
+        total_compressed = sum(len(data) for data in self._cache.values())
+
+        return {
+            "files_cached": len(self._cache),
+            "total_original_size": total_original,
+            "total_compressed_size": total_compressed,
+            "compression_ratio": total_compressed / total_original if total_original > 0 else 0,
+            "bytes_saved": total_original - total_compressed
+        }
+
+
+# Module-wide shared cache instance.
+# ADPCM is the default codec (16 bit -> 4 bit per sample, about 75% smaller).
+# Override it with the AIGLASS_COMPRESS_TYPE environment variable
+# (none, ulaw or adpcm); any other value falls back to "adpcm".
+import os
+compression_type = os.environ.get("AIGLASS_COMPRESS_TYPE", "adpcm").lower()
+compression_type = compression_type if compression_type in ("none", "ulaw", "adpcm") else "adpcm"
+compressed_audio_cache = CompressedAudioCache(
+    compression_type=compression_type,
+    use_downsample=False,
+)