diff --git a/.env.performance b/.env.performance new file mode 100644 index 0000000..57bf4a3 --- /dev/null +++ b/.env.performance @@ -0,0 +1,68 @@ +# ============================================================ +# Day 22 性能优化配置文件 +# 复制此文件为 .env 并根据硬件调整参数 +# ============================================================ + +# === YOLO 盲道/斑马线检测 === +# 输入分辨率 (越小越快,但精度降低) +# 建议: RTX 3090 用 640, GTX 1060 用 320-384 +AIGLASS_YOLO_IMGSZ=480 + +# 检测间隔 (每N帧检测一次) +# 建议: 高端GPU用 6-8, 低端用 12-15 +AIGLASS_BLINDPATH_INTERVAL=10 + +# 启用FP16半精度 (1=启用, 0=禁用) +# 注意: GTX 1060 的FP16性能不佳,建议设为0 +AIGLASS_YOLO_HALF=1 + +# === 障碍物检测 === +# 输入分辨率 +AIGLASS_OBS_IMGSZ=480 + +# 检测间隔 +AIGLASS_OBS_INTERVAL=18 + +# 缓存帧数 +AIGLASS_OBS_CACHE_FRAMES=12 + +# 置信度阈值 +AIGLASS_OBS_CONF=0.25 + +# 启用FP16 +AIGLASS_OBS_HALF=1 + +# === GPU 并发控制 === +# 同时推理的最大任务数 +AIGLASS_GPU_SLOTS=2 + +# 设备选择 (cuda:0, cuda:1, cpu) +AIGLASS_DEVICE=cuda:0 + +# 混合精度模式 (fp16, bf16, off) +# RTX 30系列支持 bf16, 其他用 fp16 +AIGLASS_AMP=fp16 + +# === 语音播报 === +# 直行提示间隔(秒) +AIGLASS_STRAIGHT_INTERVAL=4.0 + +# 方向指令间隔(秒) +AIGLASS_DIRECTION_INTERVAL=3.0 + +# 持续播报模式 (1=启用) +AIGLASS_STRAIGHT_CONTINUOUS=1 + +# 限制模式下最大重复次数 +AIGLASS_STRAIGHT_LIMIT=2 + +# === 模型路径 (根据实际路径修改) === +# BLIND_PATH_MODEL=models/yolo-seg.pt +# OBSTACLE_MODEL=models/yoloe-11l-seg.pt + +# === 调试选项 === +# 启用ASR原始日志 +# ASR_DEBUG_RAW=1 + +# 启用红绿灯调试图像 +# AIGLASS_DEBUG_TRAFFIC_LIGHT=1 diff --git a/.gitignore b/.gitignore index 36b13f1..cd0bb7f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,176 +1,176 @@ -# ---> Python -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# UV -# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -#uv.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. 
-# https://pdm.fming.dev/latest/usage/project/#working-with-version-control -.pdm.toml -.pdm-python -.pdm-build/ - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - -# Ruff stuff: -.ruff_cache/ - -# PyPI configuration file -.pypirc - +# ---> Python +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. 
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..00f5255 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,90 @@ +# 更新日志 + +本文档记录项目的所有重要变更。 + +格式基于 [Keep a Changelog](https://keepachangelog.com/zh-CN/1.0.0/), +版本号遵循 [语义化版本](https://semver.org/lang/zh-CN/)。 + +## [未发布] + +### 新增 +- 首次开源发布 +- 完整的 GitHub 文档(README, CONTRIBUTING, LICENSE 等) +- Docker 支持 +- 环境变量配置模板 + +### 修改 +- 优化了 README 文档结构 +- 改进了代码注释 + +## [1.0.0] - 2025-01-XX + +### 新增 +- 🚶 盲道导航系统 + - 实时盲道检测与分割 + - 智能语音引导 + - 障碍物检测与避障 + - 急转弯检测与提醒 + - 光流稳定算法 + +- 🚦 过马路辅助 + - 斑马线识别与方向检测 + - 红绿灯颜色识别 + - 对齐引导系统 + - 安全提醒 + +- 🔍 物品识别与查找 + - YOLO-E 开放词汇检测 + - MediaPipe 手部引导 + - 实时目标追踪 + - 抓取动作检测 + +- 🎙️ 实时语音交互 + - 阿里云 Paraformer ASR + - Qwen-Omni-Turbo 多模态对话 + - 智能指令解析 + - 上下文感知 + +- 📹 视频与音频处理 + - WebSocket 实时推流 + - 音视频同步录制 + - IMU 数据融合 + - 多路音频混音 + +- 🎨 可视化与交互 + - Web 实时监控界面 + - IMU 3D 可视化 + - 状态面板 + - 中文友好界面 + +### 技术栈 +- FastAPI + WebSocket +- YOLO11 / YOLO-E +- MediaPipe +- PyTorch + CUDA +- OpenCV +- DashScope API + +### 已知问题 +- [ ] 在低端 GPU 上可能出现卡顿 +- [ ] macOS 上缺少 GPU 加速支持 +- [ ] 部分中文字体在 Linux 上显示不正确 + +--- + +## 版本说明 + +### 主版本(Major) +- 不兼容的 API 更改 + +### 次版本(Minor) +- 向后兼容的新功能 + +### 修订版本(Patch) +- 向后兼容的问题修复 + +--- + +[未发布]: https://github.com/yourusername/aiglass/compare/v1.0.0...HEAD +[1.0.0]: https://github.com/yourusername/aiglass/releases/tag/v1.0.0 + diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..1443dd9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,59 @@ +# AI Glass System - Dockerfile +# 基于 NVIDIA CUDA 的 Python 镜像 + +FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 + +# 设置环境变量 +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONUNBUFFERED=1 +ENV CUDA_HOME=/usr/local/cuda +ENV PATH=${CUDA_HOME}/bin:${PATH} +ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} + +# 设置工作目录 +WORKDIR /app + +# 安装系统依赖 +RUN apt-get update && apt-get install -y \ + python3.10 \ + python3-pip \ + 
python3-dev \ + portaudio19-dev \ + libgl1-mesa-glx \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender-dev \ + libgomp1 \ + git \ + wget \ + curl \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# 升级 pip +RUN python3 -m pip install --upgrade pip + +# 复制 requirements.txt +COPY requirements.txt . + +# 安装 Python 依赖 +RUN pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118 +RUN pip install --no-cache-dir -r requirements.txt + +# 复制应用代码 +COPY . . + +# 创建必要的目录 +RUN mkdir -p recordings model music voice static templates + +# 暴露端口 +EXPOSE 8081 12345/udp + +# 健康检查 +HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \ + CMD curl -f http://localhost:8081/api/health || exit 1 + +# 启动命令 +CMD ["python3", "app_main.py"] + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4b5f221 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 AI-FanGe + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/PROJECT_STRUCTURE.md b/PROJECT_STRUCTURE.md new file mode 100644 index 0000000..0f19329 --- /dev/null +++ b/PROJECT_STRUCTURE.md @@ -0,0 +1,402 @@ +# 项目结构说明 + +本文档详细说明项目的目录结构和主要文件的作用。 + +## 📁 目录结构 + +``` +rebuild1002/ +├── 📄 主要应用文件 +│ ├── app_main.py # 主应用入口(FastAPI 服务) +│ ├── navigation_master.py # 导航统领器(状态机) +│ ├── workflow_blindpath.py # 盲道导航工作流 +│ ├── workflow_crossstreet.py # 过马路导航工作流 +│ └── yolomedia.py # 物品查找工作流 +│ +├── 🎙️ 语音处理模块 +│ ├── asr_core.py # 语音识别核心 +│ ├── omni_client.py # Qwen-Omni 客户端 +│ ├── qwen_extractor.py # 标签提取(中文->英文) +│ ├── audio_player.py # 音频播放器 +│ └── audio_stream.py # 音频流管理 +│ +├── 🤖 模型相关 +│ ├── yoloe_backend.py # YOLO-E 后端(开放词汇) +│ ├── trafficlight_detection.py # 红绿灯检测 +│ ├── obstacle_detector_client.py # 障碍物检测客户端 +│ └── models.py # 模型定义 +│ +├── 🎥 视频处理 +│ ├── bridge_io.py # 线程安全的帧缓冲 +│ ├── sync_recorder.py # 音视频同步录制 +│ └── video_recorder.py # 视频录制(旧版) +│ +├── 🌐 Web 前端 +│ ├── templates/ +│ │ └── index.html # 主界面 HTML +│ ├── static/ +│ │ ├── main.js # 主 JS 脚本 +│ │ ├── vision.js # 视觉流处理 +│ │ ├── visualizer.js # 数据可视化 +│ │ ├── vision_renderer.js # 渲染器 +│ │ ├── vision.css # 样式表 +│ │ └── models/ # 3D 模型(IMU 可视化) +│ +├── 🎵 音频资源 +│ ├── music/ # 系统提示音 +│ │ ├── converted_向上.wav +│ │ ├── converted_向下.wav +│ │ └── ... 
+│ └── voice/ # 预录语音 +│ ├── voice_mapping.json +│ └── *.wav +│ +├── 🧠 模型文件 +│ └── model/ +│ ├── yolo-seg.pt # 盲道分割模型 +│ ├── yoloe-11l-seg.pt # YOLO-E 开放词汇模型 +│ ├── shoppingbest5.pt # 物品识别模型 +│ ├── trafficlight.pt # 红绿灯检测模型 +│ └── hand_landmarker.task # MediaPipe 手部模型 +│ +├── 📹 录制文件 +│ └── recordings/ # 自动保存的视频和音频 +│ ├── video_*.avi +│ └── audio_*.wav +│ +├── 🛠️ ESP32 固件 +│ └── compile/ +│ ├── compile.ino # Arduino 主程序 +│ ├── camera_pins.h # 摄像头引脚定义 +│ ├── ICM42688.cpp/h # IMU 驱动 +│ └── ESP32_VIDEO_OPTIMIZATION.md +│ +├── 🧪 测试文件 +│ ├── test_recorder.py # 录制功能测试 +│ ├── test_traffic_light.py # 红绿灯检测测试 +│ ├── test_cross_street_blindpath.py # 导航测试 +│ └── test_crosswalk_awareness.py # 斑马线检测测试 +│ +├── 📚 文档 +│ ├── README.md # 项目主文档 +│ ├── INSTALLATION.md # 安装指南 +│ ├── CONTRIBUTING.md # 贡献指南 +│ ├── FAQ.md # 常见问题 +│ ├── CHANGELOG.md # 更新日志 +│ ├── SECURITY.md # 安全政策 +│ └── PROJECT_STRUCTURE.md # 本文件 +│ +├── 🐳 Docker 相关 +│ ├── Dockerfile # Docker 镜像定义 +│ ├── docker-compose.yml # Docker Compose 配置 +│ └── .dockerignore # Docker 忽略文件 +│ +├── ⚙️ 配置文件 +│ ├── .env.example # 环境变量模板 +│ ├── .gitignore # Git 忽略文件 +│ ├── requirements.txt # Python 依赖 +│ ├── setup.sh # Linux/macOS 安装脚本 +│ └── setup.bat # Windows 安装脚本 +│ +├── 📄 许可证 +│ └── LICENSE # MIT 许可证 +│ +└── 🔧 GitHub 相关 + └── .github/ + ├── ISSUE_TEMPLATE/ + │ ├── bug_report.md + │ └── feature_request.md + └── pull_request_template.md +``` + +## 🔑 核心文件说明 + +### 主应用层 + +#### `app_main.py` +- **作用**: FastAPI 主服务,处理所有 WebSocket 连接 +- **主要功能**: + - WebSocket 路由管理(/ws/camera, /ws_audio, /ws/viewer 等) + - 模型加载与初始化 + - 状态协调与管理 + - 音视频流分发 +- **依赖**: 所有其他模块 +- **入口点**: `python app_main.py` + +#### `navigation_master.py` +- **作用**: 导航统领器,管理整个系统的状态机 +- **主要状态**: + - IDLE: 空闲 + - CHAT: 对话模式 + - BLINDPATH_NAV: 盲道导航 + - CROSSING: 过马路 + - TRAFFIC_LIGHT_DETECTION: 红绿灯检测 + - ITEM_SEARCH: 物品查找 +- **核心方法**: + - `process_frame()`: 处理每一帧 + - `start_blind_path_navigation()`: 启动盲道导航 + - `start_crossing()`: 启动过马路模式 + - `on_voice_command()`: 处理语音命令 + +### 
工作流模块 + +#### `workflow_blindpath.py` +- **作用**: 盲道导航核心逻辑 +- **主要功能**: + - 盲道分割与检测 + - 障碍物检测 + - 转弯检测 + - 光流稳定 + - 方向引导生成 +- **状态机**: + - ONBOARDING: 上盲道 + - NAVIGATING: 导航中 + - MANEUVERING_TURN: 转弯 + - AVOIDING_OBSTACLE: 避障 + +#### `workflow_crossstreet.py` +- **作用**: 过马路导航逻辑 +- **主要功能**: + - 斑马线检测 + - 方向对齐 + - 引导生成 +- **核心方法**: + - `_is_crosswalk_near()`: 判断是否接近斑马线 + - `_compute_angle_and_offset()`: 计算角度和偏移 + +#### `yolomedia.py` +- **作用**: 物品查找工作流 +- **主要功能**: + - YOLO-E 文本提示检测 + - MediaPipe 手部追踪 + - 光流目标追踪 + - 手部引导(方向提示) + - 抓取动作检测 +- **模式**: + - SEGMENT: 检测模式 + - FLASH: 闪烁确认 + - CENTER_GUIDE: 居中引导 + - TRACK: 手部追踪 + +### 语音模块 + +#### `asr_core.py` +- **作用**: 阿里云 Paraformer ASR 实时语音识别 +- **主要功能**: + - 实时语音识别 + - VAD(语音活动检测) + - 识别结果回调 +- **关键类**: `ASRCallback` + +#### `omni_client.py` +- **作用**: Qwen-Omni-Turbo 多模态对话客户端 +- **主要功能**: + - 流式对话生成 + - 图像+文本输入 + - 语音输出 +- **核心函数**: `stream_chat()` + +#### `audio_player.py` +- **作用**: 统一的音频播放管理 +- **主要功能**: + - TTS 语音播放 + - 多路音频混音 + - 音量控制 + - 线程安全播放 +- **核心函数**: `play_voice_text()`, `play_audio_threadsafe()` + +### 模型后端 + +#### `yoloe_backend.py` +- **作用**: YOLO-E 开放词汇检测后端 +- **主要功能**: + - 文本提示设置 + - 实时分割 + - 目标追踪 +- **核心类**: `YoloEBackend` + +#### `trafficlight_detection.py` +- **作用**: 红绿灯检测模块 +- **检测方法**: + 1. YOLO 模型检测 + 2. 
HSV 颜色分类(备用) +- **输出**: 红灯/绿灯/黄灯/未知 + +#### `obstacle_detector_client.py` +- **作用**: 障碍物检测客户端 +- **主要功能**: + - 白名单类别过滤 + - 路径掩码内检测 + - 物体属性计算(面积、位置、危险度) + +### 视频处理 + +#### `bridge_io.py` +- **作用**: 线程安全的帧缓冲与分发 +- **主要功能**: + - 生产者-消费者模式 + - 原始帧缓存 + - 处理后帧分发 +- **核心函数**: + - `push_raw_jpeg()`: 接收 ESP32 帧 + - `wait_raw_bgr()`: 取原始帧 + - `send_vis_bgr()`: 发送处理后的帧 + +#### `sync_recorder.py` +- **作用**: 音视频同步录制 +- **主要功能**: + - 同步录制视频和音频 + - 自动文件命名(时间戳) + - 线程安全 +- **输出**: `recordings/video_*.avi`, `audio_*.wav` + +### 前端 + +#### `templates/index.html` +- **作用**: Web 监控界面 +- **主要区域**: + - 视频流显示 + - 状态面板 + - IMU 3D 可视化 + - 语音识别结果 + +#### `static/main.js` +- **作用**: 主 JavaScript 逻辑 +- **主要功能**: + - WebSocket 连接管理 + - UI 更新 + - 事件处理 + +#### `static/vision.js` +- **作用**: 视觉流处理 +- **主要功能**: + - WebSocket 接收视频帧 + - Canvas 渲染 + - FPS 计算 + +#### `static/visualizer.js` +- **作用**: IMU 3D 可视化(Three.js) +- **主要功能**: + - 接收 IMU 数据 + - 实时渲染设备姿态 + - 动态灯光效果 + +## 🔄 数据流 + +### 视频流 +``` +ESP32-CAM + → [JPEG] WebSocket /ws/camera + → bridge_io.push_raw_jpeg() + → yolomedia / navigation_master + → bridge_io.send_vis_bgr() + → [JPEG] WebSocket /ws/viewer + → Browser Canvas +``` + +### 音频流(上行) +``` +ESP32-MIC + → [PCM16] WebSocket /ws_audio + → asr_core + → DashScope ASR + → 识别结果 + → start_ai_with_text_custom() +``` + +### 音频流(下行) +``` +Qwen-Omni / TTS + → audio_player + → [PCM16] audio_stream + → [WAV] HTTP /stream.wav + → ESP32-Speaker +``` + +### IMU 数据流 +``` +ESP32-IMU + → [JSON] UDP 12345 + → process_imu_and_maybe_store() + → [JSON] WebSocket /ws + → visualizer.js (Three.js) +``` + +## 🎯 关键设计模式 + +### 1. 状态机模式 +- **位置**: `navigation_master.py` +- **作用**: 管理系统状态转换 +- **状态**: IDLE → CHAT / BLINDPATH_NAV / CROSSING / ... + +### 2. 生产者-消费者模式 +- **位置**: `bridge_io.py` +- **作用**: 解耦视频接收与处理 +- **实现**: 线程 + 队列 + +### 3. 策略模式 +- **位置**: 各 `workflow_*.py` +- **作用**: 不同导航策略的实现 +- **实现**: 统一的 `process_frame()` 接口 + +### 4. 单例模式 +- **位置**: 模型加载 +- **作用**: 全局共享模型实例 +- **实现**: 全局变量 + 初始化检查 + +### 5. 
观察者模式 +- **位置**: WebSocket 通信 +- **作用**: 多客户端订阅视频流 +- **实现**: `camera_viewers: Set[WebSocket]` + +## 📦 依赖关系 + +``` +app_main.py +├── navigation_master.py +│ ├── workflow_blindpath.py +│ │ ├── yoloe_backend.py +│ │ └── obstacle_detector_client.py +│ ├── workflow_crossstreet.py +│ └── trafficlight_detection.py +├── yolomedia.py +│ └── yoloe_backend.py +├── asr_core.py +├── omni_client.py +├── audio_player.py +├── audio_stream.py +├── bridge_io.py +└── sync_recorder.py +``` + +## 🚀 启动流程 + +1. **初始化阶段** (`app_main.py`) + - 加载环境变量 + - 加载导航模型(YOLO、MediaPipe) + - 初始化音频系统 + - 启动录制系统 + - 预加载红绿灯模型 + +2. **服务启动** (FastAPI) + - 注册 WebSocket 路由 + - 挂载静态文件 + - 启动 UDP 监听(IMU) + - 启动 HTTP 服务(8081 端口) + +3. **运行阶段** + - 等待 ESP32 连接 + - 接收视频/音频/IMU 数据 + - 处理用户语音指令 + - 实时推送处理结果 + +4. **关闭阶段** + - 停止录制(保存文件) + - 关闭所有 WebSocket 连接 + - 释放模型资源 + - 清理临时文件 + +--- + +**提示**: 如需了解某个模块的详细实现,请查看相应源文件的注释和 docstring。 + diff --git a/README.md b/README.md index 19f64e6..397294e 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,506 @@ -# NaviGlassServer - +# AI 智能盲人眼镜系统 🤖👓 + +
+ +一个面向视障人士的智能导航与辅助系统,集成了盲道导航、过马路辅助、物品识别、实时语音交互等功能。 本项目仅为交流学习使用,请勿直接给视障人群使用。本项目内仅包含代码,模型地址:https://www.modelscope.cn/models/archifancy/AIGlasses_for_navigation 。下载后存放在/model 文件夹 + +[功能特性](#功能特性) • [快速开始](#快速开始) • [系统架构](#系统架构) • [使用说明](#使用说明) • [开发文档](#开发文档) + +
+ +--- +1 +2 +4 + +## 📋 目录 + +- [功能特性](#功能特性) +- [系统要求](#系统要求) +- [快速开始](#快速开始) +- [系统架构](#系统架构) +- [使用说明](#使用说明) +- [配置说明](#配置说明) +- [开发文档](#开发文档) + +## ✨ 功能特性 + +### 🚶 盲道导航系统 +- **实时盲道检测**:基于 YOLO 分割模型实时识别盲道 +- **智能语音引导**:提供精准的方向指引(左转、右转、直行等) +- **障碍物检测与避障**:自动识别前方障碍物并规划避障路线 +- **转弯检测**:自动识别急转弯并提前提醒 +- **光流稳定**:使用 Lucas-Kanade 光流算法稳定掩码,减少抖动 + +### 🚦 过马路辅助 +- **斑马线识别**:实时检测斑马线位置和方向 +- **红绿灯识别**:基于颜色和形状的红绿灯状态检测 +- **对齐引导**:引导用户对准斑马线中心 +- **安全提醒**:绿灯时语音提示可以通行 + +### 🔍 物品识别与查找 +- **智能物品搜索**:语音指令查找物品(如"帮我找一下红牛") +- **实时目标追踪**:使用 YOLO-E 开放词汇检测 + ByteTrack 追踪 +- **手部引导**:结合 MediaPipe 手部检测,引导用户手部靠近物品 +- **抓取检测**:检测手部握持动作,确认物品已拿到 +- **多模态反馈**:视觉标注 + 语音引导 + 居中提示 + +### 🎙️ 实时语音交互 +- **语音识别(ASR)**:基于阿里云 DashScope Paraformer 实时语音识别 +- **多模态对话**:Qwen-Omni-Turbo 支持图像+文本输入,语音输出 +- **智能指令解析**:自动识别导航、查找、对话等不同类型指令 +- **上下文感知**:在不同模式下智能过滤无关指令 + +### 📹 视频与音频处理 +- **实时视频流**:WebSocket 推流,支持多客户端同时观看 +- **音视频同步录制**:自动保存带时间戳的录像和音频文件 +- **IMU 数据融合**:接收 ESP32 的 IMU 数据,支持姿态估计 +- **多路音频混音**:支持系统语音、AI 回复、环境音同时播放 + +### 🎨 可视化与交互 +- **Web 实时监控**:浏览器端实时查看处理后的视频流 +- **IMU 3D 可视化**:Three.js 实时渲染设备姿态 +- **状态面板**:显示导航状态、检测信息、FPS 等 +- **中文友好**:所有界面和语音使用中文,支持自定义字体 + +## 💻 系统要求 + +### 硬件要求 +- **开发/服务器端**: + - CPU: Intel i5 或以上(推荐 i7/i9) + - GPU: NVIDIA GPU(支持 CUDA 11.8+,推荐 RTX 3060 或以上) + - 内存: 8GB RAM(推荐 16GB) + - 存储: 10GB 可用空间 + +- **客户端设备**(可选): + - ESP32-CAM 或其他支持 WebSocket 的摄像头 + - 麦克风(用于语音输入) + - 扬声器/耳机(用于语音输出) + +### 软件要求 +- **操作系统**: Windows 10/11, Linux (Ubuntu 20.04+), macOS 10.15+ +- **Python**: 3.9 - 3.11 +- **CUDA**: 11.8 或更高版本(GPU 加速必需) +- **浏览器**: Chrome 90+, Firefox 88+, Edge 90+(用于 Web 监控) + +### API 密钥 +- **阿里云 DashScope API Key**(必需): + - 用于语音识别(ASR)和 Qwen-Omni 对话 + - 申请地址:https://dashscope.console.aliyun.com/ + +## 🚀 快速开始 + +### 1. 克隆项目 + +```bash +git clone https://github.com/yourusername/aiglass.git +cd aiglass/rebuild1002 +``` + +### 2. 
安装依赖 + +#### 创建虚拟环境(推荐) +```bash +python -m venv venv +# Windows +venv\Scripts\activate +# Linux/macOS +source venv/bin/activate +``` + +#### 安装 Python 包 +```bash +pip install -r requirements.txt +``` + +#### 安装 CUDA 和 cuDNN(GPU 加速) +请参考 [NVIDIA CUDA Toolkit 安装指南](https://developer.nvidia.com/cuda-downloads) + +### 3. 下载模型文件 + +将以下模型文件放入 `model/` 目录: + +| 模型文件 | 用途 | 大小 | 下载链接 | +|---------|------|------|---------| +| `yolo-seg.pt` | 盲道分割 | ~50MB | [待补充] | +| `yoloe-11l-seg.pt` | 开放词汇检测 | ~80MB | [待补充] | +| `shoppingbest5.pt` | 物品识别 | ~30MB | [待补充] | +| `trafficlight.pt` | 红绿灯检测 | ~20MB | [待补充] | +| `hand_landmarker.task` | 手部检测 | ~15MB | [MediaPipe Models](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker#models) | + +### 4. 配置 API 密钥 + +创建 `.env` 文件: + +```bash +# .env +DASHSCOPE_API_KEY=your_api_key_here +``` + +或在代码中直接修改(不推荐): +```python +# app_main.py, line 50 +API_KEY = "your_api_key_here" +``` + +### 5. 启动系统 + +```bash +python app_main.py +``` + +系统将在 `http://0.0.0.0:8081` 启动,打开浏览器访问即可看到实时监控界面。 + +### 6. 连接设备(可选) + +如果使用 ESP32-CAM,请: +1. 烧录 `compile/compile.ino` 到 ESP32 +2. 修改 WiFi 配置,连接到同一网络 +3. 
ESP32 自动连接到 WebSocket 端点 + +## 🏗️ 系统架构 + +### 整体架构 + +``` +┌─────────────────────────────────────────────────────────────┐ +│ 客户端层 │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ ESP32-CAM │ │ 浏览器 │ │ 移动端 │ │ +│ │ (视频/音频) │ │ (监控界面) │ │ (语音控制) │ │ +│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ +└─────────┼──────────────────┼──────────────────┼─────────────┘ + │ WebSocket │ HTTP/WS │ WebSocket +┌─────────┼──────────────────┼──────────────────┼─────────────┐ +│ │ │ │ │ +│ ┌────▼──────────────────▼──────────────────▼────────┐ │ +│ │ FastAPI 主服务 (app_main.py) │ │ +│ │ - WebSocket 路由管理 │ │ +│ │ - 音视频流分发 │ │ +│ │ - 状态管理与协调 │ │ +│ └────┬────────────────┬────────────────┬─────────────┘ │ +│ │ │ │ │ +│ ┌──────▼──────┐ ┌──────▼──────┐ ┌──────▼──────┐ │ +│ │ ASR 模块 │ │ Omni 对话 │ │ 音频播放 │ │ +│ │ (asr_core) │ │(omni_client)│ │(audio_player)│ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ │ +│ 应用层 │ +└───────────────────────────────────────────────────────────────┘ + │ │ │ +┌─────────▼──────────────────▼──────────────────▼──────────────┐ +│ 导航统领层 │ +│ ┌─────────────────────────────────────────────────┐ │ +│ │ NavigationMaster (navigation_master.py) │ │ +│ │ - 状态机:IDLE/CHAT/BLINDPATH_NAV/ │ │ +│ │ CROSSING/TRAFFIC_LIGHT/ITEM_SEARCH │ │ +│ │ - 模式切换与协调 │ │ +│ └───┬─────────────────────┬───────────────────┬───┘ │ +│ │ │ │ │ +│ ┌────▼────────┐ ┌────────▼────────┐ ┌─────▼──────┐ │ +│ │ 盲道导航 │ │ 过马路导航 │ │ 物品查找 │ │ +│ │(blindpath) │ │ (crossstreet) │ │(yolomedia) │ │ +│ └──────────────┘ └──────────────────┘ └─────────────┘ │ +└───────────────────────────────────────────────────────────────┘ + │ │ │ +┌─────────▼──────────────────▼──────────────────▼──────────────┐ +│ 模型推理层 │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ YOLO 分割 │ │ YOLO-E 检测 │ │ MediaPipe │ │ +│ │ (盲道/斑马线) │ │ (开放词汇) │ │ (手部检测) │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ 红绿灯检测 │ │ 光流稳定 │ │ +│ │(HSV+YOLO) │ 
│(Lucas-Kanade)│ │ +│ └──────────────┘ └──────────────┘ │ +└───────────────────────────────────────────────────────────────┘ + │ +┌─────────▼─────────────────────────────────────────────────────┐ +│ 外部服务层 │ +│ ┌──────────────────────────────────────────────┐ │ +│ │ 阿里云 DashScope API │ │ +│ │ - Paraformer ASR (实时语音识别) │ │ +│ │ - Qwen-Omni-Turbo (多模态对话) │ │ +│ │ - Qwen-Turbo (标签提取) │ │ +│ └──────────────────────────────────────────────┘ │ +└───────────────────────────────────────────────────────────────┘ +``` + +### 核心模块说明 + +| 模块 | 文件 | 功能 | +|------|------|------| +| **主应用** | `app_main.py` | FastAPI 服务、WebSocket 管理、状态协调 | +| **导航统领** | `navigation_master.py` | 状态机管理、模式切换、语音节流 | +| **盲道导航** | `workflow_blindpath.py` | 盲道检测、避障、转弯引导 | +| **过马路导航** | `workflow_crossstreet.py` | 斑马线检测、红绿灯识别、对齐引导 | +| **物品查找** | `yolomedia.py` | 物品检测、手部引导、抓取确认 | +| **语音识别** | `asr_core.py` | 实时 ASR、VAD、指令解析 | +| **语音合成** | `omni_client.py` | Qwen-Omni 流式语音生成 | +| **音频播放** | `audio_player.py` | 多路混音、TTS 播放、音量控制 | +| **视频录制** | `sync_recorder.py` | 音视频同步录制 | +| **桥接 IO** | `bridge_io.py` | 线程安全的帧缓冲与分发 | + +## 📖 使用说明 + +### 语音指令 + +系统支持以下语音指令(说话时无需唤醒词): + +#### 导航控制 +``` +"开始导航" / "盲道导航" → 启动盲道导航 +"停止导航" / "结束导航" → 停止盲道导航 +"开始过马路" / "帮我过马路" → 启动过马路模式 +"过马路结束" / "结束过马路" → 停止过马路模式 +``` + +#### 红绿灯检测 +``` +"检测红绿灯" / "看红绿灯" → 启动红绿灯检测 +"停止检测" / "停止红绿灯" → 停止检测 +``` + +#### 物品查找 +``` +"帮我找一下 [物品名]" → 启动物品搜索 + 示例: + - "帮我找一下红牛" + - "找一下AD钙奶" + - "帮我找矿泉水" +"找到了" / "拿到了" → 确认找到物品 +``` + +#### 智能对话 +``` +"帮我看看这是什么" → 拍照识别 +"这个东西能吃吗" → 物品咨询 +任何其他问题 → AI 对话 +``` + +### 导航状态说明 + +系统包含以下主要状态(自动切换): + +1. **IDLE** - 空闲状态 + - 等待用户指令 + - 显示原始视频流 + +2. **CHAT** - 对话模式 + - 与 AI 进行多模态对话 + - 暂停导航功能 + +3. **BLINDPATH_NAV** - 盲道导航 + - **ONBOARDING**: 上盲道引导 + - ROTATION: 旋转对准盲道 + - TRANSLATION: 平移至盲道中心 + - **NAVIGATING**: 沿盲道行走 + - 实时方向修正 + - 障碍物检测 + - **MANEUVERING_TURN**: 转弯处理 + - **AVOIDING_OBSTACLE**: 避障 + +4. 
**CROSSING** - 过马路模式 + - **SEEKING_CROSSWALK**: 寻找斑马线 + - **WAIT_TRAFFIC_LIGHT**: 等待绿灯 + - **CROSSING**: 过马路中 + - **SEEKING_NEXT_BLINDPATH**: 寻找对面盲道 + +5. **ITEM_SEARCH** - 物品查找 + - 实时检测目标物品 + - 引导手部靠近 + - 确认抓取 + +6. **TRAFFIC_LIGHT_DETECTION** - 红绿灯检测 + - 实时检测红绿灯状态 + - 语音播报颜色变化 + +### Web 监控界面 + +打开浏览器访问 `http://localhost:8081`,可以看到: + +- **实时视频流**:显示处理后的视频,包括导航标注 +- **状态面板**:当前模式、检测信息、FPS +- **IMU 可视化**:设备姿态 3D 实时渲染 +- **语音识别结果**:显示识别的文字和 AI 回复 + +### WebSocket 端点 + +| 端点 | 用途 | 数据格式 | +|------|------|---------| +| `/ws/camera` | ESP32 相机推流 | Binary (JPEG) | +| `/ws/viewer` | 浏览器订阅视频 | Binary (JPEG) | +| `/ws_audio` | ESP32 音频上传 | Binary (PCM16) | +| `/ws_ui` | UI 状态推送 | JSON | +| `/ws` | IMU 数据接收 | JSON | +| `/stream.wav` | 音频下载流 | Binary (WAV) | + +## ⚙️ 配置说明 + +### 环境变量 + +创建 `.env` 文件配置以下参数: + +```bash +# 阿里云 API +DASHSCOPE_API_KEY=sk-xxxxx + +# 模型路径(可选,使用默认路径可不配置) +BLIND_PATH_MODEL=model/yolo-seg.pt +OBSTACLE_MODEL=model/yoloe-11l-seg.pt +YOLOE_MODEL_PATH=model/yoloe-11l-seg.pt + +# 导航参数 +AIGLASS_MASK_MIN_AREA=1500 # 最小掩码面积 +AIGLASS_MASK_MORPH=3 # 形态学核大小 +AIGLASS_MASK_MISS_TTL=6 # 掩码丢失容忍帧数 +AIGLASS_PANEL_SCALE=0.65 # 数据面板缩放 + +# 音频配置 +TTS_INTERVAL_SEC=1.0 # 语音播报间隔 +ENABLE_TTS=true # 启用语音播报 +``` + +### 修改模型路径 + +如果模型文件不在默认位置,可以在相应文件中修改: + +```python +# workflow_blindpath.py +seg_model_path = "your/custom/path/yolo-seg.pt" + +# yolomedia.py +YOLO_MODEL_PATH = "your/custom/path/shoppingbest5.pt" +HAND_TASK_PATH = "your/custom/path/hand_landmarker.task" +``` + +### 调整性能参数 + +根据硬件性能调整: + +```python +# yolomedia.py +HAND_DOWNSCALE = 0.8 # 手部检测降采样(越小越快,精度降低) +HAND_FPS_DIV = 1 # 手部检测抽帧(2=隔帧,3=每3帧) + +# workflow_blindpath.py +FEATURE_PARAMS = dict( + maxCorners=600, # 光流特征点数(越少越快) + qualityLevel=0.001, # 特征点质量 + minDistance=5 # 特征点最小间距 +) +``` + +## 🛠️ 开发文档 + +### 添加新的语音指令 + +1. 
在 `app_main.py` 的 `start_ai_with_text_custom()` 函数中添加: + +```python +# 检查新指令 +if "新指令关键词" in user_text: + # 执行自定义逻辑 + print("[CUSTOM] 新指令被触发") + await ui_broadcast_final("[系统] 新功能已启动") + return +``` + +2. 如需修改指令过滤规则: + +```python +# 修改 allowed_keywords 列表 +allowed_keywords = ["帮我看", "帮我找", "你的新关键词"] +``` + +### 扩展导航功能 + +1. 在 `workflow_blindpath.py` 添加新状态: + +```python +# 在 BlindPathNavigator.__init__() 中初始化 +self.your_new_state_var = False + +# 在 process_frame() 中处理 +def process_frame(self, image): + if self.your_new_state_var: + # 自定义处理逻辑 + guidance_text = "新状态引导" + # ... +``` + +2. 在 `navigation_master.py` 添加状态机状态: + +```python +class NavigationMaster: + def start_your_new_mode(self): + self.state = "YOUR_NEW_MODE" + # 初始化逻辑 +``` + +### 集成新模型 + +1. 创建模型包装类: + +```python +# your_model_wrapper.py +class YourModelWrapper: + def __init__(self, model_path): + self.model = load_your_model(model_path) + + def detect(self, image): + # 推理逻辑 + return results +``` + +2. 在 `app_main.py` 中加载: + +```python +your_model = YourModelWrapper("model/your_model.pt") +``` + +3. 在相应的工作流中调用: + +```python +results = your_model.detect(image) +``` + +### 调试技巧 + +1. **启用详细日志**: + +```python +# app_main.py 顶部 +import logging +logging.basicConfig(level=logging.DEBUG) +``` + +2. **查看帧率瓶颈**: + +```python +# yolomedia.py +PERF_DEBUG = True # 打印处理时间 +``` + +3. **测试单个模块**: + +```bash +# 测试盲道导航 +python test_cross_street_blindpath.py + +# 测试红绿灯检测 +python test_traffic_light.py + +# 测试录制功能 +python test_recorder.py +``` + + + + + +## 📄 许可证 + +本项目采用 MIT 许可证 - 详见 [LICENSE](LICENSE) 文件 + + diff --git a/ai_voice_pipeline.py b/ai_voice_pipeline.py new file mode 100644 index 0000000..9d6798d --- /dev/null +++ b/ai_voice_pipeline.py @@ -0,0 +1,154 @@ +# ai_voice_pipeline.py +# -*- coding: utf-8 -*- +""" +AI 语音交互管道 - Day 21 + +整合 SenseVoice + GLM-4.5-Flash + EdgeTTS + +流程: +1. 客户端 VAD 检测语音结束 +2. 发送完整音频到服务器 +3. SenseVoice 识别 → GLM 生成回复 → EdgeTTS 合成语音 +4. 
流式返回 PCM 音频 +""" + +import asyncio +from typing import Optional, Callable, AsyncGenerator + +# 导入各模块 +from sensevoice_asr import recognize as asr_recognize, init_sensevoice +from glm_client import chat as llm_chat, chat_stream as llm_chat_stream +from edge_tts_client import ( + text_to_speech_pcm_stream, + text_to_speech_pcm, + DEFAULT_VOICE, +) + + +async def init_pipeline(): + """初始化 AI 管道(服务器启动时调用)""" + await init_sensevoice() + print("[AI Pipeline] 初始化完成") + + +async def process_voice( + pcm_audio: bytes, + image_base64: Optional[str] = None, + on_text: Optional[Callable[[str], None]] = None, + on_audio: Optional[Callable[[bytes], None]] = None, +) -> str: + """ + 处理语音输入,返回 AI 回复 + + Args: + pcm_audio: PCM16 音频数据 (16kHz, mono) + image_base64: 可选的图片(用于多模态) + on_text: 文本回调(用于 UI 显示) + on_audio: 音频回调(用于流式播放) + + Returns: + AI 回复文本 + """ + # 1. 语音识别 + user_text = await asr_recognize(pcm_audio) + + if not user_text: + print("[AI Pipeline] 未识别到有效语音") + return "" + + print(f"[AI Pipeline] 用户说: {user_text}") + + # 通知 UI + if on_text: + on_text(f"用户: {user_text}") + + # 2. LLM 生成回复 + ai_response = await llm_chat(user_text, image_base64) + + if not ai_response: + print("[AI Pipeline] AI 无回复") + return "" + + print(f"[AI Pipeline] AI 回复: {ai_response}") + + # 通知 UI + if on_text: + on_text(f"AI: {ai_response}") + + # 3. TTS 合成并播放 + if on_audio: + async for audio_chunk in text_to_speech_pcm_stream(ai_response): + on_audio(audio_chunk) + + return ai_response + + +async def process_voice_stream( + pcm_audio: bytes, + image_base64: Optional[str] = None, +) -> AsyncGenerator[tuple, None]: + """ + 流式处理语音输入 + + Args: + pcm_audio: PCM16 音频数据 + image_base64: 可选的图片 + + Yields: + ("text", str) - 文本片段 + ("audio", bytes) - 音频片段 + """ + # 1. 语音识别 + user_text = await asr_recognize(pcm_audio) + + if not user_text: + return + + yield ("user_text", user_text) + + # 2. LLM 流式生成 + 3. 
TTS 流式合成 + # 收集一定长度的文本后送 TTS + buffer = "" + punctuation = "。,!?;:,.!?;:" + + async for text_chunk in llm_chat_stream(user_text, image_base64): + yield ("ai_text", text_chunk) + buffer += text_chunk + + # 遇到标点时合成音频 + if buffer and buffer[-1] in punctuation: + async for audio_chunk in text_to_speech_pcm_stream(buffer): + yield ("audio", audio_chunk) + buffer = "" + + # 处理剩余文本 + if buffer.strip(): + async for audio_chunk in text_to_speech_pcm_stream(buffer): + yield ("audio", audio_chunk) + + +async def text_to_voice(text: str) -> bytes: + """ + 文本转语音(用于导航提示等) + + Args: + text: 要合成的文本 + + Returns: + PCM16 音频数据 + """ + return await text_to_speech_pcm(text) + + +async def text_to_voice_stream(text: str) -> AsyncGenerator[bytes, None]: + """ + 流式文本转语音 + + Args: + text: 要合成的文本 + + Yields: + PCM16 音频块 + """ + async for chunk in text_to_speech_pcm_stream(text): + yield chunk diff --git a/app_main.py b/app_main.py new file mode 100644 index 0000000..8ebbbb3 --- /dev/null +++ b/app_main.py @@ -0,0 +1,1868 @@ +# app_main.py +# -*- coding: utf-8 -*- +import os, sys, time, json, asyncio, base64, audioop +from typing import Any, Dict, Optional, Tuple, List, Callable, Set, Deque +from collections import deque +from dataclasses import dataclass +from concurrent.futures import ThreadPoolExecutor +import re +from dotenv import load_dotenv + +# 加载环境变量 (Day 18: 修复 GPU 选择等配置不生效的问题) +load_dotenv() +# 在其它 import 之后加: + +from qwen_extractor import extract_english_label +from navigation_master import NavigationMaster, OrchestratorResult +# 新增:导入盲道导航器 +from workflow_blindpath import BlindPathNavigator +# 新增:导入过马路导航器 +from workflow_crossstreet import CrossStreetNavigator +import torch +from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request +from fastapi.responses import HTMLResponse, PlainTextResponse +from fastapi.staticfiles import StaticFiles +from starlette.websockets import WebSocketState +import uvicorn +import cv2 +import numpy as np + +# 【Day 19 优化】TurboJPEG - 比 
cv2.imencode/imdecode 快 2-3 倍 +# Day 20: TensorRT 模型加载工具 +from model_utils import get_best_model_path +try: + from turbojpeg import TurboJPEG + _turbo_jpeg = TurboJPEG() + print("[INIT] TurboJPEG 加载成功,JPEG 编解码将使用加速版本") +except ImportError: + _turbo_jpeg = None + print("[INIT] TurboJPEG 未安装,使用 cv2 作为回退 (pip install PyTurboJPEG)") + +from ultralytics import YOLO +from obstacle_detector_client import ObstacleDetectorClient +from contextlib import asynccontextmanager + +# Day 18: 删除了重复的 import torch(已在 L17 导入) + + +import mediapipe as mp +import bridge_io +import threading +import yolomedia # 确保和 app_main.py 同目录,文件名就是 yolomedia.py +# ---- Windows 事件循环策略 ---- +if sys.platform.startswith("win"): + try: + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + except Exception: + pass + +# ---- .env ---- +try: + from dotenv import load_dotenv + load_dotenv() +except Exception: + pass + +# ---- Day 21: 新 AI 管道 (SenseVoice + GLM-4.6v-Flash + EdgeTTS) ---- +# 选择使用新的 AI 管道还是旧的 DashScope/Omni +USE_NEW_AI_PIPELINE = os.getenv("USE_NEW_AI_PIPELINE", "1") == "1" + +if USE_NEW_AI_PIPELINE: + # 新管道:本地 ASR + GLM + EdgeTTS + 服务器端 VAD + from sensevoice_asr import recognize as sensevoice_recognize, init_sensevoice + from glm_client import chat as glm_chat + from edge_tts_client import text_to_speech_pcm_stream + from server_vad import get_server_vad, reset_server_vad + print("[AI Pipeline] 使用新管道: SenseVoice + GLM-4.6v-Flash + EdgeTTS + Server VAD") +else: + # 旧管道:DashScope ASR + Omni + from dashscope import audio as dash_audio + API_KEY = os.getenv("DASHSCOPE_API_KEY", "sk-a9440db694924559ae4ebdc2023d2b9a") + MODEL = "paraformer-realtime-v2" + from omni_client import stream_chat, OmniStreamPiece + from asr_core import ASRCallback, set_current_recognition, stop_current_recognition + print("[AI Pipeline] 使用旧管道: DashScope + Qwen-Omni") + +# 通用常量 +AUDIO_FMT = "pcm" +SAMPLE_RATE = 16000 +SILENCE_CHUNK = b'\x00' * 640 # 20ms 静音 + +# 兼容层:当使用新管道时,提供 ASR 相关函数的 stub +if 
USE_NEW_AI_PIPELINE: + # 新管道不使用流式 ASR,但需要保持函数存在避免导入错误 + async def set_current_recognition(rec): pass + async def stop_current_recognition(): pass + class ASRCallback: + def __init__(self, **kwargs): pass + + +from audio_stream import ( + hard_reset_audio, + BYTES_PER_20MS_16K, + is_playing_now, + current_ai_task, + register_stream_route, + broadcast_pcm16_realtime, +) +from audio_player import initialize_audio_system, play_voice_text + +# ---- 同步录制器 ---- +import sync_recorder +import signal +import atexit + +# ---- IMU UDP ---- +UDP_IP = "0.0.0.0" +UDP_PORT = 12345 + + +# ---- 【新】lifespan 管理器(替代 on_event) ---- +@asynccontextmanager +async def lifespan(app: FastAPI): + """应用生命周期管理器 - 替代 on_event 装饰器""" + # === 启动逻辑(原 @app.on_event("startup") 的内容) === + print("[LIFESPAN] 应用启动中...") + + # 1. 注册 bridge_io 发送回调 + main_loop = asyncio.get_event_loop() + + def _sender(jpeg_bytes: bytes): + try: + if main_loop.is_closed(): + return + + global yolomedia_sending_frames + if not yolomedia_sending_frames: + yolomedia_sending_frames = True + print("[YOLOMEDIA] 开始发送处理后的帧,切换到YOLO画面", flush=True) + + async def _broadcast(): + if not camera_viewers: + return + dead = [] + for ws in list(camera_viewers): + try: + await ws.send_bytes(jpeg_bytes) + except Exception: + dead.append(ws) + for ws in dead: + try: + camera_viewers.remove(ws) + except Exception: + pass + + future = asyncio.run_coroutine_threadsafe(_broadcast(), main_loop) + except Exception as e: + if "Event loop is closed" not in str(e): + print(f"[DEBUG] _sender error: {e}", flush=True) + + bridge_io.set_sender(_sender) + + # 2. 初始化音频系统(后台线程) + def _init_audio(): + try: + initialize_audio_system() + except Exception as e: + print(f"[AUDIO] 初始化失败: {e}") + + threading.Thread(target=_init_audio, daemon=True).start() + + # 3. 启动 UDP 服务器 + loop = asyncio.get_running_loop() + await loop.create_datagram_endpoint(lambda: UDPProto(), local_addr=(UDP_IP, UDP_PORT)) + + # 4. 
Day 21: 预加载新 AI 管道模型(避免首次使用时延迟) + if USE_NEW_AI_PIPELINE: + async def _preload_models(): + try: + print("[PRELOAD] 预加载 Silero VAD...") + from server_vad import get_server_vad + get_server_vad() # 触发 VAD 模型加载 + + print("[PRELOAD] 预加载 SenseVoice ASR...") + from sensevoice_asr import init_sensevoice + await init_sensevoice() # 异步加载 ASR 模型 + + print("[PRELOAD] 新 AI 管道模型预加载完成") + except Exception as e: + print(f"[PRELOAD] 模型预加载失败: {e}") + + # 后台预加载,不阻塞启动 + asyncio.create_task(_preload_models()) + + print("[LIFESPAN] 应用启动完成") + + # === yield 表示应用开始运行 === + # Day 13: 使用 try-finally 确保关闭逻辑执行,并捕获 CancelledError + try: + yield + except asyncio.CancelledError: + # Ctrl+C 时 Starlette 会取消 lifespan,这是正常行为 + pass + finally: + # === 关闭逻辑(原 @app.on_event("shutdown") 的内容) === + print("[LIFESPAN] 应用关闭中...") + + # 停止YOLO媒体处理 + try: + stop_yolomedia() + except Exception: + pass + + # 停止音频和AI任务 + try: + await hard_reset_audio("shutdown") + except Exception: + pass + + # 【Day 15】关闭帧处理线程池 + try: + frame_processing_executor.shutdown(wait=False) + print("[LIFESPAN] 帧处理线程池已关闭") + except Exception: + pass + + print("[LIFESPAN] 应用关闭完成") + + + # Day 13: 强制退出进程,避免 uvicorn 挂起 + # 注意:不能在这里 import threading 或 os,否则会破坏 Python 作用域 + # 顶层已经导入了这些模块 + def _force_exit(): + import time as _time + import os as _os + _time.sleep(0.5) # 给其他清理一点时间 + _os._exit(0) + import threading as _threading + _threading.Thread(target=_force_exit, daemon=True).start() + + +app = FastAPI(lifespan=lifespan) + +# ====== 状态与容器 ====== +app.mount("/static", StaticFiles(directory="static"), name="static") + +ui_clients: Dict[int, WebSocket] = {} +current_partial: str = "" +recent_finals: List[str] = [] +RECENT_MAX = 50 +last_frames: Deque[Tuple[float, bytes]] = deque(maxlen=10) + +camera_viewers: Set[WebSocket] = set() +esp32_camera_ws: Optional[WebSocket] = None +imu_ws_clients: Set[WebSocket] = set() +esp32_audio_ws: Optional[WebSocket] = None + +# 【新增】盲道导航相关全局变量 +blind_path_navigator = None +navigation_active = False 
+yolo_seg_model = None +obstacle_detector = None + +# 【新增】过马路导航相关全局变量 +cross_street_navigator = None +cross_street_active = False +orchestrator = None # 新增 + +# 【新增】omni对话状态标志 +omni_conversation_active = False # 标记omni对话是否正在进行 +omni_previous_nav_state = None # 保存omni激活前的导航状态,用于恢复 + +# 【Day 15 性能优化】帧处理线程池 - Day 18 优化: 增加worker数量 +# 将 CPU 密集型的帧处理移至后台线程,避免阻塞事件循环 +frame_processing_executor = ThreadPoolExecutor(max_workers=3, thread_name_prefix="frame_proc") + +# 【Day 15 跳帧机制】异步帧处理状态 +# 避免 await 阻塞,使用后台任务 + 最新结果缓存 +_nav_processing_task = None # 当前的后台处理任务 +_nav_last_result_image = None # 最后一次成功处理的输出图像 +_nav_last_result_jpeg: bytes = None # 【Day 19 优化】缓存编码后的 JPEG,避免重复编码 +_nav_pending_frame = None # 等待处理的最新帧 +_nav_processing_lock = asyncio.Lock() # 确保单任务运行 +_nav_task_start_time = None # Day 20: 任务开始时间,用于计算处理耗时 + +# 【Day 18 性能优化】并行广播辅助函数 - 解决 WebSocket 顺序发送阻塞 + +# 【Day 19 优化】TurboJPEG 辅助函数 - 带回退逻辑 +def turbo_decode(jpeg_bytes: bytes): + """解码 JPEG 为 BGR numpy 数组,优先使用 TurboJPEG""" + if _turbo_jpeg: + return _turbo_jpeg.decode(jpeg_bytes) + else: + arr = np.frombuffer(jpeg_bytes, dtype=np.uint8) + return cv2.imdecode(arr, cv2.IMREAD_COLOR) + +def turbo_encode(bgr_image, quality: int = 80) -> bytes: + """编码 BGR numpy 数组为 JPEG bytes,优先使用 TurboJPEG""" + if _turbo_jpeg: + return _turbo_jpeg.encode(bgr_image, quality=quality) + else: + ok, enc = cv2.imencode(".jpg", bgr_image, [int(cv2.IMWRITE_JPEG_QUALITY), quality]) + return enc.tobytes() if ok else None + +async def _broadcast_to_viewers(jpeg_data: bytes) -> None: + """并行向所有 viewer 广播 JPEG 帧,避免顺序 await 阻塞事件循环""" + if not camera_viewers or not jpeg_data: + return + + viewers = list(camera_viewers) + if not viewers: + return + + # 使用 asyncio.gather 并行发送,return_exceptions=True 确保单个失败不影响其他 + async def _safe_send(ws): + try: + await ws.send_bytes(jpeg_data) + return None + except Exception: + return ws # 返回失败的 ws 以便移除 + + results = await asyncio.gather(*[_safe_send(ws) for ws in viewers], return_exceptions=True) + + # 清理失败的连接 + for 
r in results: + if r is not None and r in camera_viewers: + try: + camera_viewers.discard(r) + except Exception: + pass + + +def load_navigation_models(): + """加载盲道导航所需的模型""" + global yolo_seg_model, obstacle_detector + + try: + seg_model_path = os.getenv("BLIND_PATH_MODEL", "model/yolo-seg.pt") + # Day 20: 优先使用 TensorRT 引擎 + seg_model_path = get_best_model_path(seg_model_path) + #print(f"[NAVIGATION] 尝试加载模型: {seg_model_path}") + + if os.path.exists(seg_model_path): + print(f"[NAVIGATION] 模型文件存在,开始加载...") + yolo_seg_model = YOLO(seg_model_path) + + # Day 20: TensorRT 引擎不需要 .to() 和 .fuse() + from model_utils import is_tensorrt_engine + if is_tensorrt_engine(seg_model_path): + print(f"[NAVIGATION] TensorRT 引擎已加载,跳过 .to() 和 .fuse()") + elif torch.cuda.is_available(): + yolo_seg_model.to("cuda") + # Day 22 优化: 融合模型层以提升推理速度 + try: + yolo_seg_model.fuse() + print(f"[NAVIGATION] 模型层融合完成") + except Exception as e: + print(f"[NAVIGATION] 模型融合失败(非致命): {e}") + print(f"[NAVIGATION] 盲道分割模型加载成功并放到GPU: {yolo_seg_model.device}") + else: + print("[NAVIGATION] CUDA不可用,模型仍在CPU") + + # Day 22 优化: 使用配置的输入尺寸进行预热,并启用FP16 + try: + imgsz = int(os.getenv("AIGLASS_YOLO_IMGSZ", "480")) + use_half = os.getenv("AIGLASS_YOLO_HALF", "1") == "1" + test_img = np.zeros((imgsz, imgsz, 3), dtype=np.uint8) + + # 预热推理,让CUDA编译kernel + for _ in range(3): # 多次预热确保稳定 + results = yolo_seg_model.predict( + test_img, + device="cuda" if torch.cuda.is_available() else "cpu", + verbose=False, + imgsz=imgsz, + half=use_half + ) + print(f"[NAVIGATION] 模型预热成功 (imgsz={imgsz}, half={use_half})") + print(f"[NAVIGATION] 支持的类别数: {len(yolo_seg_model.names) if hasattr(yolo_seg_model, 'names') else '未知'}") + if hasattr(yolo_seg_model, 'names'): + print(f"[NAVIGATION] 模型类别: {yolo_seg_model.names}") + except Exception as e: + print(f"[NAVIGATION] 模型预热失败: {e}") + else: + print(f"[NAVIGATION] 错误:找不到模型文件: {seg_model_path}") + print(f"[NAVIGATION] 当前工作目录: {os.getcwd()}") + print(f"[NAVIGATION] 请检查文件路径是否正确") + + # 【修改开始】使用 
ObstacleDetectorClient 替代直接的 YOLO + obstacle_model_path = os.getenv("OBSTACLE_MODEL", "model/yoloe-11l-seg.pt") + # Day 20: 优先使用 TensorRT 引擎 + obstacle_model_path = get_best_model_path(obstacle_model_path) + print(f"[NAVIGATION] 尝试加载障碍物检测模型: {obstacle_model_path}") + + if os.path.exists(obstacle_model_path): + print(f"[NAVIGATION] 障碍物检测模型文件存在,开始加载...") + try: + # 使用 ObstacleDetectorClient 封装的 YOLO-E + obstacle_detector = ObstacleDetectorClient(model_path=obstacle_model_path) + print(f"[NAVIGATION] ========== YOLO-E 障碍物检测器加载成功 ==========") + + # 检查模型是否成功加载 + if hasattr(obstacle_detector, 'model') and obstacle_detector.model is not None: + print(f"[NAVIGATION] YOLO-E 模型已初始化") + # Day 20: TensorRT 引擎没有 .parameters(),跳过设备检查 + if not is_tensorrt_engine(obstacle_model_path): + try: + print(f"[NAVIGATION] 模型设备: {next(obstacle_detector.model.parameters()).device}") + except StopIteration: + pass + else: + print(f"[NAVIGATION] 警告:YOLO-E 模型初始化异常") + + # 检查白名单是否成功加载 + if hasattr(obstacle_detector, 'WHITELIST_CLASSES'): + print(f"[NAVIGATION] 白名单类别数: {len(obstacle_detector.WHITELIST_CLASSES)}") + print(f"[NAVIGATION] 白名单前10个类别: {', '.join(obstacle_detector.WHITELIST_CLASSES[:10])}") + else: + print(f"[NAVIGATION] 警告:白名单类别未定义") + + # 检查文本特征是否成功预计算 + if hasattr(obstacle_detector, 'whitelist_embeddings') and obstacle_detector.whitelist_embeddings is not None: + print(f"[NAVIGATION] YOLO-E 文本特征已预计算") + print(f"[NAVIGATION] 文本特征张量形状: {obstacle_detector.whitelist_embeddings.shape if hasattr(obstacle_detector.whitelist_embeddings, 'shape') else '未知'}") + else: + print(f"[NAVIGATION] 警告:YOLO-E 文本特征未预计算") + + # 测试障碍物检测功能 + print(f"[NAVIGATION] 开始测试 YOLO-E 检测功能...") + try: + test_img = np.zeros((640, 640, 3), dtype=np.uint8) + # 在测试图像中画一个白色矩形,模拟一个物体 + cv2.rectangle(test_img, (200, 200), (400, 400), (255, 255, 255), -1) + + # 测试检测(不提供 path_mask) + test_results = obstacle_detector.detect(test_img) + print(f"[NAVIGATION] YOLO-E 检测测试成功!") + print(f"[NAVIGATION] 测试检测结果数: 
{len(test_results)}") + + if len(test_results) > 0: + print(f"[NAVIGATION] 测试检测到的物体:") + for i, obj in enumerate(test_results): + print(f" - 物体 {i+1}: {obj.get('name', 'unknown')}, " + f"面积比例: {obj.get('area_ratio', 0):.3f}, " + f"位置: ({obj.get('center_x', 0):.0f}, {obj.get('center_y', 0):.0f})") + except Exception as e: + print(f"[NAVIGATION] YOLO-E 检测测试失败: {e}") + import traceback + traceback.print_exc() + + print(f"[NAVIGATION] ========== YOLO-E 障碍物检测器加载完成 ==========") + + except Exception as e: + print(f"[NAVIGATION] 障碍物检测器加载失败: {e}") + import traceback + traceback.print_exc() + obstacle_detector = None + else: + print(f"[NAVIGATION] 警告:找不到障碍物检测模型文件: {obstacle_model_path}") + + except Exception as e: + print(f"[NAVIGATION] 模型加载失败: {e}") + import traceback + traceback.print_exc() + +# 在程序启动时加载模型 +print("[NAVIGATION] 开始加载导航模型...") +load_navigation_models() +print(f"[NAVIGATION] 模型加载完成 - yolo_seg_model: {yolo_seg_model is not None}") + +# Day 14 优化: 在服务器启动时就预先创建导航器实例,避免客户端连接时延迟 +if yolo_seg_model is not None and blind_path_navigator is None: + print("[NAVIGATION] 预初始化盲道导航器...") + blind_path_navigator = BlindPathNavigator(yolo_seg_model, obstacle_detector) + print("[NAVIGATION] 盲道导航器已预初始化") + +if yolo_seg_model is not None and cross_street_navigator is None: + print("[CROSS_STREET] 预初始化过马路导航器...") + cross_street_navigator = CrossStreetNavigator( + seg_model=yolo_seg_model, + coco_model=None, + obs_model=None + ) + print("[CROSS_STREET] 过马路导航器已预初始化") + +if orchestrator is None and blind_path_navigator is not None and cross_street_navigator is not None: + print("[NAV MASTER] 预初始化统领状态机...") + orchestrator = NavigationMaster(blind_path_navigator, cross_street_navigator) + print("[NAV MASTER] 统领状态机已预初始化") + +# 【新增】启动同步录制 +print("[RECORDER] 启动同步录制系统...") +sync_recorder.start_recording() +print("[RECORDER] 录制系统已启动,将自动保存视频和音频") + +# 【新增】注册退出处理器,确保Ctrl+C时保存录制文件 +def cleanup_on_exit(): + """程序退出时的清理工作""" + print("\n[SYSTEM] 正在关闭录制器...") + try: + 
sync_recorder.stop_recording() + print("[SYSTEM] 录制文件已保存") + except Exception as e: + print(f"[SYSTEM] 关闭录制器时出错: {e}") + +def signal_handler(sig, frame): + """处理Ctrl+C信号""" + print("\n[SYSTEM] 收到中断信号,正在安全退出...") + cleanup_on_exit() + # Day 13: 使用 os._exit() 强制退出,避免 asyncio 事件循环干扰 + import os + os._exit(0) + +# 注册信号处理器 +signal.signal(signal.SIGINT, signal_handler) # Ctrl+C +signal.signal(signal.SIGTERM, signal_handler) # 终止信号 +atexit.register(cleanup_on_exit) # 正常退出时也调用 + +print("[RECORDER] 已注册退出处理器 - Ctrl+C时会自动保存录制文件") + + + +# 【新增】预加载红绿灯检测模型(避免进入WAIT_TRAFFIC_LIGHT状态时卡顿) +try: + import trafficlight_detection + print("[TRAFFIC_LIGHT] 开始预加载红绿灯检测模型...") + if trafficlight_detection.init_model(): + print("[TRAFFIC_LIGHT] 红绿灯检测模型预加载成功") + # 执行一次测试推理,完全预热模型 + try: + test_img = np.zeros((640, 640, 3), dtype=np.uint8) + _ = trafficlight_detection.process_single_frame(test_img) + print("[TRAFFIC_LIGHT] 模型预热完成") + except Exception as e: + print(f"[TRAFFIC_LIGHT] 模型预热失败: {e}") + else: + print("[TRAFFIC_LIGHT] 红绿灯检测模型预加载失败") +except Exception as e: + print(f"[TRAFFIC_LIGHT] 红绿灯模型预加载出错: {e}") + +# ============== 关键:系统级"硬重置"总闸 ================= +interrupt_lock = asyncio.Lock() + +# ============== YOLO媒体线程管理 ================= +yolomedia_thread: Optional[threading.Thread] = None +yolomedia_stop_event = threading.Event() +yolomedia_running = False +yolomedia_sending_frames = False # 新增:标记YOLO是否已经开始发送处理后的帧 + +# ============== 红绿灯检测跳帧机制 ================= +_traffic_light_task = None +_traffic_light_result_jpeg = None +_traffic_light_pending_frame = None + +# 物品名称到YOLO类别的映射 +ITEM_TO_CLASS_MAP = { + "红牛": "Red_Bull", + "AD钙奶": "AD_milk", + "ad钙奶": "AD_milk", + "钙奶": "AD_milk", +} + +async def ui_broadcast_raw(msg: str): + dead = [] + for k, ws in list(ui_clients.items()): + try: + await ws.send_text(msg) + except Exception: + dead.append(k) + for k in dead: + ui_clients.pop(k, None) + + +async def ui_broadcast_partial(text: str): + global current_partial + current_partial = text + await 
ui_broadcast_raw("PARTIAL:" + text) + +async def ui_broadcast_final(text: str): + global current_partial, recent_finals + current_partial = "" + recent_finals.append(text) + if len(recent_finals) > RECENT_MAX: + recent_finals = recent_finals[-RECENT_MAX:] + await ui_broadcast_raw("FINAL:" + text) + print(f"[ASR/AI FINAL] {text}", flush=True) + +async def full_system_reset(reason: str = ""): + """ + 回到刚启动后的状态: + 1) 停播 + 取消AI任务 + 切断所有/stream.wav(hard_reset_audio) + 2) 停止 ASR 实时识别流(关键) + 3) 清 UI 状态 + 4) 清最近相机帧(避免把旧帧又拼进下一轮) + 5) 告知 ESP32:RESET(可选) + """ + # 1) 音频&AI + await hard_reset_audio(reason or "full_system_reset") + + # 2) ASR + await stop_current_recognition() + + # 3) UI + global current_partial, recent_finals + current_partial = "" + recent_finals = [] + + # 4) 相机帧 + try: + last_frames.clear() + except Exception: + pass + + # 5) 通知 ESP32 + try: + if esp32_audio_ws and (esp32_audio_ws.client_state == WebSocketState.CONNECTED): + await esp32_audio_ws.send_text("RESET") + except Exception: + pass + + print("[SYSTEM] full reset done.", flush=True) + +# ========= 启动/停止 YOLO 媒体处理 ========= +def start_yolomedia_with_target(target_name: str): + """启动yolomedia线程,搜索指定物品""" + global yolomedia_thread, yolomedia_stop_event, yolomedia_running, yolomedia_sending_frames + + # 如果已经在运行,先停止 + if yolomedia_running: + stop_yolomedia() + + # 查找对应的YOLO类别 + yolo_class = ITEM_TO_CLASS_MAP.get(target_name, target_name) + print(f"[YOLOMEDIA] Starting with target: {target_name} -> YOLO class: {yolo_class}", flush=True) + print(f"[YOLOMEDIA] Available mappings: {ITEM_TO_CLASS_MAP}", flush=True) # 添加这行调试 + + yolomedia_stop_event.clear() + yolomedia_running = True + yolomedia_sending_frames = False # 重置发送帧状态 + + def _run(): + try: + # 传递目标类别名和停止事件 + yolomedia.main(headless=True, prompt_name=yolo_class, stop_event=yolomedia_stop_event) + except Exception as e: + print(f"[YOLOMEDIA] worker stopped: {e}", flush=True) + finally: + global yolomedia_running, yolomedia_sending_frames + 
yolomedia_running = False + yolomedia_sending_frames = False + + yolomedia_thread = threading.Thread(target=_run, daemon=True) + yolomedia_thread.start() + print(f"[YOLOMEDIA] background worker started for: {yolo_class}(正在初始化,暂时显示原始画面)", flush=True) + +def stop_yolomedia(): + """停止yolomedia线程""" + global yolomedia_thread, yolomedia_stop_event, yolomedia_running, yolomedia_sending_frames + + if yolomedia_running: + print("[YOLOMEDIA] Stopping worker...", flush=True) + yolomedia_stop_event.set() + + # 等待线程结束(最多等5秒) + if yolomedia_thread and yolomedia_thread.is_alive(): + yolomedia_thread.join(timeout=5.0) + + yolomedia_running = False + yolomedia_sending_frames = False + + # 【新增】如果orchestrator在找物品模式,结束时不自动恢复(由命令控制) + # 只清理标志位即可 + print("[YOLOMEDIA] Worker stopped, 等待状态切换.", flush=True) + +# ========= 自定义的 start_ai_with_text,支持识别特殊命令 ========= +async def start_ai_with_text_custom(user_text: str): + """扩展版的AI启动函数,支持识别特殊命令""" + global navigation_active, blind_path_navigator, cross_street_active, cross_street_navigator, orchestrator + + # 【修改】在导航模式和红绿灯检测模式下,只有特定词才进入omni对话 + if orchestrator: + current_state = orchestrator.get_state() + # 如果在导航模式或红绿灯检测模式(非CHAT模式) + if current_state not in ["CHAT", "IDLE"]: + # 检查是否是允许的对话触发词 + allowed_keywords = ["帮我看", "帮我看下", "帮我找", "找一下", "看看", "识别一下"] + is_allowed_query = any(keyword in user_text for keyword in allowed_keywords) + + # 检查是否是导航控制命令 + nav_control_keywords = ["开始过马路", "过马路结束", "开始导航", "盲道导航", "停止导航", "结束导航", + "检测红绿灯", "看红绿灯", "停止检测", "停止红绿灯"] + is_nav_control = any(keyword in user_text for keyword in nav_control_keywords) + + # 如果既不是允许的查询,也不是导航控制命令,则丢弃 + if not is_allowed_query and not is_nav_control: + mode_name = "红绿灯检测" if current_state == "TRAFFIC_LIGHT_DETECTION" else "导航" + print(f"[{mode_name}模式] 丢弃非对话语音: {user_text}") + return # 直接丢弃,不进入omni + + # 【修改】检查是否是过马路相关命令 - 使用orchestrator控制 + if "开始过马路" in user_text or "帮我过马路" in user_text: + # 【新增】如果正在找物品,先停止 + if yolomedia_running: + stop_yolomedia() + 
print("[ITEM_SEARCH] 从找物品模式切换到过马路") + + if orchestrator: + orchestrator.start_crossing() + print(f"[CROSS_STREET] 过马路模式已启动,状态: {orchestrator.get_state()}") + # 播放启动语音并广播到UI + play_voice_text("过马路模式已启动。") + await ui_broadcast_final("[系统] 过马路模式已启动") + else: + print("[CROSS_STREET] 警告:导航统领器未初始化!") + play_voice_text("启动过马路模式失败,请稍后重试。") + await ui_broadcast_final("[系统] 导航系统未就绪") + return + + if "过马路结束" in user_text or "结束过马路" in user_text: + if orchestrator: + orchestrator.stop_navigation() + print(f"[CROSS_STREET] 导航已停止,状态: {orchestrator.get_state()}") + # 播放停止语音并广播到UI + play_voice_text("已停止导航。") + await ui_broadcast_final("[系统] 过马路模式已停止") + else: + await ui_broadcast_final("[系统] 导航系统未运行") + return + + # 【修改】检查是否是红绿灯检测命令 - 实现与盲道导航互斥 + if "检测红绿灯" in user_text or "看红绿灯" in user_text: + try: + import trafficlight_detection + + # 切换orchestrator到红绿灯检测模式(暂停盲道导航) + if orchestrator: + orchestrator.start_traffic_light_detection() + print(f"[TRAFFIC] 切换到红绿灯检测模式,状态: {orchestrator.get_state()}") + + # 【改进】使用主线程模式而不是独立线程,避免掉帧 + success = trafficlight_detection.init_model() # 只初始化模型,不启动线程 + trafficlight_detection.reset_detection_state() # 重置状态 + + if success: + await ui_broadcast_final("[系统] 红绿灯检测已启动") + else: + await ui_broadcast_final("[系统] 红绿灯模型加载失败") + except Exception as e: + print(f"[TRAFFIC] 启动红绿灯检测失败: {e}") + await ui_broadcast_final(f"[系统] 启动失败: {e}") + return + + if "停止检测" in user_text or "停止红绿灯" in user_text: + try: + # 恢复到对话模式 + if orchestrator: + orchestrator.stop_navigation() # 回到CHAT模式 + print(f"[TRAFFIC] 红绿灯检测停止,恢复到{orchestrator.get_state()}模式") + + # 清除红绿灯检测缓存 + global _traffic_light_result_jpeg + _traffic_light_result_jpeg = None + + await ui_broadcast_final("[系统] 红绿灯检测已停止") + except Exception as e: + print(f"[TRAFFIC] 停止红绿灯检测失败: {e}") + await ui_broadcast_final(f"[系统] 停止失败: {e}") + return + + # 【修改】检查是否是导航相关命令 - 使用orchestrator控制 + if "开始导航" in user_text or "盲道导航" in user_text or "帮我导航" in user_text: + # 【新增】如果正在找物品,先停止 + if yolomedia_running: + stop_yolomedia() + 
print("[ITEM_SEARCH] 从找物品模式切换到盲道导航") + + if orchestrator: + orchestrator.start_blind_path_navigation() + print(f"[NAVIGATION] 盲道导航已启动,状态: {orchestrator.get_state()}") + await ui_broadcast_final("[系统] 盲道导航已启动") + else: + print("[NAVIGATION] 警告:导航统领器未初始化!") + await ui_broadcast_final("[系统] 导航系统未就绪") + return + + if "停止导航" in user_text or "结束导航" in user_text: + if orchestrator: + orchestrator.stop_navigation() + print(f"[NAVIGATION] 导航已停止,状态: {orchestrator.get_state()}") + await ui_broadcast_final("[系统] 盲道导航已停止") + else: + await ui_broadcast_final("[系统] 导航系统未运行") + return + + nav_cmd_keywords = ["开始过马路", "过马路结束", "开始导航", "盲道导航", "停止导航", "结束导航", "立即通过", "现在通过", "继续"] + if any(k in user_text for k in nav_cmd_keywords): + if orchestrator: + orchestrator.on_voice_command(user_text) + await ui_broadcast_final("[系统] 导航模式已更新") + else: + await ui_broadcast_final("[系统] 导航统领器未初始化") + return + + # 检查是否是"帮我找/识别一下xxx"的命令 + # 扩展正则表达式,支持更多关键词 + find_pattern = r"(?:^\s*帮我)?\s*找一下\s*(.+?)(?:。|!|?|$)" + match = re.search(find_pattern, user_text) + + if match: + # 提取中文物品名称 + item_cn = match.group(1).strip() + if item_cn: + # 【新增】用本地映射 + Qwen 提取英文类名 + label_en, src = extract_english_label(item_cn) + print(f"[COMMAND] Finder request: '{item_cn}' -> '{label_en}' (src={src})", flush=True) + + # 【新增】切换到找物品模式(暂停导航) + if orchestrator: + orchestrator.start_item_search() + print(f"[ITEM_SEARCH] 已切换到找物品模式,状态: {orchestrator.get_state()}") + + # 【关键】把英文类名传给 yolomedia(它会在找不到类时自动切 YOLOE) + start_yolomedia_with_target(label_en) + + # 给前端/语音来个确认反馈 + try: + await ui_broadcast_final(f"[找物品] 正在寻找 {item_cn}...") + except Exception: + pass + + return + + # 检查是否是"找到了"的命令 + if "找到了" in user_text or "拿到了" in user_text: + print("[COMMAND] Found command detected", flush=True) + # 停止yolomedia + stop_yolomedia() + + # 【新增】停止找物品模式,恢复之前的导航状态 + if orchestrator: + orchestrator.stop_item_search(restore_nav=True) + current_state = orchestrator.get_state() + print(f"[ITEM_SEARCH] 找物品结束,当前状态: {current_state}") + + # 
根据恢复的状态给出反馈 + if current_state in ["BLINDPATH_NAV", "SEEKING_CROSSWALK", "WAIT_TRAFFIC_LIGHT", "CROSSING", "SEEKING_NEXT_BLINDPATH"]: + await ui_broadcast_final("[找物品] 已找到物品,继续导航。") + else: + await ui_broadcast_final("[找物品] 已找到物品。") + else: + await ui_broadcast_final("[找物品] 已找到物品。") + + return + + # 【修改】omni对话开始时,切换到CHAT模式 + global omni_conversation_active, omni_previous_nav_state + omni_conversation_active = True + + # 保存当前导航状态并切换到CHAT模式 + if orchestrator: + current_state = orchestrator.get_state() + # 只有在导航模式下才需要保存和切换 + if current_state not in ["CHAT", "IDLE"]: + omni_previous_nav_state = current_state + orchestrator.force_state("CHAT") + print(f"[OMNI] 对话开始,从{current_state}切换到CHAT模式") + else: + omni_previous_nav_state = None + print(f"[OMNI] 对话开始(当前已在{current_state}模式)") + + # 如果不是特殊命令,执行原有的AI对话逻辑 + # 但如果yolomedia正在运行,暂时不处理普通对话 + if yolomedia_running: + print("[AI] YOLO media is running, skipping normal AI response", flush=True) + return + + # 原有的AI对话逻辑 + await start_ai_with_text(user_text) + +# ========= Omni 播放启动 ========= +async def start_ai_with_text(user_text: str): + """硬重置后,开启新的 AI 语音输出。""" + + # Day 13: 在 AI 处理开始前保存 WebSocket 引用 + from audio_stream import set_tts_websocket + set_tts_websocket(esp32_audio_ws) + + async def _runner_new_pipeline(): + """Day 21: 新管道 - GLM-4.5-Flash + EdgeTTS""" + txt_buf: List[str] = [] + + try: + # 获取图片(如果有) + img_b64 = None + if last_frames: + try: + _, jpeg_bytes = last_frames[-1] + img_b64 = base64.b64encode(jpeg_bytes).decode("ascii") + except Exception: + pass + + # 调用 GLM-4.5-Flash + print(f"[NEW AI] 调用 GLM: {user_text[:50]}...") + ai_response = await glm_chat(user_text, img_b64) + + if not ai_response: + print("[NEW AI] AI 无回复") + return + + txt_buf.append(ai_response) + print(f"[NEW AI] AI 回复: {ai_response}") + await ui_broadcast_partial("[AI] " + ai_response) + + # EdgeTTS 流式合成并发送 + # 设置 VAD TTS 播放状态,避免将 TTS 回声误识别为用户语音 + vad = get_server_vad() + vad.set_tts_playing(True) + + try: + async for audio_chunk in 
text_to_speech_pcm_stream(ai_response): + if audio_chunk: + await broadcast_pcm16_realtime(audio_chunk) + finally: + # TTS 播放结束,恢复 VAD 检测 + vad.set_tts_playing(False) + + print("[NEW AI] 音频播放完成") + + except asyncio.CancelledError: + raise + except Exception as e: + err_msg = f"AI Error: {str(e)}" + print(f"[NEW AI] 错误: {err_msg}") + import traceback + traceback.print_exc() + + # 1. 广播错误到 UI + try: + await ui_broadcast_final(f"[系统] {err_msg}") + except Exception: + pass + + # 2. 发送错误到客户端日志 + if esp32_audio_ws: + try: + await esp32_audio_ws.send_text(f"ERR:{str(e)[:50]}") + except Exception: + pass + + # 3. 语音播报错误 (可选,防止用户以为在思考) + try: + vad = get_server_vad() + vad.set_tts_playing(True) + async for audio_chunk in text_to_speech_pcm_stream("抱歉,我遇到了一些问题。"): + if audio_chunk: + await broadcast_pcm16_realtime(audio_chunk) + vad.set_tts_playing(False) + except Exception: + pass + finally: + global omni_conversation_active, omni_previous_nav_state + omni_conversation_active = False + + if orchestrator and omni_previous_nav_state: + orchestrator.force_state(omni_previous_nav_state) + print(f"[AI] 对话结束,恢复到{omni_previous_nav_state}模式") + omni_previous_nav_state = None + + from audio_stream import stream_clients + for sc in list(stream_clients): + if not sc.abort_event.is_set(): + try: sc.q.put_nowait(b"\x00"*BYTES_PER_20MS_16K) + except Exception: pass + try: sc.q.put_nowait(None) + except Exception: pass + + final_text = ("".join(txt_buf)).strip() or "(空响应)" + await ui_broadcast_final("[AI] " + final_text) + + async def _runner_old_pipeline(): + """旧管道 - Qwen-Omni (流式音频)""" + txt_buf: List[str] = [] + rate_state = None + + # 组装(图像+文本) + content_list = [] + if last_frames: + try: + _, jpeg_bytes = last_frames[-1] + img_b64 = base64.b64encode(jpeg_bytes).decode("ascii") + content_list.append({ + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"} + }) + except Exception: + pass + content_list.append({"type": "text", "text": user_text}) + + try: + 
async for piece in stream_chat(content_list, voice="Cherry", audio_format="wav"): + if piece.text_delta: + txt_buf.append(piece.text_delta) + try: + await ui_broadcast_partial("[AI] " + "".join(txt_buf)) + except Exception: + pass + + if piece.audio_b64: + try: + pcm24 = base64.b64decode(piece.audio_b64) + except Exception: + pcm24 = b"" + if pcm24: + pcm16k, rate_state = audioop.ratecv(pcm24, 2, 1, 24000, 16000, rate_state) + pcm16k = audioop.mul(pcm16k, 2, 0.60) + if pcm16k: + await broadcast_pcm16_realtime(pcm16k) + + except asyncio.CancelledError: + raise + except Exception as e: + try: + await ui_broadcast_final(f"[AI] 发生错误:{e}") + except Exception: + pass + finally: + global omni_conversation_active, omni_previous_nav_state + omni_conversation_active = False + + if orchestrator and omni_previous_nav_state: + orchestrator.force_state(omni_previous_nav_state) + print(f"[OMNI] 对话结束,恢复到{omni_previous_nav_state}模式") + omni_previous_nav_state = None + else: + print(f"[OMNI] 对话结束(无需恢复导航状态)") + + from audio_stream import stream_clients + for sc in list(stream_clients): + if not sc.abort_event.is_set(): + try: sc.q.put_nowait(b"\x00"*BYTES_PER_20MS_16K) + except Exception: pass + try: sc.q.put_nowait(None) + except Exception: pass + + final_text = ("".join(txt_buf)).strip() or "(空响应)" + try: + await ui_broadcast_final("[AI] " + final_text) + except Exception: + pass + + # 真正启动前先硬重置 + await hard_reset_audio("start_ai_with_text") + loop = asyncio.get_running_loop() + from audio_stream import __dict__ as _as_dict + + # 根据配置选择管道 + if USE_NEW_AI_PIPELINE: + task = loop.create_task(_runner_new_pipeline()) + else: + task = loop.create_task(_runner_old_pipeline()) + + _as_dict["current_ai_task"] = task + +# ---------- 页面 / 健康 ---------- +@app.get("/", response_class=HTMLResponse) +def root(): + with open(os.path.join("templates", "index.html"), "r", encoding="utf-8") as f: + return HTMLResponse(f.read()) + +@app.get("/api/health", response_class=PlainTextResponse) +def 
health(): + return "OK" + +# 注册 /stream.wav +register_stream_route(app) + +# ---------- WebSocket:WebUI 文本(ASR/AI 状态推送) ---------- +@app.websocket("/ws_ui") +async def ws_ui(ws: WebSocket): + await ws.accept() + ui_clients[id(ws)] = ws + try: + init = {"partial": current_partial, "finals": recent_finals[-10:]} + await ws.send_text("INIT:" + json.dumps(init, ensure_ascii=False)) + while True: + await asyncio.sleep(60) + except (WebSocketDisconnect, asyncio.CancelledError): + pass + finally: + ui_clients.pop(id(ws), None) + + +# ---------- Day 21: 新版 AI 音频处理 (SenseVoice + GLM + EdgeTTS) ---------- +async def process_complete_audio_new_pipeline(audio_data: bytes, ws: WebSocket): + """ + 非流式音频处理: + 1. SenseVoice 识别完整音频 + 2. GLM 生成回复 + 3. EdgeTTS 流式合成并发送 + """ + try: + # 1. 语音识别 + print(f"[NEW AI] 开始识别音频: {len(audio_data)} bytes") + user_text = await sensevoice_recognize(audio_data) + + if not user_text or len(user_text.strip()) < 2: + print("[NEW AI] 未识别到有效语音") + return + + print(f"[NEW AI] 用户说: {user_text}") + await ui_broadcast_partial(f"[用户] {user_text}") + + # 检查是否是导航命令 + # 使用现有的 start_ai_with_text_custom 来处理特殊命令 + # 这样可以保持导航功能不变 + + # 2. 调用 GLM 生成回复 + ai_response = await glm_chat(user_text) + + if not ai_response: + print("[NEW AI] AI 无回复") + return + + print(f"[NEW AI] AI 回复: {ai_response}") + await ui_broadcast_final(f"[AI] {ai_response}") + + # 3. 
EdgeTTS 流式合成并发送 + # 设置 VAD TTS 播放状态 + vad = get_server_vad() + vad.set_tts_playing(True) + + try: + async for audio_chunk in text_to_speech_pcm_stream(ai_response): + if audio_chunk: + await broadcast_pcm16_realtime(audio_chunk) + finally: + vad.set_tts_playing(False) + + print("[NEW AI] 音频合成并发送完成") + + except Exception as e: + print(f"[NEW AI] 处理失败: {e}") + import traceback + traceback.print_exc() + + +# ---------- WebSocket:设备音频入口(ASR 上行) ---------- +@app.websocket("/ws_audio") +async def ws_audio(ws: WebSocket): + global esp32_audio_ws + esp32_audio_ws = ws + # Day 20: 连接建立时立即保存 TTS WebSocket 引用 + # 避免因引用丢失导致 TTS 音频无法发送 + from audio_stream import set_tts_websocket + set_tts_websocket(ws) + await ws.accept() + print("\n[AUDIO] client connected (TTS WebSocket reference saved)") + recognition = None + streaming = False + last_ts = time.monotonic() + keepalive_task: Optional[asyncio.Task] = None + audio_buffer = bytearray() # Day 21: 用于新管道收集音频 + + async def stop_rec(send_notice: Optional[str] = None): + nonlocal recognition, streaming, keepalive_task + if keepalive_task and not keepalive_task.done(): + keepalive_task.cancel() + try: await keepalive_task + except Exception: pass + keepalive_task = None + if recognition: + try: recognition.stop() + except Exception: pass + recognition = None + await set_current_recognition(None) + streaming = False + if send_notice: + try: await ws.send_text(send_notice) + except Exception: pass + + async def on_sdk_error(_msg: str): + await stop_rec(send_notice="RESTART") + + async def keepalive_loop(): + nonlocal last_ts, recognition, streaming + try: + while streaming and recognition is not None: + idle = time.monotonic() - last_ts + if idle > 0.35: + try: + for _ in range(30): # ~600ms 静音 + recognition.send_audio_frame(SILENCE_CHUNK) + last_ts = time.monotonic() + except Exception: + await on_sdk_error("keepalive send failed") + return + await asyncio.sleep(0.10) + except asyncio.CancelledError: + return + + try: + while True: + 
if WebSocketState and ws.client_state != WebSocketState.CONNECTED: + break + try: + msg = await ws.receive() + except WebSocketDisconnect: + break + except RuntimeError as e: + if "Cannot call \"receive\"" in str(e): + break + raise + + if "text" in msg and msg["text"] is not None: + raw = (msg["text"] or "").strip() + cmd = raw.upper() + + if cmd == "START": + print("[AUDIO] START received") + await stop_rec() + + # Day 13: 刷新 TTS 缓存 + try: + from audio_stream import flush_tts_buffer + flushed = await flush_tts_buffer(ws) + if flushed > 0: + print(f"[AUDIO] Flushed {flushed} bytes of cached TTS audio") + except Exception as e: + print(f"[AUDIO] Error flushing TTS buffer: {e}") + + if USE_NEW_AI_PIPELINE: + # Day 21: 新管道 - 服务器端 VAD + 非流式 SenseVoice + reset_server_vad() # 重置 VAD 状态 + + # 清除对话历史(新会话开始) + from glm_client import clear_conversation_history + clear_conversation_history() + + streaming = True + await ui_broadcast_partial("(已开始接收音频…)") + await ws.send_text("OK:STARTED") + print("[NEW ASR] 新管道已启动,服务器端 VAD 监听中") + else: + # 旧管道 - 流式 DashScope + loop = asyncio.get_running_loop() + def post(coro): + asyncio.run_coroutine_threadsafe(coro, loop) + + cb = ASRCallback( + on_sdk_error=lambda s: post(on_sdk_error(s)), + post=post, + ui_broadcast_partial=ui_broadcast_partial, + ui_broadcast_final=ui_broadcast_final, + is_playing_now_fn=is_playing_now, + start_ai_with_text_fn=start_ai_with_text_custom, + full_system_reset_fn=full_system_reset, + interrupt_lock=interrupt_lock, + ) + + recognition = dash_audio.asr.Recognition( + api_key=API_KEY, model=MODEL, format=AUDIO_FMT, + sample_rate=SAMPLE_RATE, callback=cb + ) + recognition.start() + await set_current_recognition(recognition) + streaming = True + last_ts = time.monotonic() + keepalive_task = asyncio.create_task(keepalive_loop()) + await ui_broadcast_partial("(已开始接收音频…)") + await ws.send_text("OK:STARTED") + + elif cmd == "STOP": + if recognition: + for _ in range(15): # ~300ms 静音 + try: 
recognition.send_audio_frame(SILENCE_CHUNK) + except Exception: break + await stop_rec(send_notice="OK:STOPPED") + + elif cmd == "RECOGNIZE" and USE_NEW_AI_PIPELINE: + # Day 21: 客户端 VAD 检测到语音结束,请求识别 + if audio_buffer and len(audio_buffer) > 3200: # 至少 100ms 音频 + print(f"[NEW ASR] 收到 RECOGNIZE 命令,音频长度: {len(audio_buffer)} bytes") + await ui_broadcast_partial("(正在识别…)") + + # 非流式识别 + user_text = await sensevoice_recognize(bytes(audio_buffer)) + audio_buffer.clear() + + if user_text and len(user_text.strip()) >= 2: + print(f"[NEW ASR] 识别结果: {user_text}") + await ui_broadcast_final(f"[用户] {user_text}") + + # 调用 AI 回复 + async with interrupt_lock: + await start_ai_with_text_custom(user_text) + await ws.send_text("OK:RECOGNIZED") + else: + print("[NEW ASR] 未识别到有效语音") + await ws.send_text("OK:EMPTY") + else: + print("[NEW ASR] 音频太短,忽略") + await ws.send_text("OK:TOO_SHORT") + + elif raw.startswith("PROMPT:"): + text = raw[len("PROMPT:"):].strip() + if text: + async with interrupt_lock: + await start_ai_with_text_custom(text) + await ws.send_text("OK:PROMPT_ACCEPTED") + else: + await ws.send_text("ERR:EMPTY_PROMPT") + + elif "bytes" in msg and msg["bytes"] is not None: + audio_bytes = msg["bytes"] + if not hasattr(ws_audio, '_audio_recv_count'): + ws_audio._audio_recv_count = 0 + ws_audio._audio_total_bytes = 0 + ws_audio._audio_recv_count += 1 + ws_audio._audio_total_bytes += len(audio_bytes) + + if ws_audio._audio_recv_count % 500 == 0: + print(f"[AUDIO] 📥 Received: {ws_audio._audio_recv_count} packets, {ws_audio._audio_total_bytes} bytes total") + + if USE_NEW_AI_PIPELINE: + # Day 21 改进: 使用服务器端 VAD 检测语音 + if streaming: + vad = get_server_vad() + vad_result = vad.process(audio_bytes) + + if vad_result['speech_started']: + await ui_broadcast_partial("(正在录音…)") + + if vad_result['speech_ended'] and vad_result['speech_audio']: + # VAD 检测到语音结束,自动触发识别 + speech_audio = vad_result['speech_audio'] + print(f"[VAD] 自动触发识别,音频长度: {len(speech_audio)} bytes") + await 
ui_broadcast_partial("(正在识别…)") + + # 非流式识别 + user_text = await sensevoice_recognize(speech_audio) + + if user_text and len(user_text.strip()) >= 2: + print(f"[NEW ASR] 识别结果: {user_text}") + await ui_broadcast_final(f"[用户] {user_text}") + + # 调用 AI 回复 + async with interrupt_lock: + await start_ai_with_text_custom(user_text) + await ws.send_text("OK:RECOGNIZED") + else: + print("[NEW ASR] 未识别到有效语音") + await ws.send_text("OK:EMPTY") + else: + # 旧管道:实时发送到 DashScope + if streaming and recognition: + try: + recognition.send_audio_frame(audio_bytes) + last_ts = time.monotonic() + except Exception: + await on_sdk_error("send_audio_frame failed") + + except Exception as e: + print(f"\n[WS ERROR] {e}") + finally: + await stop_rec() + try: + if WebSocketState is None or ws.client_state == WebSocketState.CONNECTED: + await ws.close(code=1000) + except Exception: + pass + if esp32_audio_ws is ws: + esp32_audio_ws = None + print("[WS] connection closed") + +# ---------- WebSocket:设备相机入口(JPEG 二进制) ---------- +@app.websocket("/ws/camera") +async def ws_camera_esp(ws: WebSocket): + global esp32_camera_ws, blind_path_navigator, cross_street_navigator, cross_street_active, navigation_active, orchestrator + if esp32_camera_ws is not None: + await ws.close(code=1013) + return + esp32_camera_ws = ws + await ws.accept() + print("[CAMERA] 设备已连接") + + # 【新增】初始化盲道导航器 + if blind_path_navigator is None and yolo_seg_model is not None: + blind_path_navigator = BlindPathNavigator(yolo_seg_model, obstacle_detector) + print("[NAVIGATION] 盲道导航器已初始化") + else: + if blind_path_navigator is not None: + print("[NAVIGATION] 导航器已存在,无需重新初始化") + elif yolo_seg_model is None: + print("[NAVIGATION] 警告:YOLO模型未加载,无法初始化导航器") + + # 【新增】初始化过马路导航器 + if cross_street_navigator is None: + if yolo_seg_model: + cross_street_navigator = CrossStreetNavigator( + seg_model=yolo_seg_model, + coco_model=None, # 不使用交通灯检测 + obs_model=None # 暂时也不用障碍物检测,让它更快 + ) + print("[CROSS_STREET] 过马路导航器已初始化(简化版 - 仅斑马线检测)") + else: + 
print("[CROSS_STREET] 错误:缺少分割模型,无法初始化过马路导航器") + + if not yolo_seg_model: + print("[CROSS_STREET] - 缺少分割模型 (yolo_seg_model)") + if not obstacle_detector: + print("[CROSS_STREET] - 缺少障碍物检测器 (obstacle_detector)") + + if orchestrator is None and blind_path_navigator is not None and cross_street_navigator is not None: + orchestrator = NavigationMaster(blind_path_navigator, cross_street_navigator) + print("[NAV MASTER] 统领状态机已初始化(托管模式)") + frame_counter = 0 # 添加帧计数器 + + # Day 20: 性能诊断变量 + _perf_last_frame_time = None + _perf_frame_intervals = [] + _perf_broadcast_times = [] + _perf_nav_times = [] + + try: + while True: + msg = await ws.receive() + if "bytes" in msg and msg["bytes"] is not None: + data = msg["bytes"] + frame_counter += 1 + + # Day 20: 记录帧接收时间 + _perf_frame_time = time.perf_counter() + if _perf_last_frame_time is not None: + _perf_frame_intervals.append(_perf_frame_time - _perf_last_frame_time) + _perf_last_frame_time = _perf_frame_time + + # 【新增】录制原始帧 + try: + sync_recorder.record_frame(data) + except Exception as e: + if frame_counter % 100 == 0: # 避免日志刷屏 + print(f"[RECORDER] 录制帧失败: {e}") + + try: + last_frames.append((time.time(), data)) + except Exception: + pass + + # 推送到bridge_io(供yolomedia使用) + bridge_io.push_raw_jpeg(data) + + # 【调试】检查导航条件 + if frame_counter % 60 == 0: # 每60帧输出一次(约5-6秒) + state_dbg = orchestrator.get_state() if orchestrator else "N/A" + + # Day 20: 性能诊断汇总 + if _perf_frame_intervals: + avg_interval = sum(_perf_frame_intervals) / len(_perf_frame_intervals) * 1000 + fps = 1000 / avg_interval if avg_interval > 0 else 0 + _perf_frame_intervals.clear() + else: + avg_interval = 0 + fps = 0 + + avg_broadcast = sum(_perf_broadcast_times) / len(_perf_broadcast_times) if _perf_broadcast_times else 0 + avg_nav = sum(_perf_nav_times) / len(_perf_nav_times) if _perf_nav_times else 0 + _perf_broadcast_times.clear() + _perf_nav_times.clear() + + print(f"[PERF] 帧:{frame_counter} | 客户端FPS:{fps:.1f} | 帧间隔:{avg_interval:.1f}ms | " + 
f"广播:{avg_broadcast:.1f}ms | 导航:{avg_nav:.1f}ms | state={state_dbg}") + + # 【Day 19 优化】延迟解码:只在需要处理时才解码,避免白白浪费 CPU + # 先检查是否需要导航处理 + needs_processing = (orchestrator and not yolomedia_running) + bgr = None # 延迟初始化 + + if needs_processing: + current_state = orchestrator.get_state() + + # 【Day 19】ITEM_SEARCH/CHAT/IDLE 模式无需处理,直接转发原始 JPEG + if current_state in ("ITEM_SEARCH", "CHAT", "IDLE"): + if not yolomedia_sending_frames and camera_viewers: + await _broadcast_to_viewers(data) # 零拷贝直传 + continue + + # 需要导航处理时才解码 + try: + bgr = turbo_decode(data) + if bgr is None or bgr.size == 0: + if frame_counter % 30 == 0: + print(f"[JPEG] 解码失败:数据长度={len(data)}") + bgr = None + except Exception as e: + if frame_counter % 30 == 0: + print(f"[JPEG] 解码异常: {e}") + bgr = None + + # 【托管】优先交给统领状态机(寻物未占用画面时) + if orchestrator and not yolomedia_running and bgr is not None: + out_img = bgr # 默认输出原图 + try: + # 【新增】检查是否在红绿灯检测模式 + if current_state == "TRAFFIC_LIGHT_DETECTION": + # 红绿灯检测模式:使用跳帧机制避免阻塞 + import trafficlight_detection + global _traffic_light_task, _traffic_light_result_jpeg, _traffic_light_pending_frame + + # 更新待处理帧 + _traffic_light_pending_frame = bgr + + # 如果没有正在运行的任务,启动一个 + if _traffic_light_task is None or _traffic_light_task.done(): + if _traffic_light_task is not None and _traffic_light_task.done(): + try: + result = _traffic_light_task.result() + if result and result.get('vis_image') is not None: + enc = turbo_encode(result['vis_image'], quality=80) + if enc: + _traffic_light_result_jpeg = enc + except Exception: + pass + + # 启动新任务 + if _traffic_light_pending_frame is not None: + frame = _traffic_light_pending_frame + _traffic_light_pending_frame = None + loop = asyncio.get_event_loop() + _traffic_light_task = loop.run_in_executor( + frame_processing_executor, + trafficlight_detection.process_single_frame, + frame, + None + ) + + # 广播红绿灯检测结果(独立于盲道导航缓存) + if camera_viewers: + if _traffic_light_result_jpeg is not None: + await 
_broadcast_to_viewers(_traffic_light_result_jpeg) + else: + await _broadcast_to_viewers(data) # 首帧回退 + continue # 跳过盲道导航的广播逻辑 + else: + # 【Day 15 跳帧机制】非阻塞式帧处理 + # 不等待处理完成,使用最后一次成功的结果 + global _nav_processing_task, _nav_last_result_image, _nav_last_result_jpeg, _nav_pending_frame + + # 更新待处理帧(始终是最新的) + _nav_pending_frame = bgr + + # 如果没有正在运行的任务,启动一个 + if _nav_processing_task is None or _nav_processing_task.done(): + # 检查上一个任务的结果 + if _nav_processing_task is not None and _nav_processing_task.done(): + # Day 20: 记录处理耗时 + global _nav_task_start_time + if _nav_task_start_time is not None: + nav_elapsed = (time.perf_counter() - _nav_task_start_time) * 1000 + _perf_nav_times.append(nav_elapsed) + _nav_task_start_time = None + + try: + res = _nav_processing_task.result() + if res is not None: + _nav_last_result_image = res.annotated_image + # 【Day 19 优化】立即编码并缓存 JPEG,避免每帧重复编码 + if _nav_last_result_image is not None: + # 使用 TurboJPEG 编码 + enc_result = turbo_encode(_nav_last_result_image, quality=80) + if enc_result: + _nav_last_result_jpeg = enc_result + # 语音引导 + if res.guidance_text: + try: + # Day 21 优化:视觉优先级中断 + # 当检测到近距离障碍物时,打断正在进行的 AI 对话 + obstacle_keywords = ['前方有', '停一下', '注意避让', '左侧有', '右侧有'] + is_obstacle_warning = any(kw in res.guidance_text for kw in obstacle_keywords) + + if is_obstacle_warning: + # 检查是否有正在进行的 AI 对话 + if is_playing_now(): + # 打断 AI 对话,优先播报障碍物警告 + print(f"[PRIORITY INTERRUPT] 检测到障碍物警告,打断AI对话: {res.guidance_text}") + asyncio.create_task(hard_reset_audio("Obstacle priority interrupt")) + + play_voice_text(res.guidance_text) + asyncio.create_task(ui_broadcast_final(f"[导航] {res.guidance_text}")) + except Exception: + pass + except Exception: + print(f"[NAV MASTER] 获取导航结果异常:") + traceback.print_exc() + + # 启动新的处理任务 + if _nav_pending_frame is not None: + frame_to_process = _nav_pending_frame + _nav_pending_frame = None + _nav_task_start_time = time.perf_counter() # Day 20: 记录开始时间 + loop = asyncio.get_event_loop() + _nav_processing_task = 
loop.run_in_executor( + frame_processing_executor, + orchestrator.process_frame, + frame_to_process + ) + + # 使用最后一次成功的结果(不阻塞等待) + out_img = _nav_last_result_image if _nav_last_result_image is not None else bgr + except Exception as e: + if frame_counter % 100 == 0: + print(f"[NAV MASTER] 处理帧时出错: {e}") + + # 【Day 19 优化】广播导航结果,优先使用缓存的 JPEG + if camera_viewers: + _t_broadcast = time.perf_counter() # Day 20: 计时 + # 如果有缓存的 JPEG(导航结果),直接使用 + if _nav_last_result_jpeg is not None: + await _broadcast_to_viewers(_nav_last_result_jpeg) + elif out_img is not None: + # 回退:使用 TurboJPEG 编码当前帧 + enc_result = turbo_encode(out_img, quality=80) + if enc_result: + await _broadcast_to_viewers(enc_result) + else: + # 【Day 23 修复】首帧回退:导航刚启动时无处理结果,直接广播原始帧 + await _broadcast_to_viewers(data) + _perf_broadcast_times.append((time.perf_counter() - _t_broadcast) * 1000) # Day 20 + # 已托管,进入下一帧 + continue + + # 【Day 19 优化】零拷贝直传:原始 JPEG 直接转发,无需解码再编码 + # 之前的问题:imdecode + imencode 浪费 CPU,原始 data 就是 JPEG + if not yolomedia_sending_frames and camera_viewers: + try: + # 直接转发原始 JPEG 数据,跳过解码-编码循环 + await _broadcast_to_viewers(data) + except Exception as e: + print(f"[CAMERA] Broadcast error: {e}") + + elif "type" in msg and msg["type"] in ("websocket.close", "websocket.disconnect"): + break + except WebSocketDisconnect: + pass + except Exception as e: + print(f"[CAMERA ERROR] {e}") + finally: + try: + if WebSocketState is None or ws.client_state == WebSocketState.CONNECTED: + await ws.close(code=1000) + except Exception: + pass + esp32_camera_ws = None + print("[CAMERA] 设备已断开") + + # 【新增】清理导航状态 + if blind_path_navigator: + blind_path_navigator.reset() + if cross_street_navigator: + cross_street_navigator.reset() + if orchestrator: + orchestrator.reset() + print("[NAV MASTER] 统领器已重置") + +# ---------- WebSocket:浏览器订阅相机帧 ---------- +@app.websocket("/ws/viewer") +async def ws_viewer(ws: WebSocket): + await ws.accept() + camera_viewers.add(ws) + print(f"[VIEWER] Browser connected. 
Total viewers: {len(camera_viewers)}", flush=True) + try: + while True: + # 保持连接活跃 + await asyncio.sleep(60) + except (WebSocketDisconnect, asyncio.CancelledError): + pass # 正常关闭,静默处理 + finally: + try: + camera_viewers.remove(ws) + except Exception: + pass + print(f"[VIEWER] Removed. Total viewers: {len(camera_viewers)}", flush=True) + +# ---------- WebSocket:浏览器订阅 IMU ---------- +@app.websocket("/ws") +async def ws_imu(ws: WebSocket): + await ws.accept() + imu_ws_clients.add(ws) + try: + while True: + await asyncio.sleep(60) + except (WebSocketDisconnect, asyncio.CancelledError): + pass # 正常关闭,静默处理 + finally: + imu_ws_clients.discard(ws) + +async def imu_broadcast(msg: str): + if not imu_ws_clients: return + dead = [] + for ws in list(imu_ws_clients): + try: + await ws.send_text(msg) + except Exception: + dead.append(ws) + for ws in dead: + imu_ws_clients.discard(ws) + +# ---------- 服务端 IMU 估计(原样保留) ---------- +from math import atan2, hypot, pi +GRAV_BETA = 0.98 +STILL_W = 0.4 +YAW_DB = 0.08 +YAW_LEAK = 0.2 +ANG_EMA = 0.15 +AUTO_REZERO = True +USE_PROJ = True +FREEZE_STILL= True +G = 9.807 +A_TOL = 0.08 * G +gLP = {"x":0.0, "y":0.0, "z":0.0} +gOff= {"x":0.0, "y":0.0, "z":0.0} +BIAS_ALPHA = 0.002 +yaw = 0.0 +Rf = Pf = Yf = 0.0 +ref = {"roll":0.0, "pitch":0.0, "yaw":0.0} +holdStart = 0.0 +isStill = False +last_ts_imu = 0.0 +last_wall = 0.0 +imu_store: List[Dict[str, Any]] = [] + +def _wrap180(a: float) -> float: + a = a % 360.0 + if a >= 180.0: a -= 360.0 + if a < -180.0: a += 360.0 + return a + +def process_imu_and_maybe_store(d: Dict[str, Any]): + global gLP, gOff, yaw, Rf, Pf, Yf, ref, holdStart, isStill, last_ts_imu, last_wall + + t_ms = float(d.get("ts", 0.0)) + now_wall = time.monotonic() + if t_ms <= 0.0: + t_ms = (now_wall * 1000.0) + if last_ts_imu <= 0.0 or t_ms <= last_ts_imu or (t_ms - last_ts_imu) > 3000.0: + dt = 0.02 + else: + dt = (t_ms - last_ts_imu) / 1000.0 + last_ts_imu = t_ms + + ax = float(((d.get("accel") or {}).get("x", 0.0))) + ay = 
float(((d.get("accel") or {}).get("y", 0.0))) + az = float(((d.get("accel") or {}).get("z", 0.0))) + wx = float(((d.get("gyro") or {}).get("x", 0.0))) + wy = float(((d.get("gyro") or {}).get("y", 0.0))) + wz = float(((d.get("gyro") or {}).get("z", 0.0))) + + gLP["x"] = GRAV_BETA * gLP["x"] + (1.0 - GRAV_BETA) * ax + gLP["y"] = GRAV_BETA * gLP["y"] + (1.0 - GRAV_BETA) * ay + gLP["z"] = GRAV_BETA * gLP["z"] + (1.0 - GRAV_BETA) * az + gmag = hypot(gLP["x"], gLP["y"], gLP["z"]) or 1.0 + gHat = {"x": gLP["x"]/gmag, "y": gLP["y"]/gmag, "z": gLP["z"]/gmag} + + roll = (atan2(az, ay) * 180.0 / pi) + pitch = (atan2(-ax, ay) * 180.0 / pi) + + aNorm = hypot(ax, ay, az); wNorm = hypot(wx, wy, wz) + nearFlat = (abs(roll) < 2.0 and abs(pitch) < 2.0) + stillCond = (abs(aNorm - G) < A_TOL) and (wNorm < STILL_W) + + if stillCond: + if holdStart <= 0.0: holdStart = t_ms + if not isStill and (t_ms - holdStart) > 350.0: isStill = True + gOff["x"] = (1.0 - BIAS_ALPHA)*gOff["x"] + BIAS_ALPHA*wx + gOff["y"] = (1.0 - BIAS_ALPHA)*gOff["y"] + BIAS_ALPHA*wy + gOff["z"] = (1.0 - BIAS_ALPHA)*gOff["z"] + BIAS_ALPHA*wz + else: + holdStart = 0.0; isStill = False + + if USE_PROJ: + yawdot = ((wx - gOff["x"])*gHat["x"] + (wy - gOff["y"])*gHat["y"] + (wz - gOff["z"])*gHat["z"]) + else: + yawdot = (wy - gOff["y"]) + + if abs(yawdot) < YAW_DB: yawdot = 0.0 + if FREEZE_STILL and stillCond: yawdot = 0.0 + + yaw = _wrap180(yaw + yawdot * dt) + + if (YAW_LEAK > 0.0) and nearFlat and stillCond and abs(yaw) > 0.0: + step = YAW_LEAK * dt * (-1.0 if yaw > 0 else (1.0 if yaw < 0 else 0.0)) + if abs(yaw) <= abs(step): yaw = 0.0 + else: yaw += step + + global Rf, Pf, Yf, ref, last_wall + Rf = ANG_EMA * roll + (1.0 - ANG_EMA) * Rf + Pf = ANG_EMA * pitch + (1.0 - ANG_EMA) * Pf + Yf = ANG_EMA * yaw + (1.0 - ANG_EMA) * Yf + + if AUTO_REZERO and nearFlat and (wNorm < STILL_W): + if holdStart <= 0.0: holdStart = t_ms + if not isStill and (t_ms - holdStart) > 350.0: + ref.update({"roll": Rf, "pitch": Pf, "yaw": Yf}) + 
isStill = True + + R = _wrap180(Rf - ref["roll"]) + P = _wrap180(Pf - ref["pitch"]) + Y = _wrap180(Yf - ref["yaw"]) + + now_wall = time.monotonic() + if last_wall <= 0.0 or (now_wall - last_wall) >= 0.100: + last_wall = now_wall + item = { + "ts": t_ms/1000.0, + "angles": {"roll": R, "pitch": P, "yaw": Y}, + "accel": {"x": ax, "y": ay, "z": az}, + "gyro": {"x": wx, "y": wy, "z": wz}, + } + imu_store.append(item) + +# ---------- UDP 接收 IMU 并转发 ---------- +class UDPProto(asyncio.DatagramProtocol): + def connection_made(self, transport): + print(f"[UDP] listening on {UDP_IP}:{UDP_PORT}") + def datagram_received(self, data, addr): + try: + s = data.decode('utf-8', errors='ignore').strip() + d = json.loads(s) + if 'ts' not in d and 'timestamp_ms' in d: + d['ts'] = d.pop('timestamp_ms') + process_imu_and_maybe_store(d) + asyncio.create_task(imu_broadcast(json.dumps(d))) + except Exception: + pass + + + + + + + +# --- 导出接口(可选) --- +def get_last_frames(): + return last_frames + +def get_camera_ws(): + return esp32_camera_ws + +if __name__ == "__main__": + import signal + import logging + + # Day 13: 抑制 Ctrl+C 时的 asyncio CancelledError 日志 + logging.getLogger("uvicorn.error").setLevel(logging.CRITICAL) + + # Day 13: 移除重复的信号处理器,模块级别已经处理了 + # 信号处理在模块顶部已注册 + + # Day 20: Numba JIT 预热,避免首次调用时的编译延迟 + try: + from numba_utils import warmup as numba_warmup + numba_warmup() + except ImportError: + print("[启动] Numba 未安装,跳过预热") + + uvicorn.run( + app, host="0.0.0.0", port=8081, + log_level="warning", access_log=False, + loop="asyncio", workers=1, reload=False + ) diff --git a/asr_core.py b/asr_core.py new file mode 100644 index 0000000..b3cdcdd --- /dev/null +++ b/asr_core.py @@ -0,0 +1,221 @@ +# asr_core.py +# -*- coding: utf-8 -*- +import os, json, asyncio +from typing import Any, Dict, List, Optional, Callable, Tuple + +ASR_DEBUG_RAW = os.getenv("ASR_DEBUG_RAW", "0") == "1" + +def _shorten(s: str, limit: int = 200) -> str: + if not s: + return "" + return s if len(s) <= limit else 
(s[:limit] + "…") + +def _safe_to_dict(x: Any) -> Dict[str, Any]: + if isinstance(x, dict): return x + for attr in ("to_dict", "model_dump", "__dict__"): + try: + v = getattr(x, attr, None) + except Exception: + v = None + if callable(v): + try: + d = v() + if isinstance(d, dict): return d + except Exception: + pass + elif isinstance(v, dict): + return v + try: + s = str(x) + if s and s.lstrip().startswith("{") and s.rstrip().endswith("}"): + return json.loads(s) + except Exception: + pass + return {"_raw": str(x)} + +def _extract_sentence(event_obj: Any) -> Tuple[Optional[str], Optional[bool]]: + d = _safe_to_dict(event_obj) + cands: List[Dict[str, Any]] = [d] + for k in ("output", "data", "result"): + v = d.get(k) + if isinstance(v, dict): + cands.append(v) + for obj in cands: + sent = obj.get("sentence") + if isinstance(sent, dict): + text = sent.get("text") + is_end = sent.get("sentence_end") + if is_end is not None: + is_end = bool(is_end) + return text, is_end + for obj in cands: + if "text" in obj and isinstance(obj.get("text"), str): + return obj.get("text"), None + return None, None + +# ====== 仅热词触发的“全清零复位”配置 ====== +INTERRUPT_KEYWORDS = set( + os.getenv("INTERRUPT_KEYWORDS", "停下,别说了,停止").split(",") +) + +# Day 21: 导航控制白名单 - 这些命令包含热词但不应触发重置 +# 例如"停止导航"包含"停止",但不应该触发 full_system_reset +NAV_CONTROL_WHITELIST = [ + "停止导航", "结束导航", "停止检测", "停止红绿灯", + "开始导航", "盲道导航", "开始过马路", "过马路结束", + "帮我导航", "帮我过马路" +] + + +def _normalize_cn(s: str) -> str: + try: + import unicodedata + s = "".join(" " if unicodedata.category(ch) == "Zs" else ch for ch in s) + s = s.strip().lower() + except Exception: + s = (s or "").strip().lower() + return s + +# ============ ASR 全局总闸 ============ +_current_recognition: Optional[object] = None +_rec_lock = asyncio.Lock() + +async def set_current_recognition(r): + global _current_recognition + async with _rec_lock: + _current_recognition = r + +async def stop_current_recognition(): + global _current_recognition + async with _rec_lock: + r = 
_current_recognition + _current_recognition = None + if r: + try: + r.stop() # DashScope SDK 的实时识别停止 + except Exception: + pass + +# ============ ASR 回调 ============ +class ASRCallback: + """ + 设计目标: + 1) “停下 / 别说了 …”等热词一出现 → 立刻全清零复位(恢复到刚启动后的状态)。 + 2) 除此之外【不接受打断】;AI 正在播报时,用户说话只做展示,不触发新一轮。 + 3) 不再用 partial 叠加字符串;partial 只用于 UI 临时展示;只有 final sentence 用于驱动 AI。 + """ + + def __init__( + self, + on_sdk_error: Callable[[str], None], + post: Callable[[asyncio.Future], None], + ui_broadcast_partial, + ui_broadcast_final, + is_playing_now_fn: Callable[[], bool], + start_ai_with_text_fn, # async (text) + full_system_reset_fn, # async (reason) + interrupt_lock: asyncio.Lock, + ): + self._on_sdk_error = on_sdk_error + self._post = post + self._last_partial_for_ui: str = "" # 只用于 UI 展示 + self._last_final_text: str = "" # 以句末 final 为准 + self._hot_interrupted: bool = False # 本句是否因热词触发过复位(防抖) + + self._ui_partial = ui_broadcast_partial + self._ui_final = ui_broadcast_final + self._is_playing = is_playing_now_fn + self._start_ai = start_ai_with_text_fn + self._full_reset = full_system_reset_fn + self._interrupt_lock = interrupt_lock + + def on_open(self): pass + def on_close(self): pass + def on_complete(self): pass + + def on_error(self, err): + try: + self._post(self._ui_partial("")) + self._on_sdk_error(str(err)) + except Exception: + pass + + def on_result(self, result): self._handle(result) + def on_event(self, event): self._handle(event) + + def _has_hotword(self, text: str) -> bool: + """Day 21 修复: 检查是否包含热词,但排除导航控制命令""" + t = _normalize_cn(text) + if not t: return False + + # Day 21: 先检查是否是导航控制命令(白名单),如果是则不触发热词 + for nav_cmd in NAV_CONTROL_WHITELIST: + if _normalize_cn(nav_cmd) in t: + return False # 导航命令不触发热词重置 + + # 检查热词 + for w in INTERRUPT_KEYWORDS: + if w and _normalize_cn(w) in t: + return True + return False + + def _handle(self, event: Any): + if ASR_DEBUG_RAW: + try: + rawd = _safe_to_dict(event) + print("[ASR EVENT RAW]", json.dumps(rawd, ensure_ascii=False), 
class AudioCompressor:
    """Audio codecs for 16-bit mono PCM: G.711 mu-law and IMA ADPCM.

    Used to shrink pre-recorded prompt audio before it is cached/sent.
    All methods are stateless static helpers.
    """

    @staticmethod
    def pcm16_to_ulaw(pcm_data: bytes) -> bytes:
        """Compress 16-bit PCM to 8-bit mu-law (50%: 16 bit -> 8 bit)."""
        samples = np.frombuffer(pcm_data, dtype=np.int16)
        return bytes(AudioCompressor._linear_to_ulaw(int(s)) for s in samples)

    @staticmethod
    def ulaw_to_pcm16(ulaw_data: bytes) -> bytes:
        """Expand 8-bit mu-law back to 16-bit PCM."""
        pcm = [AudioCompressor._ulaw_to_linear(b) for b in ulaw_data]
        return np.array(pcm, dtype=np.int16).tobytes()

    @staticmethod
    def _linear_to_ulaw(sample: int) -> int:
        """Encode one 16-bit linear sample as a G.711 mu-law byte.

        BUGFIX: the previous version scanned the exponent from the LOW bit
        upward and its decoder never subtracted the bias, so a round trip of
        silence (0) produced ~16896 instead of 0. This follows the classic
        CCITT/Sun reference algorithm.
        """
        ULAW_BIAS = 0x84
        ULAW_CLIP = 32635

        sign = 0
        if sample < 0:
            sign = 0x80
            sample = -sample
        if sample > ULAW_CLIP:
            sample = ULAW_CLIP
        sample += ULAW_BIAS

        # Segment = position of the highest set bit between 7 and 14.
        exponent = 7
        mask = 0x4000
        while exponent > 0 and not (sample & mask):
            exponent -= 1
            mask >>= 1

        mantissa = (sample >> (exponent + 3)) & 0x0F
        return ~(sign | (exponent << 4) | mantissa) & 0xFF

    @staticmethod
    def _ulaw_to_linear(ulawbyte: int) -> int:
        """Decode one mu-law byte back to a 16-bit linear sample."""
        ULAW_BIAS = 0x84
        ulawbyte = ~ulawbyte & 0xFF
        sign = ulawbyte & 0x80
        exponent = (ulawbyte >> 4) & 0x07
        mantissa = ulawbyte & 0x0F
        # Subtracting the bias here is what makes decode(encode(0)) == 0.
        sample = (((mantissa << 3) + ULAW_BIAS) << exponent) - ULAW_BIAS
        return -sample if sign else sample

    # IMA ADPCM step-size table (89 entries) and index-adjust table.
    # Class-level so encoder and decoder are guaranteed to share one copy.
    _STEP_TABLE = (
        7, 8, 9, 10, 11, 12, 13, 14, 16, 17,
        19, 21, 23, 25, 28, 31, 34, 37, 41, 45,
        50, 55, 60, 66, 73, 80, 88, 97, 107, 118,
        130, 143, 157, 173, 190, 209, 230, 253, 279, 307,
        337, 371, 408, 449, 494, 544, 598, 658, 724, 796,
        876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066,
        2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358,
        5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899,
        15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767,
    )
    _INDEX_TABLE = (-1, -1, -1, -1, 2, 4, 6, 8)

    @staticmethod
    def _adpcm_step(nibble: int, predicted: int, step_index: int):
        """Apply one ADPCM nibble to the predictor state.

        Shared by encoder and decoder so both stay in lockstep.
        Returns the new (predicted, step_index) pair.
        """
        step = AudioCompressor._STEP_TABLE[step_index]
        diff = step >> 3
        if nibble & 4:
            diff += step
        if nibble & 2:
            diff += step >> 1
        if nibble & 1:
            diff += step >> 2
        if nibble & 8:
            predicted -= diff
        else:
            predicted += diff
        # Clamp to the int16 range.
        predicted = max(-32768, min(32767, predicted))
        step_index += AudioCompressor._INDEX_TABLE[nibble & 7]
        step_index = max(0, min(88, step_index))
        return predicted, step_index

    @staticmethod
    def pcm16_to_adpcm(pcm_data: bytes) -> bytes:
        """Compress 16-bit PCM to 4-bit IMA ADPCM (75%: 16 bit -> 4 bit).

        Wire format: 3-byte little-endian header '<hB' carrying the initial
        predictor value and step index, then two 4-bit samples per byte
        (low nibble first). Keeps good voice quality at a quarter the size.
        """
        samples = np.frombuffer(pcm_data, dtype=np.int16)
        step_table = AudioCompressor._STEP_TABLE

        predicted = 0
        step_index = 0
        # Header describes the INITIAL decoder state (always 0, 0 here).
        header = struct.pack('<hB', predicted, step_index)

        out = bytearray()
        for i in range(0, len(samples), 2):
            byte = 0
            for j in range(2):
                if i + j >= len(samples):
                    break
                # int() avoids numpy int16 wraparound in the subtraction.
                sample = int(samples[i + j])
                diff = sample - predicted
                step = step_table[step_index]

                # Quantize the difference to a sign + 3 magnitude bits.
                nibble = 0
                if diff < 0:
                    nibble = 8
                    diff = -diff
                if diff >= step:
                    nibble |= 4
                    diff -= step
                step >>= 1
                if diff >= step:
                    nibble |= 2
                    diff -= step
                step >>= 1
                if diff >= step:
                    nibble |= 1

                # Track the decoder's reconstruction exactly.
                predicted, step_index = AudioCompressor._adpcm_step(
                    nibble, predicted, step_index)

                if j == 0:
                    byte = nibble
                else:
                    byte |= nibble << 4
            out.append(byte)

        return header + bytes(out)

    @staticmethod
    def adpcm_to_pcm16(adpcm_data: bytes) -> bytes:
        """Expand IMA ADPCM (as produced by pcm16_to_adpcm) to 16-bit PCM.

        Returns b'' when the input is too short to hold the 3-byte header.
        Note: output always has an even sample count (two nibbles per byte).
        """
        if len(adpcm_data) < 3:
            return b''

        predicted, step_index = struct.unpack('<hB', adpcm_data[:3])
        step_index = max(0, min(88, step_index))  # defend against bad input

        pcm = []
        for byte in adpcm_data[3:]:
            for shift in (0, 4):  # low nibble first, matching the encoder
                nibble = (byte >> shift) & 0x0F
                predicted, step_index = AudioCompressor._adpcm_step(
                    nibble, predicted, step_index)
                pcm.append(predicted)

        return np.array(pcm, dtype=np.int16).tobytes()

    @staticmethod
    def downsample_pcm16(pcm_data: bytes, from_rate: int = 16000, to_rate: int = 8000) -> bytes:
        """Optionally reduce the sample rate (16 kHz -> 8 kHz halves the data)."""
        if from_rate == to_rate:
            return pcm_data

        samples = np.frombuffer(pcm_data, dtype=np.int16)
        if from_rate == 16000 and to_rate == 8000:
            # Cheap decimation: keep every other sample.
            out = samples[::2]
        else:
            # Generic ratio: linear interpolation (scipy-free resampling).
            ratio = to_rate / from_rate
            new_len = int(len(samples) * ratio)
            out = np.interp(
                np.linspace(0, len(samples) - 1, new_len),
                np.arange(len(samples)),
                samples,
            ).astype(np.int16)
        return out.tobytes()
class CompressedAudioCache:
    """In-memory cache of audio files stored in a compressed wire format.

    Payload layout for compressed entries: a 1-byte type tag (0x01 mu-law,
    0x02 ADPCM) plus a big-endian uint32 of the original PCM byte length,
    followed by the codec output. "none" entries are raw PCM.
    """

    def __init__(self, compression_type: str = "adpcm", use_downsample: bool = False):
        """
        compression_type: "none", "ulaw", "adpcm"
        """
        self.compression_type = compression_type
        self.use_downsample = use_downsample
        self._cache = {}            # filepath -> compressed payload
        self._original_sizes = {}   # filepath -> raw PCM byte count

    def load_and_compress(self, filepath: str) -> Optional[bytes]:
        """Load a WAV file, normalize to 16 kHz mono PCM16, compress, cache."""
        cached = self._cache.get(filepath)
        if cached is not None:
            return cached

        try:
            with wave.open(filepath, 'rb') as wav:
                channels = wav.getnchannels()
                sampwidth = wav.getsampwidth()
                framerate = wav.getframerate()

                if channels != 1:
                    logger.warning(f"{filepath} 不是单声道")
                if sampwidth != 2:
                    logger.warning(f"{filepath} 不是16位音频")

                frames = wav.readframes(wav.getnframes())

                # Stereo -> mono (left channel).
                if channels == 2:
                    import audioop
                    frames = audioop.tomono(frames, sampwidth, 1, 0)

                # Always convert to 16 kHz to match the client player.
                if framerate != 16000:
                    import audioop
                    frames, _ = audioop.ratecv(frames, sampwidth, 1, framerate, 16000, None)
                    framerate = 16000

            # Size AFTER normalization, used for the compression stats.
            self._original_sizes[filepath] = len(frames)

            if self.compression_type == "ulaw":
                # Tag 0x01 = mu-law.
                payload = struct.pack('!BI', 0x01, len(frames)) + AudioCompressor.pcm16_to_ulaw(frames)
            elif self.compression_type == "adpcm":
                # Tag 0x02 = ADPCM.
                payload = struct.pack('!BI', 0x02, len(frames)) + AudioCompressor.pcm16_to_adpcm(frames)
            else:
                payload = frames

            self._cache[filepath] = payload

            ratio = len(payload) / self._original_sizes[filepath]
            logger.info(f"[压缩] {os.path.basename(filepath)}: "
                        f"{self._original_sizes[filepath]} -> {len(payload)} bytes "
                        f"({ratio:.1%})")
            return payload

        except Exception as e:
            logger.error(f"压缩音频失败 {filepath}: {e}")
            return None

    def decompress(self, compressed_data: bytes) -> Optional[bytes]:
        """Decode a cached payload back to raw PCM16.

        Untagged / too-short data is returned unchanged (passthrough).
        """
        if not compressed_data or len(compressed_data) < 5:
            return compressed_data

        try:
            tag = compressed_data[0]
            body = compressed_data[5:]  # skip the 5-byte tag+length header
            if tag == 0x01:
                return AudioCompressor.ulaw_to_pcm16(body)
            if tag == 0x02:
                return AudioCompressor.adpcm_to_pcm16(body)
            return compressed_data  # not one of ours: assume raw PCM
        except Exception as e:
            logger.error(f"解压音频失败: {e}")
            return compressed_data

    def get_compression_stats(self) -> dict:
        """Aggregate size/ratio statistics over everything cached so far."""
        total_original = sum(self._original_sizes.values())
        total_compressed = sum(len(data) for data in self._cache.values())
        return {
            "files_cached": len(self._cache),
            "total_original_size": total_original,
            "total_compressed_size": total_compressed,
            "compression_ratio": total_compressed / total_original if total_original > 0 else 0,
            "bytes_saved": total_original - total_compressed,
        }


# Global compressed-audio cache instance.
# Default is ADPCM (better quality, ~75% reduction); override with the
# AIGLASS_COMPRESS_TYPE environment variable: none | ulaw | adpcm.
import os
compression_type = os.getenv("AIGLASS_COMPRESS_TYPE", "adpcm").lower()
if compression_type not in ("none", "ulaw", "adpcm"):
    compression_type = "adpcm"
compressed_audio_cache = CompressedAudioCache(compression_type=compression_type, use_downsample=False)
def load_wav_file(filepath):
    """Load a WAV file and return its PCM payload, normalized to 16 kHz mono.

    Results are memoized in _audio_cache. With AIGLASS_COMPRESS_AUDIO=1
    (the default) the compressed cache is used, so the stored payload is the
    tagged/compressed form rather than raw PCM.
    """
    cached = _audio_cache.get(filepath)
    if cached is not None:
        return cached

    # Preferred path: compressed cache (payload carries a 5-byte tag header).
    if os.getenv("AIGLASS_COMPRESS_AUDIO", "1") == "1":
        payload = compressed_audio_cache.load_and_compress(filepath)
        if payload:
            _audio_cache[filepath] = payload
            return payload

    # Fallback: raw, uncompressed loading.
    try:
        with wave.open(filepath, 'rb') as wav:
            channels = wav.getnchannels()
            sampwidth = wav.getsampwidth()
            framerate = wav.getframerate()

            if channels != 1:
                print(f"[AUDIO] 警告: {filepath} 不是单声道,将只使用第一个声道")
            if sampwidth != 2:
                print(f"[AUDIO] 警告: {filepath} 不是16位音频")

            frames = wav.readframes(wav.getnframes())

            # Stereo -> mono: keep the left channel only.
            if channels == 2:
                import audioop
                frames = audioop.tomono(frames, sampwidth, 1, 0)

            # Normalize to 16 kHz; ratecv keeps pitch and duration intact.
            if framerate != 16000:
                import audioop
                frames, _ = audioop.ratecv(frames, sampwidth, 1, framerate, 16000, None)
                print(f"[AUDIO] 重采样: {filepath} {framerate}Hz -> 16000Hz")

            _audio_cache[filepath] = frames
            return frames

    except Exception as e:
        print(f"[AUDIO] 加载音频文件失败 {filepath}: {e}")
        return None

def _merge_voice_map():
    """Merge entries from voice/map.zh-CN.json into AUDIO_MAP.

    Each map entry lists candidate files; the first one that exists on disk
    wins. Missing files are logged and skipped.
    """
    try:
        if not os.path.exists(VOICE_MAP_FILE):
            print(f"[AUDIO] 未找到映射文件: {VOICE_MAP_FILE}")
            return
        with open(VOICE_MAP_FILE, "r", encoding="utf-8") as f:
            mapping = json.load(f)

        added = 0
        for text, info in (mapping or {}).items():
            files = (info or {}).get("files") or []
            if not files:
                continue
            candidate = os.path.join(VOICE_DIR, files[0])
            if os.path.exists(candidate):
                AUDIO_MAP[text] = candidate
                added += 1
            else:
                print(f"[AUDIO] 映射文件缺失: {candidate}")
        print(f"[AUDIO] 已合并 voice 映射 {added} 条")
    except Exception as e:
        print(f"[AUDIO] 读取 voice 映射失败: {e}")

def preload_all_audio():
    """Pre-load every mapped audio file into the in-memory cache."""
    print("[AUDIO] 开始预加载音频文件...")
    loaded_count = 0
    # NOTE: per-clip speed-up (1.3x for crosswalk prompts) is disabled for
    # now — it would need a speed-aware cache key before re-enabling.
    for _key, filepath in AUDIO_MAP.items():
        if os.path.exists(filepath) and load_wav_file(filepath):
            loaded_count += 1
    print(f"[AUDIO] 预加载完成,共加载 {loaded_count} 个音频文件")
async def _broadcast_audio_optimized(pcm_data: bytes):
    """Broadcast one clip; the transport layer handles the 20 ms pacing.

    Wraps the PCM with a short lead-in/lead-out of silence (a longer lead-in
    after >3 s of idle so the speaker path can warm up) and flags the player
    as busy for the duration.
    """
    global _last_play_ts, _is_playing
    try:
        with _playing_lock:
            _is_playing = True

        # pcm_data is already decompressed 16-bit PCM at this point.
        now = time.monotonic()
        idle_sec = now - (_last_play_ts or now)
        lead_ms = 160 if idle_sec > 3.0 else 60
        tail_ms = 40

        bytes_per_ms = 16000 * 2 // 1000  # 16 kHz, 16-bit mono
        framed = (b'\x00' * (lead_ms * bytes_per_ms)
                  + pcm_data
                  + b'\x00' * (tail_ms * bytes_per_ms))

        # Recording is done once inside broadcast_pcm16_realtime — not here,
        # to avoid duplicating the capture.
        await broadcast_pcm16_realtime(framed)

        _last_play_ts = time.monotonic()
    except Exception as e:
        print(f"[AUDIO] 广播音频失败: {e}")
    finally:
        with _playing_lock:
            _is_playing = False

def initialize_audio_system():
    """One-time setup: merge the voice map, preload clips, start the worker."""
    global _initialized, _worker_thread, _last_play_ts

    if _initialized:
        return

    # Merge the voice map first so preload sees the full AUDIO_MAP.
    _merge_voice_map()
    preload_all_audio()

    _worker_thread = threading.Thread(target=_audio_worker, daemon=True)
    _worker_thread.start()
    _initialized = True
    _last_play_ts = 0.0

    # Report compression savings when the compressed cache is active.
    if os.getenv("AIGLASS_COMPRESS_AUDIO", "1") == "1":
        stats = compressed_audio_cache.get_compression_stats()
        print(f"[AUDIO] 音频压缩统计:")
        print(f" - 文件数: {stats['files_cached']}")
        print(f" - 原始大小: {stats['total_original_size'] / 1024:.1f} KB")
        print(f" - 压缩后: {stats['total_compressed_size'] / 1024:.1f} KB")
        print(f" - 压缩率: {stats['compression_ratio']:.1%}")
        print(f" - 节省: {stats['bytes_saved'] / 1024:.1f} KB")

    print("[AUDIO] 音频系统初始化完成(预加载+工作线程)")
def play_audio_threadsafe(audio_key):
    """Queue a pre-cached clip for playback on the ESP32 speaker (thread-safe).

    Real-time policy: at most one pending clip is kept so speech stays
    current; anything older is discarded.

    BUGFIX: the queue is now drained in place with get_nowait() instead of
    being rebound to a brand-new PriorityQueue. Rebinding left the worker
    thread blocked forever in get() on the OLD queue object (which never
    receives another item), wedging all subsequent playback.
    """
    global _audio_priority

    if not _initialized:
        initialize_audio_system()

    if audio_key not in AUDIO_MAP:
        print(f"[AUDIO] 未知的音频键: {audio_key}")
        return

    filepath = AUDIO_MAP[audio_key]
    pcm_data = _audio_cache.get(filepath)
    if pcm_data is None:
        print(f"[AUDIO] 音频未在缓存中: {audio_key}")
        return

    # Tagged payloads (0x01 = mu-law, 0x02 = ADPCM) must be decoded first.
    if pcm_data and len(pcm_data) > 5 and pcm_data[0] in (0x01, 0x02):
        pcm_data = compressed_audio_cache.decompress(pcm_data)
        if not pcm_data:
            print(f"[AUDIO] 解压失败: {audio_key}")
            return

    queue_size = _audio_queue.qsize()
    with _playing_lock:
        currently_playing = _is_playing

    # Keep the backlog minimal: idle -> 0 pending; playing -> at most 1.
    if queue_size > 0 and not currently_playing:
        print(f"[AUDIO] 清空队列(当前{queue_size}个),播放最新语音")
        _drain_audio_queue()
    elif queue_size > 1 and currently_playing:
        print(f"[AUDIO] 队列积压({queue_size}个),清空以保持实时")
        _drain_audio_queue()

    try:
        # Monotonically increasing priority keeps FIFO order in the queue.
        _audio_priority += 1
        _audio_queue.put_nowait((_audio_priority, pcm_data))
        if queue_size >= 1:
            print(f"[AUDIO] 播放队列当前大小: {queue_size + 1}")
    except queue.Full:
        # Queue full: drop the clip rather than block the caller.
        print(f"[AUDIO] 队列满,丢弃: {audio_key}")


def _drain_audio_queue():
    """Discard every pending clip WITHOUT rebinding the queue object,
    so a worker blocked in get() keeps observing the same queue."""
    while True:
        try:
            _audio_queue.get_nowait()
        except queue.Empty:
            return


# Global speech throttling — Day 22: cooldown lowered from 3.0 s.
_last_voice_time = 0
_last_voice_text = ""
_voice_cooldown = 1.5  # identical text repeats at most every 1.5 s

# Speech priorities (higher = more urgent).
VOICE_PRIORITY = {
    'obstacle': 100,   # obstacles — highest priority
    'direction': 50,   # turn / lateral-move commands — medium
    'straight': 10,    # keep going straight — lowest
    'other': 30        # everything else — default
}

def play_voice_text(text: str):
    """Play the pre-recorded clip matching a Chinese prompt string.

    Matching order:
      1. the exact text;
      2. the text with trailing punctuation added or stripped (。.!!??);
      3. the generic obstacle fallback for "前方有…注意避让" prompts;
      4. punctuation-less variants for "请向…平移/微调/转动"-style prompts.
    Repeats of the same text inside the cooldown window are silently dropped.
    """
    global _last_voice_time, _last_voice_text

    if not text:
        return
    if not _initialized:
        initialize_audio_system()

    # Global throttle: same text within the cooldown is skipped silently.
    now = time.time()
    if text == _last_voice_text and now - _last_voice_time < _voice_cooldown:
        return

    stripped = text.strip()
    candidates = [stripped]
    if stripped[-1:] not in ("。", "!", "!", "?", "?", "."):
        candidates.append(stripped + "。")
    else:
        bare = stripped.rstrip("。.!!??")
        if bare and bare != stripped:
            candidates.append(bare)

    for key in candidates:
        if key in AUDIO_MAP:
            play_audio_threadsafe(key)
            _last_voice_text = text
            _last_voice_time = now
            return

    # Fallback for "前方有…注意避让" style prompts that missed exact match.
    if ("前方有" in stripped) and ("注意避让" in stripped):
        fallback = "前方有障碍物,注意避让。"
        if fallback in AUDIO_MAP:
            play_audio_threadsafe(fallback)
            _last_voice_text = text
            _last_voice_time = now
            return

    # Common variants for "请向…平移/微调/转动"-type prompts.
    base = stripped.rstrip("。.!!??")
    for key in (base, base + "。"):
        if key in AUDIO_MAP:
            play_audio_threadsafe(key)
            _last_voice_text = text
            _last_voice_time = now
            return

    # No match — log it to ease debugging of the voice map.
    print(f"[AUDIO] 未找到匹配语音: {text}")

# Backwards-compatible alias for the old entry point.
play_audio_on_esp32 = play_audio_threadsafe
===== +# 当 WebSocket 断开时,缓存 TTS 音频,等待重连后发送 +TTS_BUFFER_MAX_SECONDS = 30 # 最多缓存 30 秒音频 +TTS_BUFFER_MAX_BYTES = 16000 * 2 * TTS_BUFFER_MAX_SECONDS # 16kHz * 2 bytes * 30s = ~960KB +tts_audio_buffer: deque = deque() # 每个元素是 (timestamp, pcm16k_bytes) +tts_buffer_total_bytes = 0 + +# Day 13: TTS 专用 WebSocket 引用 +# 在 AI 处理开始前保存,避免被 ws_audio 的 finally 块清空 +tts_websocket = None + +def set_tts_websocket(ws): + """保存 TTS 发送专用的 WebSocket 引用""" + global tts_websocket + tts_websocket = ws + +def get_tts_websocket(): + """获取 TTS WebSocket(优先使用保存的引用,其次尝试全局变量)""" + global tts_websocket + if tts_websocket is not None: + return tts_websocket + # Day 15 修复:避免 import app_main,因为会触发模块顶层代码重新执行 + # 改为通过 sys.modules 获取已加载的模块引用 + try: + import sys + if 'app_main' in sys.modules: + return sys.modules['app_main'].esp32_audio_ws + except: + pass + return None + + +# ===== AI 播放任务总闸 ===== +current_ai_task: Optional[asyncio.Task] = None + +async def cancel_current_ai(): + """取消当前大模型语音任务,并等待其退出。""" + global current_ai_task + task = current_ai_task + current_ai_task = None + if task and not task.done(): + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + except Exception: + pass + +def is_playing_now() -> bool: + t = current_ai_task + return (t is not None) and (not t.done()) + +# ===== /stream.wav 连接管理 ===== +@dataclass(frozen=True) +class StreamClient: + q: asyncio.Queue + abort_event: asyncio.Event + +stream_clients: "Set[StreamClient]" = set() +STREAM_QUEUE_MAX = 96 # 小缓冲,避免积压 + +def _wav_header_unknown_size(sr=16000, ch=1, sw=2) -> bytes: + import struct + byte_rate = sr * ch * sw + block_align = ch * sw + data_size = 0x7FFFFFF0 + riff_size = 36 + data_size + return struct.pack( + "<4sI4s4sIHHIIHH4sI", + b"RIFF", riff_size, b"WAVE", + b"fmt ", 16, + 1, ch, sr, byte_rate, block_align, sw * 8, + b"data", data_size + ) + +async def hard_reset_audio(reason: str = ""): + """ + **一键清场**:取消当前AI任务。 + 注意:不再断开 HTTP /stream.wav 连接,因为 Avaota F1 使用这个通道播放 TTS。 + """ + # Day 14: 
async def flush_tts_buffer(ws) -> int:
    """Day 13: send every buffered TTS chunk to *ws*.

    Returns the number of bytes actually delivered. The buffer is emptied
    up-front so concurrent producers start fresh even if a send fails.
    """
    global tts_audio_buffer, tts_buffer_total_bytes
    from starlette.websockets import WebSocketState

    if not tts_audio_buffer:
        return 0

    total_sent = 0
    pending = list(tts_audio_buffer)
    tts_audio_buffer.clear()
    tts_buffer_total_bytes = 0

    try:
        for _, chunk in pending:
            # Stop early if the socket dropped mid-flush.
            if hasattr(ws, 'client_state') and ws.client_state != WebSocketState.CONNECTED:
                print(f"[TTS->WS] ⚠️ WebSocket disconnected while flushing buffer")
                break
            await ws.send_bytes(chunk)
            total_sent += len(chunk)

        if total_sent > 0:
            duration = total_sent / (16000 * 2)
            print(f"[TTS->WS] 📤 Flushed {total_sent} bytes ({duration:.1f}s) of cached TTS audio")
    except Exception as e:
        print(f"[TTS->WS] ❌ Error flushing buffer: {e}")

    return total_sent

async def broadcast_pcm16_realtime(pcm16: bytes):
    """Deliver one 16 kHz PCM16 chunk: WebSocket immediately, HTTP clients
    via a background 20 ms pacing task (Day 14: pacing must not stall WS).

    BUGFIX: removed the leftover `import audioop` — the input is already
    16 kHz (Day 21) so it was unused, and on Python 3.13+ (audioop removed
    from the stdlib) the ImportError was swallowed by the blanket `except`,
    silently dropping every WebSocket TTS chunk.
    """
    # Record the chunk as a whole before fan-out (best-effort, never fatal).
    try:
        import sync_recorder
        sync_recorder.record_audio(pcm16, text="[Omni对话]")
    except Exception:
        pass

    global tts_audio_buffer, tts_buffer_total_bytes
    import time as _time

    try:
        # Day 13: prefer the saved TTS WebSocket reference so ws_audio's
        # finally block can't null it out mid-stream.
        ws = get_tts_websocket()
        pcm16k = pcm16  # already 16 kHz; no resampling needed (Day 21)

        sent_ok = False
        if ws is not None:
            try:
                # Day 13 fix: don't gate on client_state — it can be stale
                # and would wrongly divert audio into the buffer.
                # Drain anything buffered while disconnected first, so
                # chunks arrive in order.
                while tts_audio_buffer:
                    _, buffered_audio = tts_audio_buffer.popleft()
                    tts_buffer_total_bytes -= len(buffered_audio)
                    await ws.send_bytes(buffered_audio)
                    if not getattr(broadcast_pcm16_realtime, '_flush_logged', False):
                        print(f"[TTS->WS] 📤 Flushing buffered TTS audio...")
                        broadcast_pcm16_realtime._flush_logged = True

                await ws.send_bytes(pcm16k)
                sent_ok = True

                if len(pcm16k) > 320:
                    print(f"[TTS->WS] 📤 Sent {len(pcm16k)} bytes (16kHz) to Avaota")

                # Reset one-shot warning flags after a successful send.
                broadcast_pcm16_realtime._ws_warned = False
                broadcast_pcm16_realtime._buffer_warned = False
                broadcast_pcm16_realtime._flush_logged = False
            except Exception as send_err:
                if not getattr(broadcast_pcm16_realtime, '_send_err_warned', False):
                    print(f"[TTS->WS] ❌ Send error: {send_err}, will buffer")
                    broadcast_pcm16_realtime._send_err_warned = True

        if not sent_ok:
            # Buffer for replay on reconnect, dropping the oldest on overflow.
            tts_audio_buffer.append((_time.time(), pcm16k))
            tts_buffer_total_bytes += len(pcm16k)
            while tts_buffer_total_bytes > TTS_BUFFER_MAX_BYTES and tts_audio_buffer:
                _, old_audio = tts_audio_buffer.popleft()
                tts_buffer_total_bytes -= len(old_audio)
            if not getattr(broadcast_pcm16_realtime, '_buffer_warned', False):
                buffer_secs = tts_buffer_total_bytes / (16000 * 2)
                print(f"[TTS->WS] 📦 Buffering TTS audio ({buffer_secs:.1f}s cached), will send when reconnected")
                broadcast_pcm16_realtime._buffer_warned = True

    except Exception:
        pass  # fan-out must never crash the caller

    # Day 14: HTTP 20 ms pacing runs as its own task so it cannot block the
    # WebSocket path — the next Omni chunk can be processed immediately.
    if stream_clients:
        asyncio.create_task(_http_pacing_broadcast(pcm16))
async def _http_pacing_broadcast(pcm16: bytes):
    """Day 14: feed *pcm16* to HTTP stream clients in 20 ms slices.

    Runs as an independent background task; previously this pacing loop was
    inline in broadcast_pcm16_realtime and blocked the WebSocket sends.
    """
    loop = asyncio.get_event_loop()
    next_tick = loop.time()
    offset = 0
    while offset < len(pcm16):
        take = min(BYTES_PER_20MS_16K, len(pcm16) - offset)
        piece = pcm16[offset:offset + take]

        # Day 14 debugging: confirm the client set on the first slice only.
        if len(stream_clients) > 0 and offset == 0:
            print(f"[TTS->HTTP] 📤 Sending to {len(stream_clients)} HTTP stream client(s)")

        dropped: List[StreamClient] = []
        for client in list(stream_clients):
            if client.abort_event.is_set():
                dropped.append(client)
                continue
            try:
                if client.q.full():
                    # Queue full: discard the oldest slice to stay real-time.
                    try:
                        client.q.get_nowait()
                    except Exception:
                        pass
                client.q.put_nowait(piece)
            except Exception:
                dropped.append(client)
        for client in dropped:
            try:
                stream_clients.discard(client)
            except Exception:
                pass

        # Sleep to the next 20 ms boundary; resynchronize if we fell behind.
        next_tick += 0.020
        now = loop.time()
        if now < next_tick:
            await asyncio.sleep(next_tick - now)
        else:
            next_tick = now
        offset += take

# ===== FastAPI route registrar =====
def register_stream_route(app):
    @app.get("/stream.wav")
    async def stream_wav(_: Request):
        # Enforce (near-)single-listener: kick every previous connection
        # before accepting the new one.
        for old in list(stream_clients):
            try:
                old.abort_event.set()
            except Exception:
                pass
        stream_clients.clear()

        q: asyncio.Queue[bytes | None] = asyncio.Queue(maxsize=STREAM_QUEUE_MAX)
        abort_event = asyncio.Event()
        client = StreamClient(q=q, abort_event=abort_event)
        stream_clients.add(client)

        async def gen():
            # Endless-WAV header first, then raw PCM slices as they arrive.
            yield _wav_header_unknown_size(STREAM_SR, STREAM_CH, STREAM_SW)
            try:
                while not abort_event.is_set():
                    try:
                        chunk = await asyncio.wait_for(q.get(), timeout=0.5)
                    except asyncio.TimeoutError:
                        continue  # re-check the abort flag periodically
                    if abort_event.is_set() or chunk is None:
                        break
                    if chunk:
                        yield chunk
            finally:
                stream_clients.discard(client)

        return StreamingResponse(gen(), media_type="audio/wav")
# Callback that pushes JPEG frames to the front-end viewer;
# registered by app_main.py at startup.
_sender_lock = threading.Lock()
_sender_cb = None

# Callback that pushes UI text to the front-end;
# registered by app_main.py at startup.
_ui_sender_lock = threading.Lock()
_ui_sender_cb = None

def set_sender(cb):
    """Register cb(jpeg_bytes) -> None (called by app_main.py)."""
    global _sender_cb
    with _sender_lock:
        _sender_cb = cb

def set_ui_sender(cb):
    """Register cb(text: str) -> None (called by app_main.py)."""
    global _ui_sender_cb
    with _ui_sender_lock:
        _ui_sender_cb = cb

def push_raw_jpeg(jpeg_bytes: bytes):
    """Called by app_main.py whenever a /ws/camera frame arrives."""
    if not jpeg_bytes:
        return
    with _cond:
        _frames.append((time.time(), jpeg_bytes))
        _cond.notify_all()

def wait_raw_bgr(timeout_sec: float = 0.5):
    """Block until the newest frame decodes to BGR; None on timeout.

    Called by the YOLO/MediaPipe worker scripts.
    """
    deadline = time.time() + timeout_sec
    while time.time() < deadline:
        with _cond:
            newest = _frames[-1] if _frames else None
        if newest is None:
            time.sleep(0.01)
            continue
        _, jpeg = newest
        buf = np.frombuffer(jpeg, dtype=np.uint8)
        bgr = cv2.imdecode(buf, cv2.IMREAD_COLOR)
        if bgr is not None:
            # Mirroring at the source is intentionally disabled:
            # bgr = cv2.flip(bgr, 1)
            return bgr
        time.sleep(0.01)  # decode failed; retry shortly
    return None

def send_vis_bgr(bgr, quality: int = 80):
    """Encode a processed BGR frame as JPEG and push it to the viewer.

    Called by the YOLO/MediaPipe worker scripts; no image enhancement here.
    """
    if bgr is None:
        return
    ok, enc = cv2.imencode(".jpg", bgr, [int(cv2.IMWRITE_JPEG_QUALITY), int(quality)])
    if not ok:
        return
    with _sender_lock:
        cb = _sender_cb
        if cb:
            try:
                cb(enc.tobytes())
            except Exception:
                pass  # a broken sink must not kill the vision loop

def send_ui_final(text: str):
    """Push one line of UI text as a final answer (thread-safe callback)."""
    if not text:
        return
    with _ui_sender_lock:
        cb = _ui_sender_cb
        if cb:
            try:
                cb(str(text))
            except Exception:
                pass  # a broken sink must not kill the caller
# -*- coding: utf-8 -*-
"""
Crosswalk awareness monitor.

Area-based crosswalk detection with voice guidance only: this module never
switches workflow states, it just decides what (and when) to announce.
"""
import logging
import time
from collections import deque
from typing import Any, Dict, Optional

import numpy as np

logger = logging.getLogger(__name__)


class CrosswalkAwarenessMonitor:
    """Crosswalk awareness monitor - pure voice-guidance module."""

    def __init__(self):
        # Fixed area-ratio anchors for the four announcement stages.
        self.THRESHOLDS = {
            'discover': 0.01,     # 1%  - first sighting
            'approaching': 0.08,  # 8%  - getting closer
            'near': 0.18,         # 18% - very close
            'arrival': 0.25,      # 25% - arrived (safe to cross)
        }

        # Thresholds already announced (prevents duplicate broadcasts).
        self.broadcasted_thresholds = set()

        # Rolling area history (most recent 30 frames).
        self.area_history = deque(maxlen=30)

        # Timing bookkeeping.
        self.last_broadcast_time = 0
        self.arrival_first_broadcast_time = 0

        # State flags.
        self.in_arrival_state = False   # currently in the "safe to cross" state
        self.last_position_zone = None  # position zone announced last time

        # Repeat intervals in seconds (smaller = more frequent).
        # Tuned: original 10/5/8 s divided by 1.5 for 1.5x announcement rate.
        self.REPEAT_INTERVALS = {
            'approaching': 6.7,
            'near': 3.3,
            'arrival': 5.3,
        }

        # Overlap above this ratio counts as occlusion.
        self.OCCLUSION_THRESHOLD = 0.30

    def process_frame(self, crosswalk_mask, blind_path_mask=None) -> Optional[Dict[str, Any]]:
        """
        Process one frame of crosswalk detection.

        Returns a dict with keys 'voice_text', 'priority', 'should_broadcast',
        'area' and 'position' when something should be announced, else None.
        """
        # No crosswalk visible: clear per-crosswalk state.
        if crosswalk_mask is None:
            self._reset_if_needed()
            return None

        # 1. Area ratio of the crosswalk within the frame.
        covered = np.sum(crosswalk_mask > 0)
        area_ratio = covered / crosswalk_mask.size

        # 2. Mask centroid (normalized to [0, 1]).
        ys, xs = np.where(crosswalk_mask > 0)
        if len(ys) == 0:
            return None
        center_x_ratio = np.mean(xs) / crosswalk_mask.shape[1]
        center_y_ratio = np.mean(ys) / crosswalk_mask.shape[0]

        # 3. Record history.
        now = time.time()
        self.area_history.append({
            'area': area_ratio,
            'center_x': center_x_ratio,
            'center_y': center_y_ratio,
            'time': now,
        })

        # 4. Occlusion check against the blind-path mask.
        occluded = self._check_occlusion(crosswalk_mask, blind_path_mask)

        # 5. Decide the current stage and build the voice payload.
        return self._generate_guidance(area_ratio, center_x_ratio, center_y_ratio,
                                       occluded, now)

    def _check_occlusion(self, crosswalk_mask, blind_path_mask) -> bool:
        """True when the blind path overlaps the crosswalk beyond the threshold."""
        if blind_path_mask is None:
            return False
        cross = crosswalk_mask > 0
        blind = blind_path_mask > 0
        overlap = np.logical_and(cross, blind)
        ratio = np.sum(overlap) / max(np.sum(cross), 1)
        return ratio > self.OCCLUSION_THRESHOLD

    def _get_position_description(self, center_x_ratio) -> str:
        """Three-way horizontal position label for the centroid."""
        if center_x_ratio < 0.40:
            return "在画面左侧"
        elif center_x_ratio < 0.60:
            return "在画面中间"
        else:
            return "在画面右侧"

    @staticmethod
    def _payload(voice_text, priority, area, position):
        """Build the broadcast payload returned to the caller."""
        return {
            'voice_text': voice_text,
            'priority': priority,
            'should_broadcast': True,
            'area': area,
            'position': position,
        }

    def _generate_guidance(self, area_ratio, center_x_ratio, center_y_ratio,
                           has_occlusion, current_time) -> Optional[Dict[str, Any]]:
        """Map the current area/position to a voice announcement, or None."""
        # Ignore jittery frames.
        if not self._is_area_stable(area_ratio):
            return None

        position_desc = self._get_position_description(center_x_ratio)
        th = self.THRESHOLDS

        # Stage 1: discovered (0.01 <= area < 0.08) - announced once.
        if th['discover'] <= area_ratio < th['approaching']:
            if th['discover'] not in self.broadcasted_thresholds:
                self.broadcasted_thresholds.add(th['discover'])
                # Priority 55 outranks blind-path direction commands (50).
                return self._payload(f"远处发现斑马线,{position_desc}", 55,
                                     area_ratio, position_desc)

        # Stage 2: approaching (0.08 <= area < 0.18) - repeats on interval or zone change.
        elif th['approaching'] <= area_ratio < th['near']:
            first = th['approaching'] not in self.broadcasted_thresholds
            due = (current_time - self.last_broadcast_time >= self.REPEAT_INTERVALS['approaching']
                   or position_desc != self.last_position_zone)
            if first or due:
                self.broadcasted_thresholds.add(th['approaching'])
                self.last_broadcast_time = current_time
                self.last_position_zone = position_desc
                return self._payload(f"正在靠近斑马线,{position_desc}", 55,
                                     area_ratio, position_desc)

        # Stage 3: very close (0.18 <= area < 0.25) - repeats on interval or zone change.
        elif th['near'] <= area_ratio < th['arrival']:
            first = th['near'] not in self.broadcasted_thresholds
            due = (current_time - self.last_broadcast_time >= self.REPEAT_INTERVALS['near']
                   or position_desc != self.last_position_zone)
            if first or due:
                self.broadcasted_thresholds.add(th['near'])
                self.last_broadcast_time = current_time
                self.last_position_zone = position_desc
                return self._payload(f"接近斑马线,{position_desc}", 60,
                                     area_ratio, position_desc)

        # Stage 4: arrival (area >= 0.25, only when not occluded).
        elif area_ratio >= th['arrival']:
            if has_occlusion:
                # Occluded: stay silent rather than telling the user to cross.
                logger.info(f"[斑马线] 面积达到{area_ratio:.2f}但被遮挡,暂不提示过马路")
                return None
            if not self.in_arrival_state:
                self.in_arrival_state = True
                self.arrival_first_broadcast_time = current_time
                self.last_broadcast_time = current_time
                logger.info(f"[斑马线] 到达状态:area={area_ratio:.2f}, 无遮挡")
                return self._payload("斑马线到了可以过马路", 80, area_ratio, '到达')
            if current_time - self.last_broadcast_time >= self.REPEAT_INTERVALS['arrival']:
                self.last_broadcast_time = current_time
                return self._payload("斑马线到了可以过马路", 80, area_ratio, '到达')
            if current_time - self.arrival_first_broadcast_time > 30.0:
                # Auto-exit the arrival state after 30 seconds.
                logger.info("[斑马线] 到达状态超时30秒,自动退出")
                self.in_arrival_state = False
                return None

        # Shrinking back out of the arrival band: leave the arrival state.
        if self.in_arrival_state and area_ratio < 0.20:
            logger.info(f"[斑马线] 面积降至{area_ratio:.2f},退出到达状态")
            self.in_arrival_state = False
            # Allow the arrival announcement to fire again later.
            self.broadcasted_thresholds.discard(th['arrival'])

        return None

    def _is_area_stable(self, area_ratio, stability_frames=5) -> bool:
        """False when any recent frame deviates from the current area by >20% (jitter)."""
        if len(self.area_history) < stability_frames:
            return True  # warm-up phase: treat as stable
        recent = [h['area'] for h in list(self.area_history)[-stability_frames:]]
        denom = max(area_ratio, 0.001)
        return all(abs(a - area_ratio) / denom <= 0.20 for a in recent)

    def _reset_if_needed(self):
        """Reset per-crosswalk state when the crosswalk disappears."""
        if len(self.area_history) > 0:
            logger.info("[斑马线] 斑马线消失,重置状态")
        self.broadcasted_thresholds.clear()
        self.area_history.clear()
        self.in_arrival_state = False
        self.last_position_zone = None

    def reset(self):
        """Full reset of every piece of monitor state."""
        self.broadcasted_thresholds.clear()
        self.area_history.clear()
        self.in_arrival_state = False
        self.last_broadcast_time = 0
        self.arrival_first_broadcast_time = 0
        self.last_position_zone = None
        logger.info("[斑马线] 感知监控器已重置")

    def is_in_arrival_state(self) -> bool:
        """True while in the arrival state (callers may pause blind-path voice)."""
        return self.in_arrival_state

    def get_current_area(self) -> float:
        """Most recently recorded area ratio, or 0.0 with no history."""
        if len(self.area_history) > 0:
            return self.area_history[-1]['area']
        return 0.0

    def get_visualization_data(self, crosswalk_mask, area_ratio, center_x_ratio,
                               center_y_ratio, has_occlusion) -> Dict[str, Any]:
        """Return everything an external renderer needs to draw the overlay."""
        if crosswalk_mask is None:
            return {}

        # Stage label + overlay color (orange throughout, alpha varies per stage).
        if area_ratio >= self.THRESHOLDS['arrival']:
            stage, stage_color = "到达", "rgba(255, 165, 0, 0.5)"
        elif area_ratio >= self.THRESHOLDS['near']:
            stage, stage_color = "接近", "rgba(255, 165, 0, 0.45)"
        elif area_ratio >= self.THRESHOLDS['approaching']:
            stage, stage_color = "靠近", "rgba(255, 165, 0, 0.40)"
        else:
            stage, stage_color = "发现", "rgba(255, 165, 0, 0.35)"

        position = self._get_position_description(center_x_ratio)

        return {
            'area_ratio': area_ratio,
            'stage': stage,
            'stage_color': stage_color,
            'position': position.replace("在画面", ""),  # strip the "在画面" prefix
            'center_x_ratio': center_x_ratio,
            'center_y_ratio': center_y_ratio,
            'has_occlusion': has_occlusion,
            'in_arrival': self.in_arrival_state,
        }


# Helper
def split_combined_voice(combined_text: str) -> list:
    """
    Split a combined announcement on the fullwidth comma.

    e.g. "远处发现斑马线,在画面左侧" -> ["远处发现斑马线", "在画面左侧"]
    """
    if ',' not in combined_text:
        return [combined_text]
    return [part.strip() for part in combined_text.split(',') if part.strip()]
+ dockerfile: Dockerfile + container_name: aiglass + restart: unless-stopped + + # 端口映射 + ports: + - "8081:8081" # Web 服务 + - "12345:12345/udp" # IMU UDP + + # 环境变量(从 .env 文件读取) + environment: + - DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY} + - BLIND_PATH_MODEL=${BLIND_PATH_MODEL:-model/yolo-seg.pt} + - OBSTACLE_MODEL=${OBSTACLE_MODEL:-model/yoloe-11l-seg.pt} + - YOLOE_MODEL_PATH=${YOLOE_MODEL_PATH:-model/yoloe-11l-seg.pt} + - ENABLE_TTS=${ENABLE_TTS:-true} + - TTS_INTERVAL_SEC=${TTS_INTERVAL_SEC:-1.0} + - LOG_LEVEL=${LOG_LEVEL:-INFO} + + # 卷挂载 + volumes: + - ./model:/app/model:ro # 模型文件(只读) + - ./recordings:/app/recordings # 录制文件 + - ./music:/app/music:ro # 音频文件(只读) + - ./voice:/app/voice:ro # 语音文件(只读) + - ./static:/app/static:ro # 静态文件(只读) + - ./templates:/app/templates:ro # 模板文件(只读) + + # GPU 支持(需要 nvidia-docker) + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + + # 依赖服务(可选,如需添加数据库等) + # depends_on: + # - redis + + # 网络模式 + network_mode: bridge + + # 健康检查 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8081/api/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + + # 可选:添加 Redis 用于缓存 + # redis: + # image: redis:7-alpine + # container_name: aiglass-redis + # restart: unless-stopped + # ports: + # - "6379:6379" + # volumes: + # - redis-data:/data + +# 可选:数据卷 +# volumes: +# redis-data: + +# 可选:自定义网络 +# networks: +# aiglass-network: +# driver: bridge + diff --git a/edge_tts_client.py b/edge_tts_client.py new file mode 100644 index 0000000..d3b870f --- /dev/null +++ b/edge_tts_client.py @@ -0,0 +1,202 @@ +# edge_tts_client.py +# -*- coding: utf-8 -*- +""" +EdgeTTS 流式语音合成客户端 - Day 21 + +特点: +- 完全免费 +- 流式输出(边合成边播放) +- 低延迟 +""" + +import os +import asyncio +import edge_tts +from typing import AsyncGenerator, Optional + +# 默认语音 +DEFAULT_VOICE = os.getenv("EDGE_TTS_VOICE", "zh-CN-XiaoxiaoNeural") + +# 语速调整 ("+0%", "+10%", "-10%" 等) +DEFAULT_RATE = os.getenv("EDGE_TTS_RATE", "+0%") + +# 
# Volume adjustment ("+0%", "+10%", "-10%", ...)
DEFAULT_VOLUME = os.getenv("EDGE_TTS_VOLUME", "+0%")


async def text_to_speech_stream(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    volume: str = DEFAULT_VOLUME,
) -> AsyncGenerator[bytes, None]:
    """
    Streaming text-to-speech.

    Args:
        text: text to synthesize (empty or whitespace-only yields nothing)
        voice: Edge TTS voice name
        rate: speaking-rate adjustment
        volume: volume adjustment

    Yields:
        MP3 audio chunks as they are produced.
    """
    if not text or not text.strip():
        return
    try:
        communicate = edge_tts.Communicate(
            text=text,
            voice=voice,
            rate=rate,
            volume=volume,
        )
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                yield chunk["data"]
    except Exception as e:
        print(f"[EdgeTTS] 合成失败: {e}")


async def text_to_speech(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    volume: str = DEFAULT_VOLUME,
) -> bytes:
    """
    Full (non-streaming) text-to-speech.

    Args:
        text: text to synthesize
        voice: Edge TTS voice name
        rate: speaking-rate adjustment
        volume: volume adjustment

    Returns:
        The complete MP3 payload (empty bytes for empty input).
    """
    chunks = [c async for c in text_to_speech_stream(text, voice, rate, volume)]
    return b"".join(chunks)


async def text_to_speech_pcm(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    target_sample_rate: int = 16000,
) -> bytes:
    """
    Synthesize text straight to PCM16 mono audio (for direct playback).

    Args:
        text: text to synthesize
        voice: Edge TTS voice name
        rate: speaking-rate adjustment
        target_sample_rate: output sample rate

    Returns:
        Raw PCM16 data, or empty bytes on failure.
    """
    import io
    from pydub import AudioSegment

    mp3_data = await text_to_speech(text, voice, rate)
    if not mp3_data:
        return b""
    try:
        # Decode the MP3, then normalize to mono 16-bit at the target rate.
        audio = AudioSegment.from_mp3(io.BytesIO(mp3_data))
        audio = audio.set_frame_rate(target_sample_rate).set_channels(1).set_sample_width(2)
        return audio.raw_data
    except Exception as e:
        print(f"[EdgeTTS] PCM 转换失败: {e}")
        return b""


async def text_to_speech_pcm_stream(
    text: str,
    voice: str = DEFAULT_VOICE,
    rate: str = DEFAULT_RATE,
    target_sample_rate: int = 16000,
) -> AsyncGenerator[bytes, None]:
    """
    Streaming text-to-PCM16.

    MP3 decoding needs whole segments, so the text is split at punctuation and
    each piece is synthesized separately.

    Args:
        text: text to synthesize
        voice: Edge TTS voice name
        rate: speaking-rate adjustment
        target_sample_rate: output sample rate

    Yields:
        PCM16 chunks, one per punctuation-delimited segment.
    """
    import io
    from pydub import AudioSegment

    # Split the text at (fullwidth and ASCII) punctuation marks.
    punctuation = "。,!?;:,.!?;:"
    segments = []
    pending = ""
    for ch in text:
        pending += ch
        if ch in punctuation:
            segments.append(pending.strip())
            pending = ""
    if pending.strip():
        segments.append(pending.strip())

    # Synthesize segment by segment.
    for segment in segments:
        if not segment:
            continue
        try:
            mp3_data = await text_to_speech(segment, voice, rate)
            if mp3_data:
                audio = AudioSegment.from_mp3(io.BytesIO(mp3_data))
                audio = audio.set_frame_rate(target_sample_rate).set_channels(1).set_sample_width(2)
                yield audio.raw_data
        except Exception as e:
            print(f"[EdgeTTS] 分段合成失败: {e}")


# Commonly-used Chinese voices.
CHINESE_VOICES = [
    "zh-CN-XiaoxiaoNeural",  # female, natural
    "zh-CN-YunxiNeural",     # male, natural
    "zh-CN-XiaoyiNeural",    # female, lively
    "zh-CN-YunjianNeural",   # male, newsreader
    "zh-CN-XiaochenNeural",  # female, gentle
]


async def list_voices() -> list:
    """List every available Chinese ("zh-*") voice."""
    voices = await edge_tts.list_voices()
    return [v for v in voices if v["Locale"].startswith("zh")]
def get_system_prompt() -> str:
    """Build the system prompt; regenerated per call so the timestamp is current."""
    now = datetime.now()
    current_time = now.strftime("%H:%M")
    current_date = now.strftime("%Y年%m月%d日")
    current_weekday = WEEKDAY_MAP[now.weekday()]

    return f"""你是一个视障辅助AI助手,安装在智能导盲眼镜上。
当前时间:{current_time}
今天日期:{current_date} {current_weekday}

请用极简短的语言回答,每次回答不超过2-3句话。
避免冗长解释,只提供最关键的信息。
语气友好但简洁。"""


# Shared client instance and rolling conversation history.
_client = None
_conversation_history = []
MAX_HISTORY_TURNS = 5  # keep only the most recent 5 turns


def _get_client() -> ZhipuAiClient:
    """Lazily create and cache the ZhipuAI client."""
    global _client
    if _client is None:
        _client = ZhipuAiClient(api_key=API_KEY)
    return _client


def clear_conversation_history():
    """Drop the whole conversation history."""
    global _conversation_history
    _conversation_history = []
    print("[GLM] 对话历史已清除")


def _build_user_content(user_message, image_base64):
    """Wrap the user message, attaching the image for multimodal turns."""
    if not image_base64:
        return user_message
    return [
        {"type": "text", "text": user_message},
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}},
    ]


def _trim_history():
    """Clamp history to MAX_HISTORY_TURNS turns (1 user + 1 assistant each)."""
    global _conversation_history
    limit = MAX_HISTORY_TURNS * 2
    if len(_conversation_history) > limit:
        _conversation_history = _conversation_history[-limit:]


async def chat(user_message: str, image_base64: Optional[str] = None) -> str:
    """
    Chat with GLM-4.6v-Flash, keeping short-term conversational context.

    Args:
        user_message: user utterance
        image_base64: optional Base64-encoded JPEG for multimodal turns

    Returns:
        The assistant reply, or a fixed apology string on failure.
    """
    global _conversation_history
    client = _get_client()

    _conversation_history.append(
        {"role": "user", "content": _build_user_content(user_message, image_base64)})
    _trim_history()

    # Regenerate the system prompt on every call so the timestamp stays fresh.
    messages = [{"role": "system", "content": get_system_prompt()}] + _conversation_history

    # Day 22: retry with exponential backoff on rate limiting.
    retry_delay = 1  # initial delay in seconds
    for attempt in range(3):
        try:
            # The thinking parameter is required even for the vision model
            # (per the official docs); disabled here to reduce latency.
            response = await asyncio.to_thread(
                client.chat.completions.create,
                model=MODEL,
                messages=messages,
                thinking={"type": "disabled"},
            )
            if response.choices and len(response.choices) > 0:
                ai_reply = response.choices[0].message.content.strip()
                _conversation_history.append({"role": "assistant", "content": ai_reply})
                print(f"[GLM] 回复: {ai_reply[:50]}..." if len(ai_reply) > 50 else f"[GLM] 回复: {ai_reply}")
                return ai_reply
            return ""
        except Exception as e:
            error_str = str(e)
            # Rate-limit errors (HTTP 429 or provider code 1305).
            rate_limited = "429" in error_str or "1305" in error_str or "请求过多" in error_str
            if rate_limited and attempt < 2:
                print(f"[GLM] 速率限制,{retry_delay}秒后重试... (尝试 {attempt + 1}/3)")
                await asyncio.sleep(retry_delay)
                retry_delay *= 2  # exponential backoff
                continue
            print(f"[GLM] 调用失败: {e}")
            import traceback
            traceback.print_exc()
            break

    # Every attempt failed: drop the dangling user message.
    if _conversation_history and _conversation_history[-1]["role"] == "user":
        _conversation_history.pop()
    return "抱歉,我暂时无法回答。"


async def chat_stream(user_message: str, image_base64: Optional[str] = None) -> AsyncGenerator[str, None]:
    """
    Streaming variant of chat(): yields reply fragments as they arrive.

    Args:
        user_message: user utterance
        image_base64: optional Base64-encoded JPEG for multimodal turns

    Yields:
        Fragments of the assistant reply.
    """
    global _conversation_history
    client = _get_client()

    _conversation_history.append(
        {"role": "user", "content": _build_user_content(user_message, image_base64)})
    _trim_history()

    messages = [{"role": "system", "content": get_system_prompt()}] + _conversation_history
    full_response = ""

    try:
        # Streaming call; thinking is required and disabled (see chat()).
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model=MODEL,
            messages=messages,
            thinking={"type": "disabled"},
            stream=True,
        )
        for chunk in response:
            piece = chunk.choices[0].delta.content
            if piece:
                full_response += piece
                yield piece

        # Persist the assembled reply in the history.
        if full_response:
            _conversation_history.append({"role": "assistant", "content": full_response})
            print(f"[GLM] 流式完成: {full_response[:50]}..." if len(full_response) > 50 else f"[GLM] 流式完成: {full_response}")

    except Exception as e:
        print(f"[GLM] 流式调用失败: {e}")
        import traceback
        traceback.print_exc()
        # Drop the user message that was just queued.
        if _conversation_history and _conversation_history[-1]["role"] == "user":
            _conversation_history.pop()
        yield "抱歉,我暂时无法回答。"
logger = logging.getLogger(__name__)

# Lazily-created module singletons shared by all ParallelDetector instances.
_cuda_streams = None
_parallel_executor = None


def _init_cuda_streams():
    """Create two CUDA streams on first use; None/[] means streams are unavailable."""
    global _cuda_streams
    if _cuda_streams is None and torch.cuda.is_available():
        try:
            _cuda_streams = [torch.cuda.Stream() for _ in range(2)]
            logger.info("[GPU_PARALLEL] 已创建 2 个 CUDA Stream")
        except Exception as e:
            logger.warning(f"[GPU_PARALLEL] 创建 CUDA Stream 失败: {e}")
            _cuda_streams = []
    return _cuda_streams


def _init_parallel_executor():
    """Create the shared 2-worker thread pool used for parallel inference."""
    global _parallel_executor
    if _parallel_executor is None:
        _parallel_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="gpu_post")
        logger.info("[GPU_PARALLEL] 已创建 GPU 后处理线程池")
    return _parallel_executor


class ParallelDetector:
    """
    Runs blind-path segmentation and obstacle detection concurrently.

    Usage:
        detector = ParallelDetector(yolo_model, obstacle_detector)
        blind_mask, cross_mask, obstacles = detector.detect_all(image, path_mask)
    """

    def __init__(self, yolo_model, obstacle_detector):
        self.yolo_model = yolo_model
        self.obstacle_detector = obstacle_detector

        # Inference parameters (overridable via environment variables).
        self.imgsz = int(os.getenv("AIGLASS_YOLO_IMGSZ", "480"))
        self.use_half = os.getenv("AIGLASS_YOLO_HALF", "1") == "1"
        self.blind_conf_threshold = 0.20
        self.cross_conf_threshold = 0.30

        # Warm up the shared singletons.
        _init_cuda_streams()
        _init_parallel_executor()

        logger.info(f"[GPU_PARALLEL] ParallelDetector 初始化完成: imgsz={self.imgsz}, half={self.use_half}")

    def detect_all(
        self,
        image: np.ndarray,
        path_mask: Optional[np.ndarray] = None
    ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], List[Any]]:
        """
        Run every detector on one frame.

        Args:
            image: BGR image
            path_mask: optional blind-path mask used to filter obstacles

        Returns:
            (blind_path_mask, crosswalk_mask, obstacles)
        """
        streams = _init_cuda_streams()
        if streams and len(streams) >= 2:
            return self._detect_with_streams(image, path_mask, streams)
        # No usable CUDA streams: run the detectors sequentially.
        return self._detect_serial(image, path_mask)

    def _detect_with_streams(
        self,
        image: np.ndarray,
        path_mask: Optional[np.ndarray],
        streams: List["torch.cuda.Stream"]
    ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], List[Any]]:
        """Day 21: run both detectors in the thread pool (CUDA streams alone were
        ineffective). FIX: the original let concurrent.futures.TimeoutError from
        as_completed(timeout=2.0) propagate and crash the frame; now the partial
        results gathered within the budget are returned instead."""
        from concurrent.futures import as_completed, TimeoutError as FuturesTimeoutError

        blind_mask = None
        cross_mask = None
        obstacles = []

        executor = _init_parallel_executor()

        def task_blind_path():
            # Blind-path + crosswalk segmentation (classes 0/1) in one YOLO pass.
            if self.yolo_model is None:
                return None, None
            try:
                results = self.yolo_model.predict(
                    image,
                    verbose=False,
                    conf=min(self.blind_conf_threshold, self.cross_conf_threshold),
                    classes=[0, 1],  # 0=crosswalk, 1=blind_path
                    imgsz=self.imgsz,
                    half=self.use_half
                )
                if results and results[0] and results[0].masks is not None:
                    return self._parse_seg_results(results[0], image.shape)
            except Exception as e:
                logger.error(f"[GPU_PARALLEL] 盲道检测失败: {e}")
            return None, None

        def task_obstacles():
            if self.obstacle_detector is None:
                return []
            try:
                return self.obstacle_detector.detect(image, path_mask=path_mask)
            except Exception as e:
                logger.error(f"[GPU_PARALLEL] 障碍物检测失败: {e}")
                return []

        # Submit both tasks in parallel.
        futures = {
            executor.submit(task_blind_path): 'blind',
            executor.submit(task_obstacles): 'obstacle'
        }

        try:
            for future in as_completed(futures, timeout=2.0):
                task_type = futures[future]
                try:
                    result = future.result()
                    if task_type == 'blind':
                        blind_mask, cross_mask = result
                    else:
                        obstacles = result
                except Exception as e:
                    logger.error(f"[GPU_PARALLEL] {task_type}任务异常: {e}")
        except FuturesTimeoutError:
            # Budget exceeded: keep whatever finished; unfinished tasks keep defaults.
            logger.error("[GPU_PARALLEL] 并行检测超时(2.0s),返回已完成的部分结果")

        return blind_mask, cross_mask, obstacles

    def _detect_serial(
        self,
        image: np.ndarray,
        path_mask: Optional[np.ndarray]
    ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], List[Any]]:
        """Sequential detection (fallback when no CUDA streams are available)."""
        blind_mask = None
        cross_mask = None
        obstacles = []

        # Blind-path / crosswalk segmentation.
        if self.yolo_model is not None:
            try:
                results = self.yolo_model.predict(
                    image,
                    verbose=False,
                    conf=min(self.blind_conf_threshold, self.cross_conf_threshold),
                    classes=[0, 1],
                    imgsz=self.imgsz,
                    half=self.use_half
                )
                if results and results[0] and results[0].masks is not None:
                    blind_mask, cross_mask = self._parse_seg_results(results[0], image.shape)
            except Exception as e:
                logger.error(f"[GPU_PARALLEL] 盲道检测失败: {e}")

        # Obstacle detection.
        if self.obstacle_detector is not None:
            try:
                obstacles = self.obstacle_detector.detect(image, path_mask=path_mask)
            except Exception as e:
                logger.error(f"[GPU_PARALLEL] 障碍物检测失败: {e}")

        return blind_mask, cross_mask, obstacles

    def _parse_seg_results(
        self,
        result,
        image_shape: Tuple[int, int, int]
    ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
        """Split a YOLO segmentation result into blind-path and crosswalk masks.

        Instances below the per-class confidence threshold are dropped; multiple
        instances of a class are OR-merged into a single mask.
        """
        blind_mask = None
        cross_mask = None

        h, w = image_shape[:2]

        if result.masks is None or result.boxes is None:
            return None, None

        for mask_tensor, conf_tensor, cls_tensor in zip(
            result.masks.data, result.boxes.conf, result.boxes.cls
        ):
            class_id = int(cls_tensor.item())
            confidence = float(conf_tensor.item())

            # Per-class confidence filtering (1=blind path, 0=crosswalk).
            if class_id == 1 and confidence < self.blind_conf_threshold:
                continue
            if class_id == 0 and confidence < self.cross_conf_threshold:
                continue

            current_mask = self._tensor_to_mask(mask_tensor, w, h)

            if class_id == 1:  # blind path
                blind_mask = current_mask if blind_mask is None else np.bitwise_or(blind_mask, current_mask)
            elif class_id == 0:  # crosswalk
                cross_mask = current_mask if cross_mask is None else np.bitwise_or(cross_mask, current_mask)

        return blind_mask, cross_mask

    def _tensor_to_mask(
        self,
        mask_tensor: torch.Tensor,
        out_w: int,
        out_h: int
    ) -> np.ndarray:
        """Convert a PyTorch mask tensor to a binary uint8 (0/255) NumPy mask."""
        import cv2

        mask_np = mask_tensor.cpu().numpy() if mask_tensor.is_cuda else mask_tensor.numpy()

        # Resize to the frame size if the model output is smaller.
        if mask_np.shape[0] != out_h or mask_np.shape[1] != out_w:
            mask_np = cv2.resize(mask_np, (out_w, out_h), interpolation=cv2.INTER_NEAREST)

        # Binarize to 0/255.
        return (mask_np > 0.5).astype(np.uint8) * 255


def detect_all_parallel(
    yolo_model,
    obstacle_detector,
    image: np.ndarray,
    path_mask: Optional[np.ndarray] = None
) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], List[Any]]:
    """
    Convenience wrapper: run every detector in parallel on one frame.

    Intended as a drop-in replacement for the serial detection calls in
    workflow_blindpath.py.
    """
    detector = ParallelDetector(yolo_model, obstacle_detector)
    return detector.detect_all(image, path_mask)
b/model/SenseVoiceSmall/chn_jpn_yue_eng_ko_spectok.bpe.model differ diff --git a/model/SenseVoiceSmall/config.yaml b/model/SenseVoiceSmall/config.yaml new file mode 100644 index 0000000..b1dd72d --- /dev/null +++ b/model/SenseVoiceSmall/config.yaml @@ -0,0 +1,97 @@ +encoder: SenseVoiceEncoderSmall +encoder_conf: + output_size: 512 + attention_heads: 4 + linear_units: 2048 + num_blocks: 50 + tp_blocks: 20 + dropout_rate: 0.1 + positional_dropout_rate: 0.1 + attention_dropout_rate: 0.1 + input_layer: pe + pos_enc_class: SinusoidalPositionEncoder + normalize_before: true + kernel_size: 11 + sanm_shfit: 0 + selfattention_layer_type: sanm + + +model: SenseVoiceSmall +model_conf: + length_normalized_loss: true + sos: 1 + eos: 2 + ignore_id: -1 + +tokenizer: SentencepiecesTokenizer +tokenizer_conf: + bpemodel: null + unk_symbol: + split_with_space: true + +frontend: WavFrontend +frontend_conf: + fs: 16000 + window: hamming + n_mels: 80 + frame_length: 25 + frame_shift: 10 + lfr_m: 7 + lfr_n: 6 + cmvn_file: null + + +dataset: SenseVoiceCTCDataset +dataset_conf: + index_ds: IndexDSJsonl + batch_sampler: EspnetStyleBatchSampler + data_split_num: 32 + batch_type: token + batch_size: 14000 + max_token_length: 2000 + min_token_length: 60 + max_source_length: 2000 + min_source_length: 60 + max_target_length: 200 + min_target_length: 0 + shuffle: true + num_workers: 4 + sos: ${model_conf.sos} + eos: ${model_conf.eos} + IndexDSJsonl: IndexDSJsonl + retry: 20 + +train_conf: + accum_grad: 1 + grad_clip: 5 + max_epoch: 20 + keep_nbest_models: 10 + avg_nbest_model: 10 + log_interval: 100 + resume: true + validate_interval: 10000 + save_checkpoint_interval: 10000 + +optim: adamw +optim_conf: + lr: 0.00002 +scheduler: warmuplr +scheduler_conf: + warmup_steps: 25000 + +specaug: SpecAugLFR +specaug_conf: + apply_time_warp: false + time_warp_window: 5 + time_warp_mode: bicubic + apply_freq_mask: true + freq_mask_width_range: + - 0 + - 30 + lfr_rate: 6 + num_freq_mask: 1 + apply_time_mask: 
# model_utils.py - Day 20 TensorRT model loading helpers
"""
Prefer a TensorRT .engine file when present; fall back to the .pt file.
"""

import os


def get_best_model_path(pt_path: str) -> str:
    """
    Pick the best model file for a given .pt path:
    1. Prefer a sibling .engine file (TensorRT acceleration).
    2. Fall back to the original .pt file when no .engine exists.

    Args:
        pt_path: original .pt model path (e.g. 'model/yolo-seg.pt')

    Returns:
        Path to the best available model (.engine or .pt).
    """
    if not pt_path.endswith('.pt'):
        return pt_path

    # FIX: str.replace('.pt', '.engine') rewrote the FIRST '.pt' occurrence,
    # corrupting paths such as 'ckpt.pt_models/best.pt'. Swap only the suffix.
    engine_path = pt_path[:-len('.pt')] + '.engine'

    if os.path.exists(engine_path):
        print(f"[MODEL] 🚀 使用 TensorRT 加速: {engine_path}")
        return engine_path

    print(f"[MODEL] 使用 PyTorch 模型: {pt_path}")
    return pt_path


def is_tensorrt_engine(model_path: str) -> bool:
    """True when the path points at a TensorRT engine (.engine or .trt)."""
    return model_path.endswith('.engine') or model_path.endswith('.trt')
# ==========================================================
# 1. Global device & concurrency control (managed in one place)
# ==========================================================
DEVICE = os.getenv("AIGLASS_DEVICE", "cuda:0")
if DEVICE.startswith("cuda") and not torch.cuda.is_available():
    logger.warning(f"AIGLASS_DEVICE={DEVICE} 但未检测到 CUDA,将回退到 CPU")
    DEVICE = "cpu"
IS_CUDA = DEVICE.startswith("cuda")

# AMP (automatic mixed precision) policy: bf16 / fp16 / off.
# NOTE(review): mirrors the original conditional expression, in which
# AMP_POLICY == "bf16" selects torch.bfloat16 even when IS_CUDA is False;
# gpu_infer_slot() re-checks IS_CUDA before using it — confirm intended.
AMP_POLICY = os.getenv("AIGLASS_AMP", "bf16").lower()
if AMP_POLICY == "bf16":
    AMP_DTYPE = torch.bfloat16
elif IS_CUDA:
    AMP_DTYPE = torch.float16 if AMP_POLICY == "fp16" else None
else:
    AMP_DTYPE = None

# 🔥 Core: the single GPU concurrency semaphore shared by every workflow.
GPU_SLOTS = int(os.getenv("AIGLASS_GPU_SLOTS", "2"))
gpu_semaphore = Semaphore(GPU_SLOTS)


@contextmanager
def gpu_infer_slot():
    """
    Unified inference context every workflow should use when calling a model:
    GPU slot throttling + torch.inference_mode() + AMP autocast (when enabled).
    """
    with gpu_semaphore:
        if IS_CUDA and AMP_POLICY != "off" and AMP_DTYPE is not None:
            with torch.inference_mode(), torch.amp.autocast('cuda', dtype=AMP_DTYPE):
                yield
        else:
            with torch.inference_mode():
                yield


# cuDNN autotuner speedup (best effort; safe to skip when unavailable).
try:
    if IS_CUDA:
        torch.backends.cudnn.benchmark = True
except Exception:
    pass

# ==========================================================
# 2. Global model instances (all start as None)
# ==========================================================

# --- Crossing-workflow models (wrapped in client classes) ---
crosswalk_detector_client: CrosswalkDetector = None
coco_client: COCOClient = None
# ObstacleDetectorClient doubles as the generic obstacle detector for all scenes.
obstacle_detector_client: ObstacleDetectorClient = None

# --- Blind-path workflow models (raw Ultralytics classes) ---
# Used for segmentation and path planning; detection logic differs from the
# crossing scenario.
blindpath_seg_model: YOLO = None
# Obstacle detection reuses obstacle_detector_client; the YOLOE text features
# are kept in a separate reference.
blindpath_whitelist_embeddings = None

# Global load-state flag.
models_are_loaded = False


# ==========================================================
# 3. Unified model loader (called once by celery.py at startup)
# ==========================================================
def init_all_models():
    """
    Called exactly once when a Celery worker process starts.
    Loads every model required by the workflows into the globals above.
    """
    global models_are_loaded
    if models_are_loaded:
        return

    logger.info(f"========= 🚀 开始全局模型预加载 (目标设备: {DEVICE}) =========")

    try:
        # --- [1] Generic obstacle detector (ObstacleDetectorClient) ---
        global obstacle_detector_client
        logger.info("[1/4] 正在加载通用障碍物检测模型 (ObstacleDetectorClient)...")
        # Day 20: prefer the TensorRT engine when one exists.
        obs_model_path = get_best_model_path('model/yoloe-11l-seg.pt')
        obstacle_detector_client = ObstacleDetectorClient(model_path=obs_model_path)
        # TensorRT engines must not be moved with .to().
        if not is_tensorrt_engine(obs_model_path):
            if hasattr(obstacle_detector_client, 'model') and obstacle_detector_client.model is not None:
                obstacle_detector_client.model.to(DEVICE)
        logger.info("...通用障碍物检测模型加载成功。")

        # --- [2] Crossing-specific model clients ---
        global crosswalk_detector_client, coco_client
        logger.info("[2/4] 正在加载过马路分割模型 (CrosswalkDetector)...")
        crosswalk_model_path = get_best_model_path('model/yolo-seg.pt')
        crosswalk_detector_client = CrosswalkDetector(model_path=crosswalk_model_path)
        if not is_tensorrt_engine(crosswalk_model_path):
            if hasattr(crosswalk_detector_client, 'model') and crosswalk_detector_client.model is not None:
                crosswalk_detector_client.model.to(DEVICE)
        logger.info("...过马路分割模型加载成功。")

        logger.info("[3/4] 正在加载通用感知模型 (COCOClient)...")
        coco_client = COCOClient(model_path='model/yolov8l-world.pt')
        # Move its inner YOLO model to the target device.
        if hasattr(coco_client, 'model') and coco_client.model is not None:
            coco_client.model.to(DEVICE)
        logger.info("...通用感知模型加载成功。")

        # --- [4] Blind-path-only segmentation model ---
        global blindpath_seg_model, blindpath_whitelist_embeddings
        logger.info("[4/4] 正在加载盲道专用分割模型 (YOLO)...")
        blindpath_model_path = get_best_model_path('model/yolo-seg.pt')
        blindpath_seg_model = YOLO(blindpath_model_path)
        # TensorRT engines need neither .to() nor .fuse().
        if not is_tensorrt_engine(blindpath_model_path):
            blindpath_seg_model.to(DEVICE)
            blindpath_seg_model.fuse()
        logger.info("...盲道专用分割模型加载成功。")

        # Link the YOLOE text-feature reference for the blind-path workflow.
        if obstacle_detector_client:
            blindpath_whitelist_embeddings = obstacle_detector_client.whitelist_embeddings
            logger.info("...已为盲道工作流链接障碍物模型特征。")

        # Everything loaded.
        models_are_loaded = True
        logger.info("========= ✅ 所有模型已成功预加载。Worker准备就绪! =========")

    except Exception as e:
        logger.error(f"模型预加载过程中发生严重错误: {e}", exc_info=True)
        # Re-raise so the Celery worker fails to start: a worker without its
        # models is useless, and surfacing the problem early is the right call.
        raise
b/music/converted_找到啦.wav differ diff --git a/music/converted_拿到啦.wav b/music/converted_拿到啦.wav new file mode 100644 index 0000000..a17b653 Binary files /dev/null and b/music/converted_拿到啦.wav differ diff --git a/music/converted_音频1.WAV b/music/converted_音频1.WAV new file mode 100644 index 0000000..fbe4c0e Binary files /dev/null and b/music/converted_音频1.WAV differ diff --git a/music/converted_音频2.WAV b/music/converted_音频2.WAV new file mode 100644 index 0000000..1a5d453 Binary files /dev/null and b/music/converted_音频2.WAV differ diff --git a/music/converted_音频3.WAV b/music/converted_音频3.WAV new file mode 100644 index 0000000..4ee77ed Binary files /dev/null and b/music/converted_音频3.WAV differ diff --git a/music/converted_音频4.WAV b/music/converted_音频4.WAV new file mode 100644 index 0000000..4654407 Binary files /dev/null and b/music/converted_音频4.WAV differ diff --git a/music/converted_音频5.WAV b/music/converted_音频5.WAV new file mode 100644 index 0000000..9d8c70b Binary files /dev/null and b/music/converted_音频5.WAV differ diff --git a/music/converted_音频6.WAV b/music/converted_音频6.WAV new file mode 100644 index 0000000..a847d4e Binary files /dev/null and b/music/converted_音频6.WAV differ diff --git a/music/converted_音频7.WAV b/music/converted_音频7.WAV new file mode 100644 index 0000000..dcf9c83 Binary files /dev/null and b/music/converted_音频7.WAV differ diff --git a/music/converted_音频8.WAV b/music/converted_音频8.WAV new file mode 100644 index 0000000..9d436a0 Binary files /dev/null and b/music/converted_音频8.WAV differ diff --git a/music/converted_音频9.WAV b/music/converted_音频9.WAV new file mode 100644 index 0000000..2e4c8f8 Binary files /dev/null and b/music/converted_音频9.WAV differ diff --git a/music/向上.txt b/music/向上.txt new file mode 100644 index 0000000..9b463d6 --- /dev/null +++ b/music/向上.txt @@ -0,0 +1 @@ +向上。如果还有其他想法,你可以随时告诉我哦。 \ No newline at end of file diff --git a/music/向上.wav b/music/向上.wav new file mode 100644 index 0000000..8e3b097 Binary files /dev/null and 
b/music/向上.wav differ diff --git a/music/向下.txt b/music/向下.txt new file mode 100644 index 0000000..f1df867 --- /dev/null +++ b/music/向下.txt @@ -0,0 +1 @@ +向下。如果还有啥想法,你可以再跟我说哦。 \ No newline at end of file diff --git a/music/向下.wav b/music/向下.wav new file mode 100644 index 0000000..1caa62a Binary files /dev/null and b/music/向下.wav differ diff --git a/music/向前.txt b/music/向前.txt new file mode 100644 index 0000000..f4aac71 --- /dev/null +++ b/music/向前.txt @@ -0,0 +1 @@ +向前。如果还有啥想法,你可以再跟我说哦。 \ No newline at end of file diff --git a/music/向前.wav b/music/向前.wav new file mode 100644 index 0000000..a11a8a3 Binary files /dev/null and b/music/向前.wav differ diff --git a/music/向右.txt b/music/向右.txt new file mode 100644 index 0000000..ef5e13d --- /dev/null +++ b/music/向右.txt @@ -0,0 +1 @@ +向右。如果还有啥想法,你可以再跟我说哦。 \ No newline at end of file diff --git a/music/向右.wav b/music/向右.wav new file mode 100644 index 0000000..5289914 Binary files /dev/null and b/music/向右.wav differ diff --git a/music/向后.txt b/music/向后.txt new file mode 100644 index 0000000..bf66582 --- /dev/null +++ b/music/向后.txt @@ -0,0 +1 @@ +向后。如果还有啥想法,你可以再跟我说哦。 \ No newline at end of file diff --git a/music/向后.wav b/music/向后.wav new file mode 100644 index 0000000..5d12772 Binary files /dev/null and b/music/向后.wav differ diff --git a/music/向左.txt b/music/向左.txt new file mode 100644 index 0000000..b988b32 --- /dev/null +++ b/music/向左.txt @@ -0,0 +1 @@ +向左。如果还有其他想法,你可以随时告诉我哦。 \ No newline at end of file diff --git a/music/向左.wav b/music/向左.wav new file mode 100644 index 0000000..c97dea1 Binary files /dev/null and b/music/向左.wav differ diff --git a/music/在画面中间.WAV b/music/在画面中间.WAV new file mode 100644 index 0000000..68befb0 Binary files /dev/null and b/music/在画面中间.WAV differ diff --git a/music/在画面中间.txt b/music/在画面中间.txt new file mode 100644 index 0000000..061d5aa --- /dev/null +++ b/music/在画面中间.txt @@ -0,0 +1 @@ +“在画面中间” \ No newline at end of file diff --git a/music/在画面中间_24k.wav b/music/在画面中间_24k.wav new file mode 
100644 index 0000000..853a937 Binary files /dev/null and b/music/在画面中间_24k.wav differ diff --git a/music/在画面右侧.WAV b/music/在画面右侧.WAV new file mode 100644 index 0000000..e0d175b Binary files /dev/null and b/music/在画面右侧.WAV differ diff --git a/music/在画面右侧.txt b/music/在画面右侧.txt new file mode 100644 index 0000000..43fcc5d --- /dev/null +++ b/music/在画面右侧.txt @@ -0,0 +1 @@ +“在画面右侧” \ No newline at end of file diff --git a/music/在画面右侧_24k.wav b/music/在画面右侧_24k.wav new file mode 100644 index 0000000..d83a129 Binary files /dev/null and b/music/在画面右侧_24k.wav differ diff --git a/music/在画面左侧.WAV b/music/在画面左侧.WAV new file mode 100644 index 0000000..2e10c3e Binary files /dev/null and b/music/在画面左侧.WAV differ diff --git a/music/在画面左侧.txt b/music/在画面左侧.txt new file mode 100644 index 0000000..4bd9477 --- /dev/null +++ b/music/在画面左侧.txt @@ -0,0 +1 @@ +“在画面左侧” \ No newline at end of file diff --git a/music/在画面左侧_24k.wav b/music/在画面左侧_24k.wav new file mode 100644 index 0000000..5dea225 Binary files /dev/null and b/music/在画面左侧_24k.wav differ diff --git a/music/已对中.txt b/music/已对中.txt new file mode 100644 index 0000000..d8d0bd6 --- /dev/null +++ b/music/已对中.txt @@ -0,0 +1 @@ +已对正!如果还有其他想法或者问题,你可以随时告诉我哦。 \ No newline at end of file diff --git a/music/已对中.wav b/music/已对中.wav new file mode 100644 index 0000000..e713fa2 Binary files /dev/null and b/music/已对中.wav differ diff --git a/music/找到啦.txt b/music/找到啦.txt new file mode 100644 index 0000000..1f2c468 --- /dev/null +++ b/music/找到啦.txt @@ -0,0 +1 @@ +找到了! 
\ No newline at end of file diff --git a/music/找到啦.wav b/music/找到啦.wav new file mode 100644 index 0000000..93ad225 Binary files /dev/null and b/music/找到啦.wav differ diff --git a/music/拿到啦.txt b/music/拿到啦.txt new file mode 100644 index 0000000..3c48626 --- /dev/null +++ b/music/拿到啦.txt @@ -0,0 +1 @@ +拿到了!如果还有啥问题,你可以再跟我说哦。 \ No newline at end of file diff --git a/music/拿到啦.wav b/music/拿到啦.wav new file mode 100644 index 0000000..9d6aaa0 Binary files /dev/null and b/music/拿到啦.wav differ diff --git a/music/接近斑马线.WAV b/music/接近斑马线.WAV new file mode 100644 index 0000000..314ee6a Binary files /dev/null and b/music/接近斑马线.WAV differ diff --git a/music/接近斑马线.txt b/music/接近斑马线.txt new file mode 100644 index 0000000..08bb25d --- /dev/null +++ b/music/接近斑马线.txt @@ -0,0 +1 @@ +“接近斑马线” \ No newline at end of file diff --git a/music/接近斑马线_24k.wav b/music/接近斑马线_24k.wav new file mode 100644 index 0000000..b128006 Binary files /dev/null and b/music/接近斑马线_24k.wav differ diff --git a/music/斑马线到了可以过马路.WAV b/music/斑马线到了可以过马路.WAV new file mode 100644 index 0000000..5d3d661 Binary files /dev/null and b/music/斑马线到了可以过马路.WAV differ diff --git a/music/斑马线到了可以过马路.txt b/music/斑马线到了可以过马路.txt new file mode 100644 index 0000000..2a77950 --- /dev/null +++ b/music/斑马线到了可以过马路.txt @@ -0,0 +1 @@ +“斑马线到了可以过马路”。如果还有类似的问题或者其他想法,你可以随时告诉我哦。 \ No newline at end of file diff --git a/music/斑马线到了可以过马路_24k.wav b/music/斑马线到了可以过马路_24k.wav new file mode 100644 index 0000000..0809f65 Binary files /dev/null and b/music/斑马线到了可以过马路_24k.wav differ diff --git a/music/正在靠近斑马线.WAV b/music/正在靠近斑马线.WAV new file mode 100644 index 0000000..cf0ec57 Binary files /dev/null and b/music/正在靠近斑马线.WAV differ diff --git a/music/正在靠近斑马线.txt b/music/正在靠近斑马线.txt new file mode 100644 index 0000000..ed31e7a --- /dev/null +++ b/music/正在靠近斑马线.txt @@ -0,0 +1 @@ +“正在靠近斑马线” \ No newline at end of file diff --git a/music/正在靠近斑马线_24k.wav b/music/正在靠近斑马线_24k.wav new file mode 100644 index 0000000..017ee91 Binary files /dev/null and 
b/music/正在靠近斑马线_24k.wav differ diff --git a/music/红灯.WAV b/music/红灯.WAV new file mode 100644 index 0000000..f53ff7f Binary files /dev/null and b/music/红灯.WAV differ diff --git a/music/绿灯.WAV b/music/绿灯.WAV new file mode 100644 index 0000000..1a5725c Binary files /dev/null and b/music/绿灯.WAV differ diff --git a/music/远处发现斑马线.WAV b/music/远处发现斑马线.WAV new file mode 100644 index 0000000..49488e3 Binary files /dev/null and b/music/远处发现斑马线.WAV differ diff --git a/music/远处发现斑马线.txt b/music/远处发现斑马线.txt new file mode 100644 index 0000000..975bed2 --- /dev/null +++ b/music/远处发现斑马线.txt @@ -0,0 +1 @@ +“远处发现斑马线” \ No newline at end of file diff --git a/music/远处发现斑马线_24k.wav b/music/远处发现斑马线_24k.wav new file mode 100644 index 0000000..3a39e4f Binary files /dev/null and b/music/远处发现斑马线_24k.wav differ diff --git a/music/音频1.WAV b/music/音频1.WAV new file mode 100644 index 0000000..fcb8e04 Binary files /dev/null and b/music/音频1.WAV differ diff --git a/music/音频2.WAV b/music/音频2.WAV new file mode 100644 index 0000000..c173480 Binary files /dev/null and b/music/音频2.WAV differ diff --git a/music/音频3.WAV b/music/音频3.WAV new file mode 100644 index 0000000..612f34b Binary files /dev/null and b/music/音频3.WAV differ diff --git a/music/音频4.WAV b/music/音频4.WAV new file mode 100644 index 0000000..4df2cf3 Binary files /dev/null and b/music/音频4.WAV differ diff --git a/music/音频5.WAV b/music/音频5.WAV new file mode 100644 index 0000000..5672b13 Binary files /dev/null and b/music/音频5.WAV differ diff --git a/music/音频6.WAV b/music/音频6.WAV new file mode 100644 index 0000000..8ff8ce0 Binary files /dev/null and b/music/音频6.WAV differ diff --git a/music/音频7.WAV b/music/音频7.WAV new file mode 100644 index 0000000..b4d350d Binary files /dev/null and b/music/音频7.WAV differ diff --git a/music/音频8.WAV b/music/音频8.WAV new file mode 100644 index 0000000..de6b1ff Binary files /dev/null and b/music/音频8.WAV differ diff --git a/music/音频9.WAV b/music/音频9.WAV new file mode 100644 index 0000000..1dd502e Binary files 
/dev/null and b/music/音频9.WAV differ diff --git a/music/黄灯.WAV b/music/黄灯.WAV new file mode 100644 index 0000000..e4051d3 Binary files /dev/null and b/music/黄灯.WAV differ diff --git a/navigation_master.py b/navigation_master.py new file mode 100644 index 0000000..76737fb --- /dev/null +++ b/navigation_master.py @@ -0,0 +1,700 @@ +# navigation_master.py +# -*- coding: utf-8 -*- +import time +import math +import cv2 +import numpy as np +from dataclasses import dataclass +from typing import Optional, Dict, Any, Deque, List, Tuple +from collections import deque + +# 工作流导入(与现有文件解耦) +from workflow_blindpath import BlindPathNavigator, ProcessingResult as BlindResult +from workflow_crossstreet import CrossStreetNavigator, CrossStreetResult as CrossResult + +# ========== 状态常量 ========== +IDLE = "IDLE" # 空闲/未启用 +CHAT = "CHAT" # 对话模式(不进行导航,只返回原始画面) +BLINDPATH_NAV = "BLINDPATH_NAV" # 正在走盲道(复用 BlindPathNavigator) +SEEKING_CROSSWALK = "SEEKING_CROSSWALK"# 盲道阶段发现斑马线,正对准/靠近 +WAIT_TRAFFIC_LIGHT = "WAIT_TRAFFIC_LIGHT" # 到达斑马线后等待交通灯(可选/占位) +CROSSING = "CROSSING" # 正在过马路(复用 CrossStreetNavigator) +SEEKING_NEXT_BLINDPATH = "SEEKING_NEXT_BLINDPATH" # 过完马路后寻找下一段盲道入口(上盲道) +RECOVERY = "RECOVERY" # 兜底/恢复(感知暂时丢失时) +TRAFFIC_LIGHT_DETECTION = "TRAFFIC_LIGHT_DETECTION" # 红绿灯检测模式 +ITEM_SEARCH = "ITEM_SEARCH" # 找物品模式(暂停导航,由yolomedia处理画面) + +# ========== 返回结构 ========== +@dataclass +class OrchestratorResult: + annotated_image: Optional[np.ndarray] + guidance_text: str + state: str + extras: Dict[str, Any] + +# ========== 实用:信号平滑/多数表决 ========== +class MajorityFilter: + def __init__(self, size: int = 8): + self.buf: Deque[str] = deque(maxlen=size) + + def push(self, v: str): + self.buf.append(v) + + def majority(self) -> str: + if not self.buf: + return "unknown" + cnt = {} + for v in self.buf: + cnt[v] = cnt.get(v, 0) + 1 + # 稳健排序:unknown 权重最低 + items = sorted(cnt.items(), key=lambda x: (0 if x[0]=="unknown" else 1, x[1]), reverse=True) + return items[0][0] + + def history(self) -> List[str]: + 
return list(self.buf) + + def clear(self): + self.buf.clear() + +# ========== 红绿灯识别 ========== +class TrafficLightDetector: + """ + 红绿灯识别器: + 1) 优先尝试 yoloe_backend 风格的检测(如可用); + 2) 回退:无模型时,使用 HSV 颜色启发式在上半屏寻找亮红/黄/绿的“灯团”。 + 输出:('red'|'green'|'yellow'|'unknown', meta) + """ + def __init__(self): + self.has_backend = False + self.backend = None + try: + # 尝试动态导入(根据你本地 yoloe_backend 的接口调整) + import yoloe_backend as _yeb # noqa + self.backend = _yeb + self.has_backend = True + except Exception: + self.has_backend = False + self.backend = None + + def _try_backend(self, bgr: np.ndarray) -> Tuple[str, Dict[str, Any]]: + """ + 尝试调用 yoloe_backend 风格的接口。由于各项目实现不同,这里做“宽容地调用”: + - 优先尝试 backend.detect(image, target_classes=['traffic light']) + - 次选 backend.infer_image(image) 后在结果中过滤 'traffic light' + - 以上都失败则返回 unknown + 预期结果条目应含 bbox 或 mask,可自行扩展“颜色判定”逻辑(ROI 取样 HSV) + """ + if not self.has_backend or self.backend is None: + return "unknown", {"reason": "backend_not_available"} + + res = None + try: + if hasattr(self.backend, "detect"): + # 假定 detect 返回 [{'name': 'traffic light', 'box':[x1,y1,x2,y2], ...}, ...] + res = self.backend.detect(bgr, target_classes=["traffic light"]) + elif hasattr(self.backend, "infer_image"): + # 假定 infer_image 返回 [{'label': 'traffic light', 'bbox': [x1,y1,x2,y2], ...}, ...] 
+ res = self.backend.infer_image(bgr) + else: + return "unknown", {"reason": "backend_no_suitable_api"} + except Exception as e: + return "unknown", {"reason": f"backend_failed:{e}"} + + if not res or len(res) == 0: + return "unknown", {"reason": "no_detection"} + + # 拿到最大框作为主灯,做 HSV 颜色判断 + H, W = bgr.shape[:2] + best = None + best_area = 0 + boxes = [] + for item in res: + # 统一盒字段 + if "box" in item and isinstance(item["box"], (list, tuple)) and len(item["box"]) == 4: + x1, y1, x2, y2 = item["box"] + elif "bbox" in item and isinstance(item["bbox"], (list, tuple)) and len(item["bbox"]) == 4: + x1, y1, x2, y2 = item["bbox"] + else: + continue + x1 = int(max(0, min(W-1, x1))); x2 = int(max(0, min(W-1, x2))) + y1 = int(max(0, min(H-1, y1))); y2 = int(max(0, min(H-1, y2))) + if x2 <= x1 or y2 <= y1: + continue + area = (x2 - x1) * (y2 - y1) + boxes.append((x1, y1, x2, y2, area)) + if area > best_area: + best_area = area + best = (x1, y1, x2, y2) + + if best is None: + return "unknown", {"reason": "no_valid_bbox", "raw": len(res)} + + x1, y1, x2, y2 = best + roi = bgr[y1:y2, x1:x2] + color = self._classify_color_hsv(roi) + return color, {"bbox": best, "count": len(res), "boxes": boxes} + + def _classify_color_hsv(self, roi_bgr: np.ndarray) -> str: + """对 ROI 做 HSV 基于阈值的红/黄/绿简单判定;取面积最大的主色。""" + if roi_bgr is None or roi_bgr.size == 0: + return "unknown" + hsv = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2HSV) + + # 红色范围(两段) + lower_red1 = np.array([0, 80, 120]); upper_red1 = np.array([10, 255, 255]) + lower_red2 = np.array([160, 80, 120]); upper_red2 = np.array([180, 255, 255]) + mask_r1 = cv2.inRange(hsv, lower_red1, upper_red1) + mask_r2 = cv2.inRange(hsv, lower_red2, upper_red2) + mask_red = cv2.bitwise_or(mask_r1, mask_r2) + + # 绿色 + lower_green = np.array([40, 60, 120]); upper_green = np.array([90, 255, 255]) + mask_green = cv2.inRange(hsv, lower_green, upper_green) + + # 黄色 + lower_yellow = np.array([18, 80, 150]); upper_yellow = np.array([35, 255, 255]) + mask_yellow = 
cv2.inRange(hsv, lower_yellow, upper_yellow) + + # 面积阈值(相对 ROI) + total = roi_bgr.shape[0] * roi_bgr.shape[1] + 1e-6 + r_ratio = float(np.count_nonzero(mask_red)) / total + g_ratio = float(np.count_nonzero(mask_green)) / total + y_ratio = float(np.count_nonzero(mask_yellow)) / total + + # 简单抑制“脏背景导致的弱响应” + thr = 0.03 + candidates = [] + if r_ratio > thr: candidates.append(("red", r_ratio)) + if g_ratio > thr: candidates.append(("green", g_ratio)) + if y_ratio > thr: candidates.append(("yellow", y_ratio)) + if not candidates: + return "unknown" + candidates.sort(key=lambda x: x[1], reverse=True) + return candidates[0][0] + + def detect(self, bgr: np.ndarray) -> Tuple[str, Dict[str, Any]]: + """ + 总入口:先尝试后端;失败则在上半屏自行找“亮色灯团”(无需框)。 + """ + # 1) 尝试后端 + if self.has_backend: + color, meta = self._try_backend(bgr) + if color != "unknown": + return color, {"method": "backend", **meta} + + # 2) 回退:上半屏 HSV 聚类 + 连通域,选最大“灯团”判色 + H, W = bgr.shape[:2] + roi = bgr[:int(H * 0.5), :] + hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV) + + # 高亮阈值(抑制暗部/车灯) + v = hsv[:, :, 2] + bright = (v > 140).astype(np.uint8) * 255 + + # 粗分颜色 + col = self._classify_color_hsv(roi) + return col, {"method": "fallback", "note": "no_backend", "bright_ratio": float(np.mean(bright > 0))} + +# ========== 视觉辅助工具 ========== +def _color_bgr(name: str) -> Tuple[int, int, int]: + if name == "red": return (0, 0, 255) + if name == "green": return (0, 255, 0) + if name == "yellow": return (0, 255, 255) + if name == "blue": return (255, 0, 0) + if name == "orange": return (0, 165, 255) + if name == "cyan": return (255, 255, 0) + if name == "magenta": return (255, 0, 255) + if name == "gray": return (128, 128, 128) + if name == "white": return (255, 255, 255) + return (200, 200, 200) + +def _put_text(img, text, org, color=(255,255,255), scale=0.7, thick=2, outline=True): + if outline: + for dx in (-1,0,1): + for dy in (-1,0,1): + if dx==0 and dy==0: continue + cv2.putText(img, text, (org[0]+dx, org[1]+dy), 
cv2.FONT_HERSHEY_SIMPLEX, scale, (0,0,0), thick+1) + cv2.putText(img, text, org, cv2.FONT_HERSHEY_SIMPLEX, scale, color, thick) + +def _draw_badge(img, text, pos=(10, 28), fg="white", bg="blue"): + color_fg = _color_bgr(fg); color_bg = _color_bgr(bg) + (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2) + x, y = pos + pad = 6 + cv2.rectangle(img, (x-4, y-th-pad), (x+tw+8, y+pad//2), color_bg, -1) + _put_text(img, text, (x, y), color=color_fg, scale=0.6, thick=2, outline=False) + +def _draw_state_panel(img, kv: Dict[str, Any], pos=(10, 60)): + x, y = pos + line_h = 22 + for i, (k, v) in enumerate(kv.items()): + _put_text(img, f"{k}: {v}", (x, y + i*line_h), color=(255,255,255), scale=0.6, thick=2) + +def _draw_frame_border(img, color=(0,255,0), thickness=3): + h, w = img.shape[:2] + cv2.rectangle(img, (0,0), (w-1, h-1), color, thickness) + +def _draw_progress_bar(img, ratio: float, pos=(10, 90), size=(180, 10), color="cyan"): + ratio = max(0.0, min(1.0, float(ratio))) + x, y = pos + w, h = size + cv2.rectangle(img, (x, y), (x+w, y+h), (80,80,80), 1) + cv2.rectangle(img, (x+1, y+1), (x+1+int((w-2)*ratio), y+h-1), _color_bgr(color), -1) + +# ========== 统领器 ========== +class NavigationMaster: + def __init__(self, + blind_nav: BlindPathNavigator, + cross_nav: CrossStreetNavigator, + *, + min_tts_interval: float = 1.2): + self.blind = blind_nav + self.cross = cross_nav + self.state = IDLE + self.last_guidance_ts = 0.0 + self.min_tts_interval = min_tts_interval + + # 防抖/稳定计数 + self.cnt_crosswalk_seen = 0 # 盲道侧看见斑马线(approaching/ready) + self.cnt_align_ready = 0 # 斑马线 ready + 对准达标 + self.cnt_cross_end = 0 # 过马路结束条件累计 + self.cnt_lost = 0 # 感知丢失累计(进入 RECOVERY) + + # 冷却期避免状态抖动 + self.cooldown_until = 0.0 + + # 紧急恢复目标 + self.prev_target_state = BLINDPATH_NAV + + # 交通灯 + self.tld = TrafficLightDetector() + self.tl_major = MajorityFilter(size=8) + self.tl_last_color = "unknown" + + # 参数(可按现场再调) + self.FRAMES_CROSS_SEEN = 8 + self.FRAMES_ALIGN_READY = 12 + 
self.FRAMES_CROSS_END = 12 + self.FRAMES_NEXT_BLIND_OK = 8 + self.FRAMES_LOST_MAX = 45 + + self.ANGLE_ALIGN_THR_DEG = 12.0 + self.OFFSET_ALIGN_THR = 0.15 + + self.COOLDOWN_SEC = 0.6 + + # 找物品状态管理 + self.prev_nav_state_before_search = None # 找物品前的导航状态,用于恢复 + + # ----- 外部交互 ----- + def get_state(self) -> str: + return self.state + + def start_blind_path_navigation(self): + """启动盲道导航模式""" + self.state = BLINDPATH_NAV + self.cooldown_until = time.time() + self.COOLDOWN_SEC + if self.blind: + self.blind.reset() + + def stop_navigation(self): + """停止导航,回到对话模式""" + self.state = CHAT + self.cooldown_until = time.time() + self.COOLDOWN_SEC + if self.blind: + self.blind.reset() + + def start_crossing(self): + """启动过马路模式""" + self.state = CROSSING + self.cooldown_until = time.time() + self.COOLDOWN_SEC + if self.cross: + self.cross.reset() + + def start_traffic_light_detection(self): + """启动红绿灯检测模式""" + self.state = TRAFFIC_LIGHT_DETECTION + self.cooldown_until = time.time() + self.COOLDOWN_SEC + + def is_in_navigation_mode(self): + """检查是否在导航模式(非对话模式)""" + return self.state not in ["CHAT", "IDLE", "TRAFFIC_LIGHT_DETECTION", "ITEM_SEARCH"] + + def start_item_search(self): + """启动找物品模式,暂停当前导航""" + # 保存当前导航状态(如果在导航中) + if self.state in [BLINDPATH_NAV, SEEKING_CROSSWALK, WAIT_TRAFFIC_LIGHT, CROSSING, SEEKING_NEXT_BLINDPATH]: + self.prev_nav_state_before_search = self.state + print(f"[NAV MASTER] 暂停导航状态 {self.state},切换到找物品模式") + else: + self.prev_nav_state_before_search = None + + self.state = ITEM_SEARCH + self.cooldown_until = time.time() + self.COOLDOWN_SEC + + def stop_item_search(self, restore_nav: bool = True): + """停止找物品模式""" + # 如果需要恢复之前的导航状态 + if restore_nav and self.prev_nav_state_before_search: + self.state = self.prev_nav_state_before_search + print(f"[NAV MASTER] 找物品结束,恢复到导航状态 {self.state}") + self.prev_nav_state_before_search = None + else: + # 否则回到对话模式 + self.state = CHAT + print(f"[NAV MASTER] 找物品结束,回到对话模式") + + self.cooldown_until = time.time() + 
self.COOLDOWN_SEC + + def force_state(self, s: str): + self.state = s + self.cooldown_until = time.time() + self.COOLDOWN_SEC + + def on_voice_command(self, text: str): + t = (text or "").strip() + if "开始过马路" in t: + # 直接进入等待/或立即过马路(低速环境可直过) + if self.state in (BLINDPATH_NAV, SEEKING_CROSSWALK, WAIT_TRAFFIC_LIGHT, IDLE, RECOVERY, SEEKING_NEXT_BLINDPATH): + self.state = WAIT_TRAFFIC_LIGHT + self.cooldown_until = time.time() + self.COOLDOWN_SEC + elif "立即通过" in t or "现在通过" in t: + self.state = CROSSING + self.cooldown_until = time.time() + self.COOLDOWN_SEC + elif "停止" in t or "结束" in t: + self.state = IDLE + elif "继续" in t: + if self.state == IDLE: + self.state = BLINDPATH_NAV + + def reset(self): + self.state = IDLE + self.cnt_crosswalk_seen = 0 + self.cnt_align_ready = 0 + self.cnt_cross_end = 0 + self.cnt_lost = 0 + self.tl_major.clear() + self.tl_last_color = "unknown" + self.prev_target_state = BLINDPATH_NAV + self._last_wait_light_announce = 0 # 重置等待绿灯播报时间 + try: + self.blind.reset() + except Exception: + pass + try: + self.cross.reset() + except Exception: + pass + + # ----- 内部工具 ----- + def _say(self, now: float, text: str) -> str: + if not text: + return "" + if now - self.last_guidance_ts >= self.min_tts_interval: + self.last_guidance_ts = now + return text + return "" + + def _draw_tl_status(self, img: np.ndarray, color: str, meta: Dict[str, Any]): + if img is None: + return + color_bgr = _color_bgr(color) + # 角标与文本 + cv2.circle(img, (24, 24), 10, color_bgr, -1) + _put_text(img, f"信号灯: {color}", (40, 30), color=color_bgr, scale=0.6, thick=2, outline=False) + # 画 bbox(若有) + if meta and "bbox" in meta: + x1, y1, x2, y2 = meta["bbox"] + cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), color_bgr, 2) + + # 多数表决历史(最近8帧) + hist = self.tl_major.history() + if hist: + x0, y0 = 10, 50 + r = 6 + gap = 16 + for i, hcol in enumerate(hist[-12:]): + cv2.circle(img, (x0 + i*gap, y0), r, _color_bgr(hcol), -1) + _put_text(img, "信号历史", (x0, y0+20), 
color=(255,255,255), scale=0.5, thick=1) + + # ----- 主循环 ----- + def process_frame(self, bgr: np.ndarray) -> OrchestratorResult: + now = time.time() + + # 【修改】IDLE状态默认进入CHAT模式,而不是自动开始导航 + if self.state == IDLE: + self.state = CHAT + self.cooldown_until = now + self.COOLDOWN_SEC + + # 【新增】CHAT模式:只返回原始画面,不进行导航 + if self.state == CHAT: + return OrchestratorResult( + annotated_image=bgr, + guidance_text="", + state="CHAT", + extras={"mode": "对话模式"} + ) + + # 【新增】红绿灯检测模式:只返回原始画面,由红绿灯模块处理 + if self.state == TRAFFIC_LIGHT_DETECTION: + return OrchestratorResult( + annotated_image=bgr, + guidance_text="", + state="TRAFFIC_LIGHT_DETECTION", + extras={"mode": "红绿灯检测模式"} + ) + + # 【新增】找物品模式:只返回原始画面,由yolomedia处理 + if self.state == ITEM_SEARCH: + return OrchestratorResult( + annotated_image=bgr, + guidance_text="", + state="ITEM_SEARCH", + extras={"mode": "找物品模式", "prev_nav_state": self.prev_nav_state_before_search} + ) + + # 冷却期内允许继续输出画面,但避免"瞬时切换" + in_cooldown = now < self.cooldown_until + + # 各状态处理 + if self.state in (BLINDPATH_NAV, SEEKING_CROSSWALK, SEEKING_NEXT_BLINDPATH, RECOVERY): + # —— 盲道侧 —— 统一调用盲道导航器 + try: + bres: BlindResult = self.blind.process_frame(bgr) + except Exception as e: + # 异常 → 进入恢复态 + self.state = RECOVERY + self.cnt_lost += 5 + ann_err = bgr.copy() + # 【移除】所有可视化干扰 + # _draw_badge(ann_err, "NAV ERROR", (10, 28), fg="white", bg="red") + # _put_text(ann_err, str(e), (10, 56), color=(255,255,255), scale=0.55) + return OrchestratorResult(ann_err, self._say(now, ""), self.state, {"error": str(e)}) + + ann = bres.annotated_image if bres.annotated_image is not None else bgr.copy() + say = bres.guidance_text or "" + + state_info = bres.state_info or {} + cross_stage = state_info.get("crosswalk_stage", "not_detected") + blind_state = state_info.get("state", "UNKNOWN") + # 可选字段(若工作流未来补充) + angle = float(state_info.get("last_angle", 0.0)) + center_x_ratio = float(state_info.get("last_center_x_ratio", 0.5)) + + # —— 盲道 → 发现斑马线(approaching/ready) + if self.state == 
BLINDPATH_NAV: + if cross_stage in ("approaching", "ready"): + self.cnt_crosswalk_seen += 1 + else: + self.cnt_crosswalk_seen = max(0, self.cnt_crosswalk_seen - 1) + + if self.cnt_crosswalk_seen >= self.FRAMES_CROSS_SEEN and not in_cooldown: + self.state = SEEKING_CROSSWALK + self.cooldown_until = now + self.COOLDOWN_SEC + say = "正在接近斑马线,为您对准方向。" + + # 【移除】所有可视化干扰 + # _draw_badge(ann, f"STATE: {self.state}", (10, 28), fg="white", bg="blue") + # _draw_state_panel(ann, { + # "盲道状态": blind_state, + # "斑马线阶段": cross_stage, + # "靠近计数": self.cnt_crosswalk_seen, + # }, pos=(10, 60)) + # _draw_progress_bar(ann, max(0.0, min(1.0, self.cnt_crosswalk_seen / max(1, self.FRAMES_CROSS_SEEN))), pos=(10, 120), size=(180, 10), color="cyan") + # _draw_frame_border(ann, color=_color_bgr("blue"), thickness=3) + + # —— 对准阶段:同时利用 blind 内部 crosswalk_tracker 的角度与偏移(若提供) + elif self.state == SEEKING_CROSSWALK: + aligned = (abs(angle) <= self.ANGLE_ALIGN_THR_DEG and abs(center_x_ratio - 0.5) <= self.OFFSET_ALIGN_THR) + if cross_stage == "ready" and aligned: + self.cnt_align_ready += 1 + else: + self.cnt_align_ready = max(0, self.cnt_align_ready - 1) + + if self.cnt_align_ready >= self.FRAMES_ALIGN_READY and not in_cooldown: + self.state = WAIT_TRAFFIC_LIGHT + self.cooldown_until = now + self.COOLDOWN_SEC + say = "已到达斑马线,请等待红绿灯。" + + # 【移除】所有可视化干扰 + # _draw_badge(ann, f"STATE: {self.state}", (10, 28), fg="white", bg="orange") + # panel = { + # "阶段": cross_stage, + # "对准计数": self.cnt_align_ready, + # } + # if "last_angle" in state_info: + # panel["角度(°)"] = f"{angle:.1f}" + # if "last_center_x_ratio" in state_info: + # panel["偏移"] = f"{(center_x_ratio-0.5):+.2f}" + # _draw_state_panel(ann, panel, pos=(10, 60)) + # _draw_progress_bar(ann, max(0.0, min(1.0, self.cnt_align_ready / max(1, self.FRAMES_ALIGN_READY))), pos=(10, 120), size=(220, 10), color="yellow") + # _draw_frame_border(ann, color=_color_bgr("orange"), thickness=3) + + # —— 过马路后寻找下一段盲道(上盲道流程) + elif self.state == 
SEEKING_NEXT_BLINDPATH: + if blind_state == "NAVIGATING": + self.cnt_cross_end += 1 + else: + self.cnt_cross_end = max(0, self.cnt_cross_end - 1) + if self.cnt_cross_end >= self.FRAMES_NEXT_BLIND_OK and not in_cooldown: + self.state = BLINDPATH_NAV + self.cooldown_until = now + self.COOLDOWN_SEC + say = "方向正确,请继续前进。" + + # 【移除】所有可视化干扰 + # _draw_badge(ann, f"STATE: {self.state}", (10, 28), fg="white", bg="green") + # _draw_state_panel(ann, { + # "盲道状态": blind_state, + # "回归计数": self.cnt_cross_end + # }, pos=(10, 60)) + # _draw_progress_bar(ann, max(0.0, min(1.0, self.cnt_cross_end / max(1, self.FRAMES_NEXT_BLIND_OK))), pos=(10, 120), size=(200, 10), color="green") + # _draw_frame_border(ann, color=_color_bgr("green"), thickness=3) + + # —— 恢复态:一旦盲道恢复可用则回盲道 + elif self.state == RECOVERY: + if blind_state in ("ONBOARDING", "NAVIGATING"): + self.state = BLINDPATH_NAV + self.cooldown_until = now + self.COOLDOWN_SEC + say = "" + else: + say = "" + # 【移除】所有可视化干扰 + # _draw_badge(ann, f"STATE: {self.state}", (10, 28), fg="white", bg="red") + # _draw_state_panel(ann, { + # "提示": "请缓慢环顾/抬头/降低手机角度", + # "丢失计数": self.cnt_lost + # }, pos=(10, 60)) + # _draw_frame_border(ann, color=_color_bgr("red"), thickness=3) + + # 丢失计数(兜底) + if blind_state == "UNKNOWN" and cross_stage == "not_detected": + self.cnt_lost += 1 + else: + self.cnt_lost = max(0, self.cnt_lost - 2) + if self.cnt_lost >= self.FRAMES_LOST_MAX and self.state != RECOVERY: + self.prev_target_state = self.state + self.state = RECOVERY + self.cooldown_until = now + self.COOLDOWN_SEC + say = "环境复杂,进入恢复模式。" + + # 【移除】冷却进度条 + # if in_cooldown: + # remain = max(0.0, self.cooldown_until - now) + # ratio = 1.0 - min(1.0, remain / self.COOLDOWN_SEC) + # _draw_progress_bar(ann, ratio, pos=(10, 140), size=(160, 8), color="gray") + + return OrchestratorResult(ann, self._say(now, say), self.state, {"source": "blind", "cross_stage": cross_stage, "blind_state": blind_state}) + + if self.state == WAIT_TRAFFIC_LIGHT: + ann = bgr.copy() 
+ # 红绿灯识别(多数表决+冷却) + color, meta = self.tld.detect(bgr) + self.tl_major.push(color) + major = self.tl_major.majority() + self.tl_last_color = major + + # 【移除】所有可视化干扰 + # _draw_badge(ann, f"STATE: {self.state}", (10, 28), fg="white", bg="magenta") + # self._draw_tl_status(ann, major, meta) + # _draw_state_panel(ann, { + # "提示": "请等待绿灯或语音确认"立即通过"", + # "冷却": f"{max(0.0, self.cooldown_until - now):.1f}s" + # }, pos=(10, 80)) + # _draw_frame_border(ann, color=_color_bgr("magenta"), thickness=3) + + say = "" + if major == "green" and not in_cooldown: + self.state = CROSSING + self.cooldown_until = now + self.COOLDOWN_SEC + say = "绿灯稳定,开始通行。" + else: + # 只在刚进入状态或每隔一段时间才播报 + if not hasattr(self, '_last_wait_light_announce'): + self._last_wait_light_announce = 0 + if now - self._last_wait_light_announce > 5.0: # 5秒播报一次 + say = "正在等待绿灯…" + self._last_wait_light_announce = now + + + + # 【移除】冷却进度 + # if in_cooldown: + # remain = max(0.0, self.cooldown_until - now) + # ratio = 1.0 - min(1.0, remain / self.COOLDOWN_SEC) + # _draw_progress_bar(ann, ratio, pos=(10, 140), size=(160, 8), color="gray") + + return OrchestratorResult(ann, self._say(now, say), self.state, {"traffic_light": major}) + + if self.state == CROSSING: + try: + cres: CrossResult = self.cross.process_frame(bgr) + except Exception as e: + # 异常 → 恢复 + self.state = RECOVERY + ann_err = bgr.copy() + # 【移除】所有可视化干扰 + # _draw_badge(ann_err, "CROSS ERROR", (10, 28), fg="white", bg="red") + # _put_text(ann_err, str(e), (10, 56), color=(255,255,255), scale=0.55) + return OrchestratorResult(ann_err, self._say(now, ""), self.state, {"error": str(e)}) + + ann = cres.annotated_image if cres.annotated_image is not None else bgr.copy() + say = cres.guidance_text or "" + + # 新增:检查是否检测到盲道 + blind_path_detected = getattr(cres, 'blind_path_detected', False) + blind_path_guidance = getattr(cres, 'blind_path_guidance', "") + + # 如果检测到盲道且需要引导,优先处理盲道引导 + if blind_path_detected and blind_path_guidance: + # 如果应该切换到盲道导航(盲道很近),直接切换状态 + if 
hasattr(cres, "should_switch_to_blindpath") and cres.should_switch_to_blindpath: + if not in_cooldown: + self.state = BLINDPATH_NAV + self.cooldown_until = now + self.COOLDOWN_SEC + say = "已到盲道跟前,切换到盲道导航。" # 使用现有语音文件 + self.cnt_cross_end = 0 # 重置计数器 + # 重置盲道导航器状态 + if hasattr(self.blind, 'reset'): + self.blind.reset() + else: + # 盲道较远,继续过马路但给出盲道引导 + # say 已经在 cres.guidance_text 中包含了盲道引导信息 + pass + + # 原有的结束条件:连续多帧"寻找斑马线" + end_hint = False + if "寻找斑马线" in (say or ""): + end_hint = True + # 注意:不再单纯因为 should_switch_to_blindpath 就结束过马路 + # if hasattr(cres, "should_switch_to_blindpath") and cres.should_switch_to_blindpath: + # end_hint = True + + self.cnt_cross_end = self.cnt_cross_end + 1 if end_hint else max(0, self.cnt_cross_end - 1) + + if self.cnt_cross_end >= self.FRAMES_CROSS_END and not in_cooldown: + self.state = SEEKING_NEXT_BLINDPATH + self.cooldown_until = now + self.COOLDOWN_SEC + say = "过马路结束,准备上人行道。" + + # 【移除】所有可视化干扰 + # _draw_badge(ann, f"STATE: {self.state}", (10, 28), fg="white", bg="cyan") + # _draw_state_panel(ann, { + # "结束计数": self.cnt_cross_end, + # "冷却": f"{max(0.0, self.cooldown_until - now):.1f}s" + # }, pos=(10, 60)) + # _draw_progress_bar(ann, max(0.0, min(1.0, self.cnt_cross_end / max(1, self.FRAMES_CROSS_END))), pos=(10, 120), size=(220, 10), color="cyan") + # _draw_frame_border(ann, color=_color_bgr("cyan"), thickness=3) + # if in_cooldown: + # remain = max(0.0, self.cooldown_until - now) + # ratio = 1.0 - min(1.0, remain / self.COOLDOWN_SEC) + # _draw_progress_bar(ann, ratio, pos=(10, 140), size=(160, 8), color="gray") + + return OrchestratorResult(ann, self._say(now, say), self.state, {"source": "cross", "end_cnt": self.cnt_cross_end}) + + # 兜底 + ann = bgr.copy() + # 【移除】所有可视化干扰 + # _draw_badge(ann, f"STATE: {self.state}", (10, 28), fg="white", bg="gray") + # _draw_frame_border(ann, color=_color_bgr("gray"), thickness=2) + return OrchestratorResult(ann, "", self.state, {}) + + diff --git a/numba_utils.py b/numba_utils.py new file mode 
# numba_utils.py - Day 20 Numba multi-core acceleration utilities
"""
Accelerate numpy-dense mask operations with Numba JIT compilation.

When Numba is available the hot loops are compiled to native code (and,
where provably safe, parallelized across cores to bypass the GIL);
otherwise the module falls back to equivalent pure-numpy implementations
that return identical results.
"""

import numpy as np

try:
    from numba import jit, prange
    NUMBA_AVAILABLE = True
except ImportError:
    NUMBA_AVAILABLE = False
    print("[NUMBA] Numba 未安装,使用 numpy 回退实现")


if NUMBA_AVAILABLE:
    @jit(nopython=True, parallel=True, cache=True)
    def count_mask_pixels_numba(mask: np.ndarray) -> int:
        """Count non-zero pixels in a 2-D mask (parallel sum reduction)."""
        # `count += 1` is a Numba-recognized prange reduction -> race-free.
        count = 0
        h, w = mask.shape
        for i in prange(h):
            for j in range(w):
                if mask[i, j] > 0:
                    count += 1
        return count

    # FIX: this kernel previously used @jit(parallel=True) with prange, but
    # the bounding-box updates (`if j < min_x: min_x = j`, ...) are plain
    # conditional assignments, not one of Numba's supported prange reduction
    # operators (+=, *=, min()/max() calls).  Under prange those writes race
    # across threads and can silently return a wrong bbox.  The loop is now
    # compiled serially; it is still a single fused native pass.
    @jit(nopython=True, cache=True)
    def compute_mask_stats_numba(mask: np.ndarray) -> tuple:
        """
        Compute mask statistics in one pass.
        返回: (area, center_x, center_y, min_x, max_x, min_y, max_y)
        """
        h, w = mask.shape
        count = 0
        sum_x = 0.0
        sum_y = 0.0
        min_x = w
        max_x = 0
        min_y = h
        max_y = 0

        for i in range(h):
            for j in range(w):
                if mask[i, j] > 0:
                    count += 1
                    sum_x += j
                    sum_y += i
                    if j < min_x:
                        min_x = j
                    if j > max_x:
                        max_x = j
                    if i < min_y:
                        min_y = i
                    if i > max_y:
                        max_y = i

        if count == 0:
            # FIX: return an all-zero bbox for an empty mask so the numba
            # path agrees with the numpy fallback (which returns (0,0,0,0));
            # previously this path returned (w, 0, h, 0).
            return (0, 0.0, 0.0, 0, 0, 0, 0)

        return (count, sum_x / count, sum_y / count, min_x, max_x, min_y, max_y)

    @jit(nopython=True, parallel=True, cache=True)
    def bitwise_and_count_numba(mask1: np.ndarray, mask2: np.ndarray) -> int:
        """Count pixels where both masks are non-zero (parallel reduction)."""
        h, w = mask1.shape
        count = 0
        for i in prange(h):
            for j in range(w):
                if mask1[i, j] > 0 and mask2[i, j] > 0:
                    count += 1
        return count

    @jit(nopython=True, parallel=True, cache=True)
    def resize_mask_nearest_numba(mask: np.ndarray, new_h: int, new_w: int) -> np.ndarray:
        """
        Nearest-neighbour resize of a mask (parallel over output rows).

        Safe under prange: each iteration writes a distinct output row.
        Precondition: new_h > 0 and new_w > 0.
        """
        old_h, old_w = mask.shape
        result = np.zeros((new_h, new_w), dtype=np.uint8)

        scale_y = old_h / new_h
        scale_x = old_w / new_w

        for i in prange(new_h):
            for j in range(new_w):
                src_y = int(i * scale_y)
                src_x = int(j * scale_x)
                if src_y >= old_h:
                    src_y = old_h - 1
                if src_x >= old_w:
                    src_x = old_w - 1
                result[i, j] = mask[src_y, src_x]

        return result


# --- Public API: dispatch to Numba or numpy depending on availability ---

def count_mask_pixels(mask: np.ndarray) -> int:
    """Count non-zero pixels in `mask`."""
    if NUMBA_AVAILABLE:
        return count_mask_pixels_numba(mask)
    return int(np.sum(mask > 0))


def compute_mask_stats(mask: np.ndarray) -> dict:
    """
    Compute mask statistics.

    Returns:
        {'area': int, 'center_x': float, 'center_y': float,
         'bbox': (x1, y1, x2, y2)}  -- all zeros for an empty mask.
    """
    if NUMBA_AVAILABLE:
        area, cx, cy, min_x, max_x, min_y, max_y = compute_mask_stats_numba(mask)
        return {
            'area': int(area),
            'center_x': float(cx),
            'center_y': float(cy),
            'bbox': (int(min_x), int(min_y), int(max_x), int(max_y))
        }
    # numpy fallback
    y_coords, x_coords = np.where(mask > 0)
    if len(y_coords) == 0:
        return {'area': 0, 'center_x': 0, 'center_y': 0, 'bbox': (0, 0, 0, 0)}
    return {
        'area': len(y_coords),
        'center_x': float(np.mean(x_coords)),
        'center_y': float(np.mean(y_coords)),
        'bbox': (int(np.min(x_coords)), int(np.min(y_coords)),
                 int(np.max(x_coords)), int(np.max(y_coords)))
    }


def bitwise_and_count(mask1: np.ndarray, mask2: np.ndarray) -> int:
    """Count intersection pixels of two masks.

    NOTE(review): the numba path casts via astype(np.uint8), which wraps
    values > 255; masks are assumed to be uint8 0/255 — confirm at callers.
    """
    if NUMBA_AVAILABLE:
        return bitwise_and_count_numba(mask1.astype(np.uint8), mask2.astype(np.uint8))
    return int(np.sum(np.bitwise_and(mask1, mask2) > 0))


def warmup():
    """Pre-trigger JIT compilation so the first real call is not slow."""
    if NUMBA_AVAILABLE:
        dummy = np.zeros((10, 10), dtype=np.uint8)
        dummy[5, 5] = 255
        count_mask_pixels_numba(dummy)
        compute_mask_stats_numba(dummy)
        bitwise_and_count_numba(dummy, dummy)
        print("[NUMBA] JIT 编译预热完成,已启用多核加速")


if __name__ == "__main__":
    # Micro-benchmark: numpy vs numba implementations.
    import time

    test_mask = np.zeros((480, 640), dtype=np.uint8)
    test_mask[100:300, 200:400] = 255

    start = time.perf_counter()
    for _ in range(100):
        np.sum(test_mask > 0)
    numpy_time = (time.perf_counter() - start) * 1000

    if NUMBA_AVAILABLE:
        count_mask_pixels_numba(test_mask)  # warm up JIT outside the timer

        start = time.perf_counter()
        for _ in range(100):
            count_mask_pixels_numba(test_mask)
        numba_time = (time.perf_counter() - start) * 1000

        print(f"numpy: {numpy_time:.2f}ms / 100 次")
        print(f"numba: {numba_time:.2f}ms / 100 次")
        if numba_time > 0:  # FIX: guard zero-division on very fast runs
            print(f"加速比: {numpy_time / numba_time:.1f}x")
numpy 版本 + start = time.perf_counter() + for _ in range(100): + np.sum(test_mask > 0) + numpy_time = (time.perf_counter() - start) * 1000 + + # 测试 numba 版本 + if NUMBA_AVAILABLE: + # 预热 + count_mask_pixels_numba(test_mask) + + start = time.perf_counter() + for _ in range(100): + count_mask_pixels_numba(test_mask) + numba_time = (time.perf_counter() - start) * 1000 + + print(f"numpy: {numpy_time:.2f}ms / 100 次") + print(f"numba: {numba_time:.2f}ms / 100 次") + print(f"加速比: {numpy_time / numba_time:.1f}x") diff --git a/obstacle_detector_client.py b/obstacle_detector_client.py new file mode 100644 index 0000000..487fddd --- /dev/null +++ b/obstacle_detector_client.py @@ -0,0 +1,244 @@ +# app/cloud/obstacle_detector_client.py (新文件) +import logging +import os +import cv2 +import numpy as np +import torch +from threading import Semaphore +from contextlib import contextmanager +from ultralytics import YOLOE +from typing import List, Dict, Any + +# Day 20: Numba 多核加速 +try: + from numba_utils import count_mask_pixels, compute_mask_stats, bitwise_and_count, warmup as numba_warmup + NUMBA_ENABLED = True +except ImportError: + NUMBA_ENABLED = False + +logger = logging.getLogger(__name__) + +# --- GPU/CPU & AMP 配置 (从 blindpath 工作流迁移而来,保持一致) --- +DEVICE = os.getenv("AIGLASS_DEVICE", "cuda:0") +if DEVICE.startswith("cuda") and not torch.cuda.is_available(): + logger.warning(f"AIGLASS_DEVICE={DEVICE} 但未检测到 CUDA,将回退到 CPU") + DEVICE = "cpu" +IS_CUDA = DEVICE.startswith("cuda") + +AMP_POLICY = os.getenv("AIGLASS_AMP", "fp16").lower() +if AMP_POLICY not in ("bf16", "fp16", "off"): + AMP_POLICY = "fp16" +AMP_DTYPE = torch.bfloat16 if AMP_POLICY == "bf16" else (torch.float16 if AMP_POLICY == "fp16" else None) + +# --- GPU 并发限流 (从 blindpath 工作流迁移而来,保持一致) --- +# Day 20: 增加默认槽位从 2 到 4,RTX 3090 可以处理更多并发 +GPU_SLOTS = int(os.getenv("AIGLASS_GPU_SLOTS", "4")) +_gpu_slots = Semaphore(GPU_SLOTS) + +try: + torch.backends.cudnn.benchmark = True +except Exception: + pass + + +@contextmanager +def 
@contextmanager
def gpu_infer_slot():
    """Acquire a GPU slot and enter the inference/AMP context.

    Bundles three concerns:
      * `_gpu_slots` semaphore caps concurrent GPU inference tasks;
      * `torch.inference_mode()` disables autograd bookkeeping;
      * AMP autocast is enabled when on CUDA and the AMP policy is not 'off'.
    """
    with _gpu_slots:
        if IS_CUDA and AMP_POLICY != "off":
            # Modern API: torch.amp.autocast(device_type='cuda', dtype=...)
            with torch.inference_mode(), torch.amp.autocast(device_type='cuda', dtype=AMP_DTYPE):
                yield
        else:
            with torch.inference_mode():
                yield


class ObstacleDetectorClient:
    """Open-vocabulary obstacle detector backed by a YOLOE segmentation model.

    Supports both the regular PyTorch checkpoint (with text-prompt whitelist)
    and a TensorRT engine (COCO classes + post-hoc whitelist filtering).
    """

    def __init__(self, model_path: str = 'model/yoloe-11l-seg.pt'):
        """Load the model and, when supported, precompute whitelist text embeddings.

        Raises:
            Exception: re-raised after logging if model load / embedding
            precomputation fails.
        """
        self.model = None
        self.whitelist_embeddings = None
        # Text prompts used for open-vocabulary detection (non-TensorRT path).
        self.WHITELIST_CLASSES = [
            'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'animal', 'scooter', 'stroller', 'dog',
            'pole', 'post', 'column', 'pillar', 'stanchion', 'bollard', 'utility pole',
            'telegraph pole', 'light pole', 'street pole', 'signpost', 'support post',
            'vertical post', 'bench', 'chair', 'potted plant', 'hydrant', 'cone', 'stone', 'box'
        ]
        # COCO class whitelist - used for post-filtering in TensorRT mode,
        # where text-prompt embeddings are unavailable.
        self.COCO_WHITELIST = {
            'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck',          # traffic
            'dog', 'cat', 'horse', 'cow', 'sheep',                             # animals
            'bench', 'chair', 'potted plant', 'fire hydrant', 'stop sign',     # street furniture
            'parking meter', 'suitcase', 'backpack', 'umbrella', 'handbag',    # objects
            'sports ball', 'skateboard', 'surfboard', 'bottle', 'cup',         # misc obstacles
        }
        try:
            # Day 20: prefer a TensorRT engine when one is available.
            try:
                from model_utils import get_best_model_path, is_tensorrt_engine
                model_path = get_best_model_path(model_path)
            except ImportError:
                def is_tensorrt_engine(p): return p.endswith('.engine')

            logger.info(f"正在加载 YOLOE 障碍物模型: {model_path}")
            self.model = YOLOE(model_path)

            # Day 20: TensorRT engines support neither .to()/.fuse() nor
            # get_text_pe(), so whitelist embeddings are skipped there.
            if is_tensorrt_engine(model_path):
                logger.info(f"TensorRT 引擎已加载,跳过 .to() 和 .fuse()")
                self.whitelist_embeddings = None
                logger.info("TensorRT 模式:跳过白名单特征预计算")
            else:
                self.model.to(DEVICE)
                self.model.fuse()
                logger.info(f"YOLOE 障碍物模型加载成功,使用设备: {DEVICE}")

                logger.info("正在为 YOLOE 预计算白名单文本特征...")
                if IS_CUDA and AMP_DTYPE is not None:
                    with torch.inference_mode(), torch.amp.autocast(device_type='cuda', dtype=AMP_DTYPE):
                        self.whitelist_embeddings = self.model.get_text_pe(self.WHITELIST_CLASSES)
                else:
                    self.whitelist_embeddings = self.model.get_text_pe(self.WHITELIST_CLASSES)
                logger.info("YOLOE 特征预计算完成。")
        except Exception as e:
            logger.error(f"YOLOE 模型加载或特征计算失败: {e}", exc_info=True)
            raise

    # FIX: this helper was declared as an instance method without `self`,
    # so any call through an instance raised TypeError (extra positional
    # argument).  It takes no instance state, hence @staticmethod.
    @staticmethod
    def tensor_to_numpy_mask(mask_tensor):
        """Convert a mask tensor of any float dtype to a uint8 binary mask.

        bfloat16/float16 are upcast first because numpy cannot represent
        bfloat16.  Probability masks (max <= 1.0) are thresholded at 0.5
        and scaled to 0/255; other masks are cast to uint8 as-is.
        """
        if mask_tensor.dtype in (torch.bfloat16, torch.float16):
            mask_tensor = mask_tensor.float()

        mask = mask_tensor.cpu().numpy()

        if mask.max() <= 1.0:
            mask = (mask > 0.5).astype(np.uint8) * 255
        else:
            mask = mask.astype(np.uint8)

        return mask

    def detect(self, image: np.ndarray, path_mask: np.ndarray = None) -> List[Dict[str, Any]]:
        """Detect whitelist obstacles; optionally filter to those on `path_mask`.

        Args:
            image: BGR frame (H, W, 3).
            path_mask: optional uint8 mask; when given, only obstacles with
                sufficient overlap with the path are kept.

        Returns:
            List of dicts: name, mask, area, area_ratio, center_x/center_y,
            bottom_y_ratio.  Empty list when the model is unavailable or
            nothing passes the filters.
        """
        if self.model is None:
            return []

        H, W = image.shape[:2]

        # TensorRT mode has no embeddings -> skip set_classes; the engine
        # then detects with its default (COCO) classes.
        if self.whitelist_embeddings is not None:
            try:
                self.model.set_classes(self.WHITELIST_CLASSES, self.whitelist_embeddings)
            except Exception as e:
                logger.error(f"设置 YOLOE 提示词失败: {e}")
                return []

        conf_thr = float(os.getenv("AIGLASS_OBS_CONF", "0.25"))
        # Day 22: tunable input size (default lowered from 640) + FP16.
        imgsz = int(os.getenv("AIGLASS_OBS_IMGSZ", "480"))
        use_half = os.getenv("AIGLASS_OBS_HALF", "1") == "1"

        with gpu_infer_slot():
            results = self.model.predict(
                image,
                verbose=False,
                conf=conf_thr,
                imgsz=imgsz,     # smaller input for speed
                half=use_half    # FP16 half-precision
            )

        if not (results and results[0].masks):
            return []

        # --- filtering / post-processing (kept in sync with blindpath flow) ---
        final_obstacles = []
        num_masks = len(results[0].masks.data)
        num_boxes = len(results[0].boxes.cls) if getattr(results[0].boxes, "cls", None) is not None else 0

        for i, mask_tensor in enumerate(results[0].masks.data):
            if i >= num_boxes:
                continue

            # numpy cannot hold BFloat16 -> upcast before .numpy().
            if mask_tensor.dtype == torch.bfloat16:
                mask_tensor = mask_tensor.float()
            mask = mask_tensor.cpu().numpy()

            # Probability mask (values in [0,1]) vs already-binary mask.
            if mask.max() <= 1.0:
                mask = (mask > 0.5).astype(np.uint8) * 255
            else:
                mask = mask.astype(np.uint8)

            mask = cv2.resize(mask, (W, H), interpolation=cv2.INTER_NEAREST)

            # Day 20: Numba-accelerated statistics when available.
            if NUMBA_ENABLED:
                stats = compute_mask_stats(mask)
                area = stats['area']
                center_x = stats['center_x']
                center_y = stats['center_y']
                min_y, max_y = stats['bbox'][1], stats['bbox'][3]
            else:
                area = int(np.sum(mask > 0))
                y_coords, x_coords = np.where(mask > 0)
                if len(y_coords) == 0:
                    continue
                center_x = float(np.mean(x_coords))
                center_y = float(np.mean(y_coords))
                min_y, max_y = int(np.min(y_coords)), int(np.max(y_coords))

            # Size filter: very large blobs (e.g. the whole ground plane)
            # are usually misdetections.
            if (area / (H * W)) > 0.7:
                continue
            if area == 0:
                continue

            # Spatial filter: with a path mask, keep only on-path obstacles.
            if path_mask is not None:
                if NUMBA_ENABLED:
                    intersection_area = bitwise_and_count(mask, path_mask)
                else:
                    intersection_area = int(np.sum(cv2.bitwise_and(mask, path_mask) > 0))
                # Require sufficient overlap with the walkable path.
                if intersection_area < 100 or (intersection_area / area) < 0.01:
                    continue

            cls_id = int(results[0].boxes.cls[i])
            class_names_map = results[0].names
            class_name = "Unknown"
            if isinstance(class_names_map, dict):
                class_name = class_names_map.get(cls_id, "Unknown")
            elif isinstance(class_names_map, list) and 0 <= cls_id < len(class_names_map):
                class_name = class_names_map[cls_id]

            # TensorRT mode: filter by the COCO whitelist, since text
            # prompts could not be applied at inference time.
            if self.whitelist_embeddings is None:
                if class_name.lower().strip() not in self.COCO_WHITELIST:
                    continue

            final_obstacles.append({
                'name': class_name.strip(),
                'mask': mask,
                'area': area,
                'area_ratio': area / (H * W),
                'center_x': center_x,
                'center_y': center_y,
                'bottom_y_ratio': max_y / H
            })

        return final_obstacles
# omni_client.py
# -*- coding: utf-8 -*-
import os, base64, asyncio, threading
from typing import AsyncGenerator, Dict, Any, List, Optional, Tuple

from openai import OpenAI

# ===== OpenAI-compatible endpoint (DashScope compatible mode) =====
# SECURITY FIX: the API key used to be hard-coded here as the getenv()
# fallback.  That committed the secret to version control AND made the
# `if not API_KEY` guard below dead code (it could never be falsy).
# The key must now come from the environment; the previously committed
# key should be revoked/rotated immediately.
API_KEY = os.getenv("DASHSCOPE_API_KEY")
if not API_KEY:
    raise RuntimeError("未设置 DASHSCOPE_API_KEY")

QWEN_MODEL = "qwen-omni-turbo"

# Compatible-mode client shared by all calls in this module.
oai_client = OpenAI(
    api_key=API_KEY,
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)

class OmniStreamPiece:
    """One streamed increment: text and/or audio (either may be None)."""
    def __init__(self, text_delta: Optional[str] = None, audio_b64: Optional[str] = None):
        self.text_delta = text_delta
        self.audio_b64 = audio_b64

async def stream_chat(
    content_list: List[Dict[str, Any]],
    voice: str = "Cherry",
    audio_format: str = "wav",
) -> AsyncGenerator[OmniStreamPiece, None]:
    """
    Run one streaming Omni-Turbo ChatCompletions round.

    Args:
        content_list: multimodal OpenAI chat `content` (image_url/text parts).
        voice: TTS voice for the audio modality.
        audio_format: container format of streamed audio chunks.

    Yields:
        OmniStreamPiece carrying a text delta and/or a base64 audio chunk.

    Day 13 fix: the blocking SDK call runs in a worker thread and feeds an
    asyncio.Queue, so the event loop is never blocked.
    """
    # Queue bridges the worker thread and the async consumer.
    queue: asyncio.Queue = asyncio.Queue()
    loop = asyncio.get_running_loop()

    def _sync_stream():
        """Run the blocking API call in a dedicated thread."""
        try:
            # Day 21: system prompt keeps answers terse for the glasses UX.
            system_prompt = """你是一个视障辅助AI助手,安装在智能导盲眼镜上。
请用极简短的语言回答,每次回答不超过2-3句话。
避免冗长解释,只提供最关键的信息。
语气友好但简洁。"""

            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": content_list}
            ]

            completion = oai_client.chat.completions.create(
                model=QWEN_MODEL,
                messages=messages,
                modalities=["text", "audio"],
                audio={"voice": voice, "format": audio_format},
                stream=True,
                stream_options={"include_usage": True},
            )

            for chunk in completion:
                text_delta: Optional[str] = None
                audio_b64: Optional[str] = None

                if getattr(chunk, "choices", None):
                    c0 = chunk.choices[0]
                    delta = getattr(c0, "delta", None)
                    # Text increment.
                    if delta and getattr(delta, "content", None):
                        piece = delta.content
                        if piece:
                            text_delta = piece
                    # Audio chunk: may arrive on delta.audio...
                    if delta and getattr(delta, "audio", None):
                        aud = delta.audio
                        audio_b64 = aud.get("data") if isinstance(aud, dict) else getattr(aud, "data", None)
                    # ...or on message.audio depending on server behavior.
                    if audio_b64 is None:
                        msg = getattr(c0, "message", None)
                        if msg and getattr(msg, "audio", None):
                            ma = msg.audio
                            audio_b64 = ma.get("data") if isinstance(ma, dict) else getattr(ma, "data", None)

                if (text_delta is not None) or (audio_b64 is not None):
                    # Thread-safe handoff into the event loop.
                    loop.call_soon_threadsafe(
                        queue.put_nowait,
                        OmniStreamPiece(text_delta=text_delta, audio_b64=audio_b64)
                    )
        except Exception as e:
            # Propagate the exception to the async consumer.
            loop.call_soon_threadsafe(queue.put_nowait, e)
        finally:
            # End-of-stream sentinel.
            loop.call_soon_threadsafe(queue.put_nowait, None)

    # Launch the blocking producer.
    thread = threading.Thread(target=_sync_stream, daemon=True)
    thread.start()

    # Consume the queue asynchronously.
    while True:
        item = await queue.get()
        if item is None:
            break  # stream finished
        if isinstance(item, Exception):
            raise item
        yield item
# qwen_extractor.py
# -*- coding: utf-8 -*-
from typing import List, Tuple
import os

# -- Local-first mapping (extend/rename freely) --
LOCAL_CN2EN = {
    "红牛": "Red_Bull",
    "ad钙奶": "AD_milk",
    "ad 钙奶": "AD_milk",
    "ad": "AD_milk",
    "钙奶": "AD_milk",
    "矿泉水": "bottle",
    "水瓶": "bottle",
    "可乐": "coke",
    "雪碧": "sprite",
}

def _make_client():
    """Build an OpenAI-compatible client for the DashScope endpoint.

    SECURITY FIX: the API key was hard-coded here; it now comes from the
    environment (DASHSCOPE_API_KEY) and the previously committed key must
    be rotated.  The `openai` import is deferred so the cheap local-mapping
    path of extract_english_label() works even without the SDK installed.
    """
    from openai import OpenAI
    base_url = os.getenv("DASHSCOPE_COMPAT_BASE", "https://dashscope.aliyuncs.com/compatible-mode/v1")
    api_key = os.getenv("DASHSCOPE_API_KEY")
    return OpenAI(api_key=api_key, base_url=base_url)

PROMPT_SYS = (
    "You are a label normalizer. Convert the given Chinese object "
    "description into a short, lowercase English YOLO/vision class name "
    "(1~3 words). If multiple are given, return the single most likely one. "
    "Output ONLY the label, no punctuation."
)

def extract_english_label(query_cn: str) -> Tuple[str, str]:
    """
    Map a Chinese object description to an English class label.

    返回 (label_en, source);source ∈ {'local', 'qwen', 'fallback'}

    Resolution order: exact local mapping -> substring local mapping ->
    Qwen normalization -> 'bottle' fallback on any failure.
    """
    q = (query_cn or "").strip().lower()
    if q in LOCAL_CN2EN:
        return LOCAL_CN2EN[q], "local"

    # Substring rule strips leading modifier words ("来瓶矿泉水" -> "矿泉水").
    # NOTE(review): very short keys like "ad" can over-match inside longer
    # ASCII strings — confirm the key list against real queries.
    for k, v in LOCAL_CN2EN.items():
        if k in q:
            return v, "local"

    # Fall back to Qwen Turbo (Chat Completions compatible endpoint).
    try:
        client = _make_client()
        msgs = [
            {"role": "system", "content": PROMPT_SYS},
            {"role": "user", "content": query_cn.strip()},
        ]
        rsp = client.chat.completions.create(
            model=os.getenv("QWEN_MODEL", "qwen-turbo"),
            messages=msgs,
            stream=False
        )
        label = (rsp.choices[0].message.content or "").strip()
        # Light cleanup: drop punctuation, collapse full-width spaces.
        label = label.replace(".", "").replace(",", "").replace(" ", " ").strip()
        # Fallback to 'bottle' if the model returned nothing usable.
        return (label or "bottle"), "qwen"
    except Exception:
        return "bottle", "fallback"


# qwenturbo_template.py — streaming demo for qwen-turbo with thinking mode.
def _qwenturbo_demo():
    """One-shot streaming chat demo (requires DASHSCOPE_API_KEY).

    FIX: this demo previously ran at import time with a hard-coded API key;
    it is now guarded behind __main__ and reads the key from the environment.
    """
    from openai import OpenAI
    client = OpenAI(
        api_key=os.getenv("DASHSCOPE_API_KEY"),
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )

    messages = [{"role": "user", "content": "你是谁"}]
    completion = client.chat.completions.create(
        model="qwen-turbo",  # swap for another thinking-capable model as needed
        messages=messages,
        extra_body={"enable_thinking": True},
        stream=True
    )
    is_answering = False  # whether the answer phase has started
    print("\n" + "=" * 20 + "思考过程" + "=" * 20)
    for chunk in completion:
        delta = chunk.choices[0].delta
        if hasattr(delta, "reasoning_content") and delta.reasoning_content is not None:
            if not is_answering:
                print(delta.reasoning_content, end="", flush=True)
        if hasattr(delta, "content") and delta.content:
            if not is_answering:
                print("\n" + "=" * 20 + "完整回复" + "=" * 20)
                is_answering = True
            print(delta.content, end="", flush=True)


if __name__ == "__main__":
    _qwenturbo_demo()
a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..263f1de --- /dev/null +++ b/requirements.txt @@ -0,0 +1,71 @@ +# AI Glass System - Python Dependencies +# Python 3.9 - 3.11 supported + +# Core Web Framework +fastapi==0.104.1 +uvicorn[standard]==0.24.0 +websockets==12.0 +python-multipart==0.0.6 +starlette==0.27.0 + +# Computer Vision & Deep Learning +opencv-python==4.8.1.78 +numpy==1.24.3 +Pillow==10.1.0 +ultralytics==8.3.200 +torch==2.0.1 +torchvision==0.15.2 + +# MediaPipe (Hand Detection) +mediapipe==0.10.8 + +# Audio Processing +pyaudio==0.2.14 +pydub==0.25.1 +pygame==2.5.2 + +# Aliyun DashScope SDK (ASR & Qwen-Omni) - 旧管道 +dashscope==1.14.1 +openai==1.3.5 # For DashScope compatibility mode + +# Day 21: 新 AI 管道 (SenseVoice + GLM + EdgeTTS) +# torchaudio 需与 torch 版本匹配,使用以下命令安装: +# pip install torchaudio==2.0.2+cu118 --index-url https://download.pytorch.org/whl/cu118 +funasr>=1.2.0 # SenseVoice 本地 ASR +edge-tts>=6.1.0 # 免费 TTS + +# Environment & Configuration +python-dotenv==1.0.0 + +# Utilities +opencv-contrib-python==4.8.1.78 # Extended OpenCV modules + +# Optional Performance Optimizations +# Uncomment if needed: +# onnxruntime-gpu==1.16.3 # For ONNX model acceleration +# tensorrt==8.6.1 # For TensorRT optimization +PyTurboJPEG>=1.7.0 # Day 19: 2-3x faster JPEG encode/decode than cv2 +# NOTE: PyTurboJPEG requires system library: sudo apt-get install libturbojpeg (Ubuntu) +numba>=0.58.0 # Day 20: JIT compilation for multi-core parallel numpy operations + +# Development & Testing (Optional) +# pytest==7.4.3 +# pytest-asyncio==0.21.1 +# black==23.11.0 +# flake8==6.1.0 + +# Platform-specific dependencies +# Windows: +# - PyAudio requires separate installation: https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyaudio +# Linux: +# - Install PyAudio dependencies: sudo apt-get install portaudio19-dev python3-pyaudio +# - Install OpenCV dependencies: sudo apt-get install libgl1-mesa-glx +# macOS: +# - Install PyAudio: brew install portaudio && pip 
# sensevoice_asr.py
# -*- coding: utf-8 -*-
"""
SenseVoice local ASR module - Day 21
参考 xiaozhi-esp32-server 的非流式实现

Characteristics:
- Non-streaming recognition (waits for the utterance to finish);
- Built-in VAD segmentation;
- Whole-sentence output (no word-by-word dribble).
"""

import os
import time
import asyncio
from typing import Optional, Tuple

import numpy as np
import torch


def _resolve_model_path() -> str:
    """Resolve the model source: env var > bundled local dir > hub identifier.

    FunASR needs a *directory* (containing config.yaml and model.pt) for a
    local model; otherwise a model identifier triggers a download to ~/.cache.
    """
    # FIX: the original called os.getenv twice and tested exists("") — the
    # resolution is now a single readable helper.
    env_path = os.getenv("SENSEVOICE_MODEL_PATH", "")
    if env_path and os.path.exists(env_path):
        return env_path
    local_dir = os.path.join(os.path.dirname(__file__), "model", "SenseVoiceSmall")
    if os.path.isdir(local_dir) and os.path.exists(os.path.join(local_dir, "model.pt")):
        return local_dir
    return "iic/SenseVoiceSmall"


MODEL_PATH = _resolve_model_path()

# GPU device — same convention as the main program: the server selects the
# physical GPU via CUDA_VISIBLE_DEVICES, so in-process it is always cuda:0.
if torch.cuda.is_available():
    DEVICE = os.getenv("SENSEVOICE_DEVICE", os.getenv("AIGLASS_DEVICE", "cuda:0"))
else:
    DEVICE = "cpu"

# Global model instance, created lazily under the lock.
_model: Optional["AutoModel"] = None
_model_lock = asyncio.Lock()


def _load_model():
    """Load the SenseVoice model (blocking); idempotent via the module cache."""
    global _model
    if _model is not None:
        return _model

    # funasr is heavy; imported lazily so this module imports cheaply.
    from funasr import AutoModel

    print(f"[SenseVoice] 正在加载模型: {MODEL_PATH}")
    print(f"[SenseVoice] 使用设备: {DEVICE}")

    start_time = time.time()

    _model = AutoModel(
        model=MODEL_PATH,
        vad_kwargs={"max_single_segment_time": 30000},  # VAD max 30 s
        disable_update=True,
        hub="hf",  # same source as xiaozhi-esp32-server
        device=DEVICE,
    )

    print(f"[SenseVoice] 模型加载完成,耗时: {time.time() - start_time:.2f}s")
    return _model


async def init_sensevoice():
    """Asynchronously initialize SenseVoice (call at server startup)."""
    async with _model_lock:
        await asyncio.to_thread(_load_model)
    print("[SenseVoice] 初始化完成")


async def recognize(pcm_data: bytes, sample_rate: int = 16000) -> str:
    """
    Recognize raw PCM audio.

    Args:
        pcm_data: PCM 16-bit mono audio bytes.
        sample_rate: sampling rate (default 16000).

    Returns:
        Recognized text, or "" on empty/too-short input or failure.
    """
    # FIX: validate the input BEFORE touching the model — the original
    # triggered a full (slow) model load even for empty/too-short audio.
    if not pcm_data or len(pcm_data) < 640:  # at least 20 ms @ 16 kHz/16-bit
        return ""

    if _model is None:
        await init_sensevoice()

    try:
        from funasr.utils.postprocess_utils import rich_transcription_postprocess

        start_time = time.time()

        # Run inference in a worker thread to keep the event loop free.
        # Day 22 fix: language pinned to "zh" — "auto" occasionally
        # misclassified speech as Korean etc.
        result = await asyncio.to_thread(
            _model.generate,
            input=pcm_data,
            cache={},
            language="zh",
            use_itn=True,
            batch_size_s=60,
        )

        if result and len(result) > 0 and "text" in result[0]:
            text = await asyncio.to_thread(
                rich_transcription_postprocess,
                result[0]["text"]
            )
            elapsed = time.time() - start_time
            print(f"[SenseVoice] 识别耗时: {elapsed:.3f}s | 结果: {text}")
            return text.strip()
        print("[SenseVoice] 识别结果为空")
        return ""

    except Exception as e:
        print(f"[SenseVoice] 识别失败: {e}")
        import traceback
        traceback.print_exc()
        return ""


async def recognize_from_file(file_path: str) -> str:
    """
    Recognize audio from a file path.

    Args:
        file_path: path to the audio file.

    Returns:
        Recognized text, or "" on failure.
    """
    if _model is None:
        await init_sensevoice()

    try:
        from funasr.utils.postprocess_utils import rich_transcription_postprocess

        start_time = time.time()

        result = await asyncio.to_thread(
            _model.generate,
            input=file_path,
            cache={},
            language="zh",  # Day 22 fix: pin to Chinese
            use_itn=True,
            batch_size_s=60,
        )

        if result and len(result) > 0 and "text" in result[0]:
            text = await asyncio.to_thread(
                rich_transcription_postprocess,
                result[0]["text"]
            )
            elapsed = time.time() - start_time
            print(f"[SenseVoice] 文件识别耗时: {elapsed:.3f}s | 结果: {text}")
            return text.strip()
        return ""

    except Exception as e:
        print(f"[SenseVoice] 文件识别失败: {e}")
        return ""
"""
Silero VAD 服务器端语音活动检测
参考 xiaozhi-esp32-server 实现
"""
import torch
import numpy as np
import os
import collections  # Day 23: for the VAD lookback buffer
import time

# Lazily-loaded Silero VAD model, shared module-wide.
_vad_model = None
_model_loaded = False

def get_vad_model():
    """Return the Silero VAD model, loading it on first call.

    Resolution order: bundled local copy -> torch.hub cache -> network
    download.  Returns None on failure (callers fall back to energy VAD);
    `_model_loaded` is set either way so a failing load is not retried on
    every call.
    """
    global _vad_model, _model_loaded

    if _model_loaded:
        return _vad_model

    try:
        # Prefer the locally bundled repo copy.
        model_dir = os.path.join(os.path.dirname(__file__), "model", "snakers4_silero-vad")
        if os.path.exists(model_dir):
            print(f"[VAD] 从本地加载 Silero VAD: {model_dir}")
            _vad_model, _ = torch.hub.load(
                repo_or_dir=model_dir,
                source="local",
                model="silero_vad",
                force_reload=False,
            )
        else:
            # Prefer the torch.hub cache to avoid a GitHub update check.
            cache_dir = os.path.expanduser("~/.cache/torch/hub/snakers4_silero-vad_master")
            if os.path.exists(cache_dir):
                print(f"[VAD] 使用 torch hub 缓存: {cache_dir}")
                _vad_model, _ = torch.hub.load(
                    repo_or_dir=cache_dir,
                    source="local",
                    model="silero_vad",
                    force_reload=False,
                )
            else:
                # No cache: download from the network.
                print("[VAD] 从 torch.hub 下载 Silero VAD...")
                _vad_model, _ = torch.hub.load(
                    repo_or_dir='snakers4/silero-vad',
                    model='silero_vad',
                    force_reload=False,
                )

        _model_loaded = True
        print("[VAD] Silero VAD 模型加载成功")
        return _vad_model
    except Exception as e:
        print(f"[VAD] Silero VAD 加载失败: {e}")
        _model_loaded = True  # avoid retrying a failing load on every call
        return None


class SileroVAD:
    """
    Server-side Silero VAD: detects speech start/end on streamed PCM audio.
    (Chunk processing and the energy fallback are implemented further down
    in this file.)
    """

    def __init__(self,
                 threshold: float = 0.5,       # Day 23: lowered again (was 0.7)
                 threshold_low: float = 0.3,   # Day 23: lowered again (was 0.4)
                 min_silence_ms: int = 800,    # Day 23: lengthened (was 600)
                 min_speech_ms: int = 300,     # Day 23: lowered (was 500)
                 sample_rate: int = 16000):
        """
        Initialize the VAD.

        Args:
            threshold: speech probability above this -> voiced frame.
            threshold_low: probability below this -> silent frame
                (between the two thresholds the previous state is kept).
            min_silence_ms: silence longer than this ends the utterance.
            min_speech_ms: utterances shorter than this are discarded.
            sample_rate: PCM sampling rate.
        """
        self.model = get_vad_model()
        self.threshold = threshold
        self.threshold_low = threshold_low
        self.min_silence_ms = min_silence_ms
        self.min_speech_ms = min_speech_ms
        self.sample_rate = sample_rate

        # Streaming state.
        self.audio_buffer = bytearray()
        self.is_speaking = False
        self.last_speech_time = 0
        self.speech_start_time = 0
        self.speech_audio = bytearray()  # accumulated utterance audio

        # TTS playback state — VAD is paused while TTS plays.
        self.tts_playing = False
        self.tts_end_time = 0          # when TTS finished (ms)
        self.tts_cooldown_ms = 500     # wait 500 ms after TTS before detecting

        # Sliding window of per-chunk voice decisions.
        self.voice_window = []
        self.window_size = 5       # window length
        self.frame_threshold = 3   # voiced frames required to start speech

        # Day 23: pre-speech lookback buffer fixes clipped word onsets.
        # ~300 ms lookback (each chunk is 32 ms) -> 10 chunks.
        self.pre_speech_buffer = collections.deque(maxlen=10)

        print(f"[VAD] 初始化: threshold={threshold}, threshold_low={threshold_low}, "
              f"min_silence_ms={min_silence_ms}, min_speech_ms={min_speech_ms}")

    def reset(self):
        """Reset all streaming state (buffers, flags, model RNN state)."""
        self.audio_buffer.clear()
        self.speech_audio.clear()
        self.is_speaking = False
        self.last_speech_time = 0
        self.speech_start_time = 0
        self.voice_window.clear()
        # FIX: the lookback buffer was not cleared before, so stale audio
        # from a previous utterance could be prepended to the next one.
        self.pre_speech_buffer.clear()
        self.tts_playing = False
        self.tts_end_time = 0
        if self.model:
            self.model.reset_states()

    def set_tts_playing(self, playing: bool):
        """Update TTS playback state; aborts any in-progress recording."""
        self.tts_playing = playing
        if not playing:
            # TTS finished: start the cooldown clock.
            self.tts_end_time = time.time() * 1000
            print("[VAD] TTS 结束,等待冷却期...")
        else:
            print("[VAD] TTS 开始播放,暂停 VAD 检测")
            # If recording, drop the partial utterance (it would contain TTS).
            if self.is_speaking:
                self.is_speaking = False
                self.speech_audio.clear()
                self.voice_window.clear()
                # pre_speech_buffer is always created in __init__, so the
                # old hasattr() guard was redundant.
                self.pre_speech_buffer.clear()
                print("[VAD] TTS 播放打断语音录制")
print("[VAD] TTS 播放打断语音录制") + + def process(self, audio_bytes: bytes) -> dict: + """ + 处理音频数据 + + Args: + audio_bytes: PCM 16-bit 音频数据 + + Returns: + dict: { + 'speech_started': bool, # 语音刚刚开始 + 'speech_ended': bool, # 语音刚刚结束 + 'is_speaking': bool, # 当前是否在说话 + 'speech_audio': bytes, # 如果语音结束,返回完整语音音频 + } + """ + result = { + 'speech_started': False, + 'speech_ended': False, + 'is_speaking': self.is_speaking, + 'speech_audio': None, + } + + if self.model is None: + # 没有模型,使用简单能量检测 + return self._fallback_energy_vad(audio_bytes, result) + + # TTS 播放期间,跳过 VAD 检测 + current_time = time.time() * 1000 + if self.tts_playing: + return result + + # TTS 刚结束,等待冷却期 + if self.tts_end_time > 0 and (current_time - self.tts_end_time) < self.tts_cooldown_ms: + return result + + # 将音频添加到缓冲区 + self.audio_buffer.extend(audio_bytes) + + # Silero VAD 需要 512 采样点 (32ms @ 16kHz) + chunk_size = 512 * 2 # 512 samples * 2 bytes + + while len(self.audio_buffer) >= chunk_size: + chunk = self.audio_buffer[:chunk_size] + self.audio_buffer = self.audio_buffer[chunk_size:] + + # 转换为模型需要的格式 + audio_int16 = np.frombuffer(chunk, dtype=np.int16) + audio_float32 = audio_int16.astype(np.float32) / 32768.0 + audio_tensor = torch.from_numpy(audio_float32) + + # 检测语音概率 + with torch.no_grad(): + speech_prob = self.model(audio_tensor, self.sample_rate).item() + # Day 23: Debug logging to diagnose low volume/mic issues + if speech_prob > 0.3: + print(f"[VAD DEBUG] Prob: {speech_prob:.3f}") + + # 双阈值判断 + if speech_prob >= self.threshold: + is_voice = True + elif speech_prob <= self.threshold_low: + is_voice = False + else: + is_voice = self.is_speaking # 保持当前状态 + + # 更新滑动窗口 + self.voice_window.append(is_voice) + if len(self.voice_window) > self.window_size: + self.voice_window.pop(0) + + # 判断是否有语音 + voice_count = self.voice_window.count(True) + has_voice = voice_count >= self.frame_threshold + + # Maintain lookback buffer (always add current chunk) + self.pre_speech_buffer.append(chunk) + + current_time = 
time.time() * 1000 # 毫秒 + + if has_voice: + if not self.is_speaking: + # 语音开始 + self.is_speaking = True + self.speech_start_time = current_time + self.speech_audio.clear() + result['speech_started'] = True + result['speech_started'] = True + print("[VAD] 🎤 Speech started") + + # Day 23: Prepend lookback buffer to recover the start of speech + if self.pre_speech_buffer: + for prev_chunk in self.pre_speech_buffer: + self.speech_audio.extend(prev_chunk) + print(f"[VAD] Recovered {len(self.pre_speech_buffer)} chunks ({len(self.pre_speech_buffer)*32}ms) from history") + + self.last_speech_time = current_time + self.speech_audio.extend(chunk) + + elif self.is_speaking: + # 仍在收集音频(可能是短暂停顿) + self.speech_audio.extend(chunk) + + # 检查是否静默时间过长 + silence_duration = current_time - self.last_speech_time + speech_duration = current_time - self.speech_start_time + + if silence_duration >= self.min_silence_ms: + # 语音结束 + self.is_speaking = False + + # 检查语音是否足够长 + if speech_duration >= self.min_speech_ms: + result['speech_ended'] = True + result['speech_audio'] = bytes(self.speech_audio) + print(f"[VAD] 🔇 Speech ended, duration={speech_duration:.0f}ms, " + f"audio_size={len(self.speech_audio)} bytes") + else: + print(f"[VAD] 语音太短 ({speech_duration:.0f}ms), 忽略") + + self.speech_audio.clear() + + result['is_speaking'] = self.is_speaking + return result + + def _fallback_energy_vad(self, audio_bytes: bytes, result: dict) -> dict: + """简单能量检测(作为备用)""" + # 计算 RMS 能量 + audio_int16 = np.frombuffer(audio_bytes, dtype=np.int16) + rms = np.sqrt(np.mean(audio_int16.astype(np.float32) ** 2)) + + # 简单阈值 + threshold = 500 + is_voice = rms > threshold + + current_time = time.time() * 1000 + + if is_voice: + if not self.is_speaking: + self.is_speaking = True + self.speech_start_time = current_time + self.speech_audio.clear() + result['speech_started'] = True + + self.last_speech_time = current_time + self.speech_audio.extend(audio_bytes) + + elif self.is_speaking: + 
self.speech_audio.extend(audio_bytes) + + silence_duration = current_time - self.last_speech_time + if silence_duration >= self.min_silence_ms: + self.is_speaking = False + speech_duration = current_time - self.speech_start_time + + if speech_duration >= self.min_speech_ms: + result['speech_ended'] = True + result['speech_audio'] = bytes(self.speech_audio) + + self.speech_audio.clear() + + result['is_speaking'] = self.is_speaking + return result + + +# 全局 VAD 实例 +_global_vad = None + +def get_server_vad() -> SileroVAD: + """获取全局 VAD 实例""" + global _global_vad + if _global_vad is None: + _global_vad = SileroVAD() + return _global_vad + + +def reset_server_vad(): + """重置全局 VAD 状态""" + global _global_vad + if _global_vad: + _global_vad.reset() diff --git a/setup.bat b/setup.bat new file mode 100644 index 0000000..1b72cf6 --- /dev/null +++ b/setup.bat @@ -0,0 +1,157 @@ +@echo off +REM AI Glass System - Windows 快速安装脚本 + +echo ========================================== +echo AI Glass System - 自动安装脚本 +echo ========================================== +echo. + +REM 检查 Python +echo 正在检查 Python... +python --version >nul 2>&1 +if errorlevel 1 ( + echo [错误] 未找到 Python + echo 请从 https://www.python.org/downloads/ 下载并安装 Python 3.9-3.11 + pause + exit /b 1 +) + +python --version +echo [成功] Python 已安装 + +REM 检查 CUDA +echo. +echo 正在检查 CUDA... +nvidia-smi >nul 2>&1 +if errorlevel 1 ( + echo [警告] 未检测到 NVIDIA GPU,将使用 CPU 模式(速度较慢) + set HAS_GPU=0 +) else ( + echo [成功] 检测到 NVIDIA GPU + nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader + set HAS_GPU=1 +) + +REM 创建虚拟环境 +echo. +echo 正在创建虚拟环境... +if exist venv ( + echo [警告] 虚拟环境已存在 + set /p RECREATE="是否删除并重新创建? (y/n): " + if /i "%RECREATE%"=="y" ( + rmdir /s /q venv + python -m venv venv + echo [成功] 虚拟环境已重新创建 + ) +) else ( + python -m venv venv + echo [成功] 虚拟环境已创建 +) + +REM 激活虚拟环境 +echo. +echo 正在激活虚拟环境... +call venv\Scripts\activate.bat + +REM 升级 pip +echo. +echo 正在升级 pip... 
+python -m pip install --upgrade pip -q +echo [成功] pip 已升级 + +REM 安装 PyTorch +echo. +echo 正在安装 PyTorch... +if %HAS_GPU%==1 ( + echo 安装 GPU 版本 PyTorch ^(CUDA 11.8^)... + pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118 -q +) else ( + echo 安装 CPU 版本 PyTorch... + pip install torch torchvision -q +) +echo [成功] PyTorch 已安装 + +REM 验证 PyTorch +echo. +echo 验证 PyTorch 安装... +python -c "import torch; print(f'PyTorch 版本: {torch.__version__}'); print(f'CUDA 可用: {torch.cuda.is_available()}')" + +REM 安装 PyAudio +echo. +echo 正在安装 PyAudio... +echo [警告] PyAudio 在 Windows 上可能需要手动安装 +echo 如果自动安装失败,请从以下地址下载 wheel 文件: +echo https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyaudio +echo. +pip install pyaudio -q +if errorlevel 1 ( + echo [警告] PyAudio 自动安装失败,请手动安装 +) else ( + echo [成功] PyAudio 已安装 +) + +REM 安装其他依赖 +echo. +echo 正在安装 Python 依赖... +pip install -r requirements.txt -q +echo [成功] Python 依赖已安装 + +REM 创建 .env 文件 +echo. +if not exist .env ( + echo 正在创建 .env 配置文件... + copy .env.example .env >nul + echo [成功] .env 文件已创建 + echo [提示] 请编辑 .env 文件,填入您的 DASHSCOPE_API_KEY +) else ( + echo [跳过] .env 文件已存在 +) + +REM 创建必要的目录 +echo. +echo 正在创建目录结构... +if not exist recordings mkdir recordings +if not exist model mkdir model +if not exist music mkdir music +if not exist voice mkdir voice +echo [成功] 目录结构已创建 + +REM 检查模型文件 +echo. +echo 正在检查模型文件... +set MISSING=0 +if exist model\yolo-seg.pt (echo [成功] yolo-seg.pt) else (echo [缺失] yolo-seg.pt & set MISSING=1) +if exist model\yoloe-11l-seg.pt (echo [成功] yoloe-11l-seg.pt) else (echo [缺失] yoloe-11l-seg.pt & set MISSING=1) +if exist model\shoppingbest5.pt (echo [成功] shoppingbest5.pt) else (echo [缺失] shoppingbest5.pt & set MISSING=1) +if exist model\trafficlight.pt (echo [成功] trafficlight.pt) else (echo [缺失] trafficlight.pt & set MISSING=1) +if exist model\hand_landmarker.task (echo [成功] hand_landmarker.task) else (echo [缺失] hand_landmarker.task & set MISSING=1) + +if %MISSING%==1 ( + echo. 
+ echo [警告] 部分模型文件缺失,请将模型文件放入 model\ 目录 +) + +REM 完成 +echo. +echo ========================================== +echo [成功] 安装完成! +echo ========================================== +echo. +echo 下一步: +echo 1. 编辑 .env 文件,填入您的 API 密钥: +echo notepad .env +echo. +echo 2. 确保所有模型文件已放入 model\ 目录 +echo. +echo 3. 启动系统: +echo venv\Scripts\activate +echo python app_main.py +echo. +echo 4. 访问 http://localhost:8081 +echo. +echo [提示] 每次使用前请激活虚拟环境: +echo venv\Scripts\activate +echo. + +pause + diff --git a/setup.sh b/setup.sh new file mode 100644 index 0000000..37626aa --- /dev/null +++ b/setup.sh @@ -0,0 +1,192 @@ +#!/bin/bash +# AI Glass System - Linux/macOS 快速安装脚本 + +set -e # 遇到错误立即退出 + +echo "==========================================" +echo " AI Glass System - 自动安装脚本" +echo "==========================================" +echo "" + +# 颜色定义 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# 检查 Python 版本 +echo "正在检查 Python 版本..." +if ! command -v python3 &> /dev/null; then + echo -e "${RED}错误: 未找到 Python 3${NC}" + echo "请先安装 Python 3.9-3.11" + exit 1 +fi + +PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))') +echo -e "${GREEN}✓ 找到 Python $PYTHON_VERSION${NC}" + +# 检查 Python 版本是否在支持范围内 +PYTHON_MAJOR=$(echo $PYTHON_VERSION | cut -d. -f1) +PYTHON_MINOR=$(echo $PYTHON_VERSION | cut -d. -f2) + +if [ "$PYTHON_MAJOR" -ne 3 ] || [ "$PYTHON_MINOR" -lt 9 ] || [ "$PYTHON_MINOR" -gt 11 ]; then + echo -e "${YELLOW}警告: Python 版本 $PYTHON_VERSION 可能不受支持${NC}" + echo "推荐使用 Python 3.9-3.11" + read -p "是否继续? (y/n) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + exit 1 + fi +fi + +# 检查 CUDA(可选) +echo "" +echo "正在检查 CUDA..." +if command -v nvidia-smi &> /dev/null; then + echo -e "${GREEN}✓ 检测到 NVIDIA GPU${NC}" + nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader + HAS_GPU=true +else + echo -e "${YELLOW}! 
未检测到 NVIDIA GPU,将使用 CPU 模式(速度较慢)${NC}" + HAS_GPU=false +fi + +# 创建虚拟环境 +echo "" +echo "正在创建虚拟环境..." +if [ -d "venv" ]; then + echo -e "${YELLOW}虚拟环境已存在${NC}" + read -p "是否删除并重新创建? (y/n) " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + rm -rf venv + python3 -m venv venv + echo -e "${GREEN}✓ 虚拟环境已重新创建${NC}" + fi +else + python3 -m venv venv + echo -e "${GREEN}✓ 虚拟环境已创建${NC}" +fi + +# 激活虚拟环境 +echo "正在激活虚拟环境..." +source venv/bin/activate + +# 升级 pip +echo "" +echo "正在升级 pip..." +pip install --upgrade pip -q +echo -e "${GREEN}✓ pip 已升级${NC}" + +# 安装 PyTorch +echo "" +echo "正在安装 PyTorch..." +if [ "$HAS_GPU" = true ]; then + echo "安装 GPU 版本 PyTorch (CUDA 11.8)..." + pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118 -q +else + echo "安装 CPU 版本 PyTorch..." + pip install torch torchvision -q +fi +echo -e "${GREEN}✓ PyTorch 已安装${NC}" + +# 验证 PyTorch +echo "验证 PyTorch 安装..." +python3 -c "import torch; print(f'PyTorch 版本: {torch.__version__}'); print(f'CUDA 可用: {torch.cuda.is_available()}')" + +# 安装系统依赖(Linux) +if [[ "$OSTYPE" == "linux-gnu"* ]]; then + echo "" + echo "正在检查系统依赖..." + + # 检测发行版 + if [ -f /etc/os-release ]; then + . /etc/os-release + OS=$ID + else + OS="unknown" + fi + + if [ "$OS" = "ubuntu" ] || [ "$OS" = "debian" ]; then + echo "检测到 Ubuntu/Debian 系统" + echo "可能需要 sudo 权限来安装系统依赖..." + sudo apt-get update -qq + sudo apt-get install -y -qq portaudio19-dev libgl1-mesa-glx libglib2.0-0 + echo -e "${GREEN}✓ 系统依赖已安装${NC}" + else + echo -e "${YELLOW}! 未知的 Linux 发行版,请手动安装依赖${NC}" + echo " 需要: portaudio19-dev, libgl1-mesa-glx, libglib2.0-0" + fi +fi + +# 安装 Python 依赖 +echo "" +echo "正在安装 Python 依赖..." +pip install -r requirements.txt -q +echo -e "${GREEN}✓ Python 依赖已安装${NC}" + +# 创建 .env 文件 +echo "" +if [ ! -f ".env" ]; then + echo "正在创建 .env 配置文件..." 
+ cp .env.example .env + echo -e "${GREEN}✓ .env 文件已创建${NC}" + echo -e "${YELLOW}请编辑 .env 文件,填入您的 DASHSCOPE_API_KEY${NC}" +else + echo -e "${YELLOW}.env 文件已存在,跳过${NC}" +fi + +# 创建必要的目录 +echo "" +echo "正在创建目录结构..." +mkdir -p recordings model music voice +echo -e "${GREEN}✓ 目录结构已创建${NC}" + +# 检查模型文件 +echo "" +echo "正在检查模型文件..." +MODELS=("yolo-seg.pt" "yoloe-11l-seg.pt" "shoppingbest5.pt" "trafficlight.pt" "hand_landmarker.task") +MISSING_MODELS=() + +for model in "${MODELS[@]}"; do + if [ -f "model/$model" ]; then + echo -e "${GREEN}✓ $model${NC}" + else + echo -e "${RED}✗ $model (缺失)${NC}" + MISSING_MODELS+=("$model") + fi +done + +if [ ${#MISSING_MODELS[@]} -gt 0 ]; then + echo "" + echo -e "${YELLOW}警告: 缺少以下模型文件:${NC}" + for model in "${MISSING_MODELS[@]}"; do + echo " - $model" + done + echo "请将模型文件放入 model/ 目录" +fi + +# 完成 +echo "" +echo "==========================================" +echo -e "${GREEN}安装完成!${NC}" +echo "==========================================" +echo "" +echo "下一步:" +echo "1. 编辑 .env 文件,填入您的 API 密钥:" +echo " nano .env" +echo "" +echo "2. 确保所有模型文件已放入 model/ 目录" +echo "" +echo "3. 启动系统:" +echo " source venv/bin/activate" +echo " python app_main.py" +echo "" +echo "4. 
访问 http://localhost:8081" +echo "" + +# 提示激活虚拟环境 +echo -e "${YELLOW}注意: 每次使用前请激活虚拟环境:${NC}" +echo " source venv/bin/activate" +echo "" + diff --git a/static/favicon.png b/static/favicon.png new file mode 100644 index 0000000..14ad066 Binary files /dev/null and b/static/favicon.png differ diff --git a/static/main.js b/static/main.js new file mode 100644 index 0000000..df26478 --- /dev/null +++ b/static/main.js @@ -0,0 +1,883 @@ +// static/main.js + +// ================= 摄像头 + ASR ================= +(() => { + const $camStatus = document.getElementById('camStatus'); + const $asrStatus = document.getElementById('asrStatus'); + const $partial = document.getElementById('partial'); + const $finalList = document.getElementById('finalList'); + const $btnClear = document.getElementById('btnClear'); + const $btnRe = document.getElementById('btnReconnect'); + const $fps = document.getElementById('fps'); + const canvas = document.getElementById('canvas'); + const ctx = canvas.getContext('2d'); + + // === 获取/创建聊天容器(关键补丁) === + let chatContainer = document.getElementById('chatContainer'); + + function ensureChatContainer() { + // 已缓存且仍在文档中 + if (chatContainer && document.body.contains(chatContainer)) return chatContainer; + + // 重新获取,防热更新或 DOM 移动 + chatContainer = document.getElementById('chatContainer'); + if (!chatContainer) { + chatContainer = document.createElement('div'); + chatContainer.id = 'chatContainer'; + + // 优先挂到 finalList 的父容器;否则挂到 partial 的父容器;再否则挂到 body 兜底 + if ($finalList && $finalList.parentElement) { + // 隐藏原来的 finalList + $finalList.style.display = 'none'; + // 将聊天容器挂载到 finals div 内 + $finalList.parentElement.appendChild(chatContainer); + console.log('[chat] 创建并挂载 #chatContainer 到 finalList 区域'); + } else if ($partial && $partial.parentElement) { + $partial.parentElement.appendChild(chatContainer); + console.log('[chat] 创建并挂载 #chatContainer 到 partial 区域'); + } else { + document.body.appendChild(chatContainer); + console.warn('[chat] 未找到合适锚点,已挂到 '); + } + } + 
return chatContainer; + } + + // === 注入聊天样式(左右两侧气泡 + 时间戳,增加权重)=== + (function injectChatStyles() { + if (document.getElementById('chat-style-injected')) return; + const s = document.createElement('style'); + s.id = 'chat-style-injected'; + s.textContent = ` + #chatContainer{ + position: relative !important; + overflow-y: auto !important; + flex: 1 !important; /* 改为使用 flex: 1 占满剩余空间 */ + min-height: 0 !important; /* 确保 flex 子元素能正确收缩 */ + padding: 12px 12px 4px !important; + background: #0b1020 !important; + border: 1px solid #1d2438 !important; + border-radius: 10px !important; + margin-top: 12px !important; + } + + /* 自定义滚动条样式 */ + #chatContainer::-webkit-scrollbar { + width: 8px !important; + } + + #chatContainer::-webkit-scrollbar-track { + background: #0d1420 !important; + border-radius: 4px !important; + } + + #chatContainer::-webkit-scrollbar-thumb { + background: #2a3446 !important; + border-radius: 4px !important; + transition: background 0.2s !important; + } + + #chatContainer::-webkit-scrollbar-thumb:hover { + background: #3a4556 !important; + } + + /* Firefox 滚动条 */ + #chatContainer { + scrollbar-width: thin !important; + scrollbar-color: #2a3446 #0d1420 !important; + } + .timestamp{ + text-align:center !important; + font-size:12px !important; + color:#8a93a5 !important; + margin:10px 0 !important; + user-select:none !important; + } + .message{ + display:flex !important; + gap:8px !important; + margin:6px 0 !important; + align-items:flex-end !important; + } + .message.ai{ justify-content:flex-start !important; } + .message.user{ justify-content:flex-end !important; } + + .avatar{ + width:28px !important; height:28px !important; border-radius:50% !important; + background:#232a3d !important; flex:0 0 28px !important; + display:flex !important; align-items:center !important; justify-content:center !important; + color:#9fb0c3 !important; font-size:12px !important; user-select:none !important; + border:1px solid #29314a !important; + } + .message.user .avatar{ 
display:none !important; } + + .bubble{ + max-width: 72% !important; + padding:10px 12px !important; + line-height:1.45 !important; + border-radius:14px !important; + word-break:break-word !important; + white-space:pre-wrap !important; + border:1px solid transparent !important; + box-shadow:0 2px 8px rgba(0,0,0,0.15) !important; + font-size:14px !important; + } + .message.ai .bubble{ + background:#111a2e !important; + color:#e6edf3 !important; + border-color:#1e2740 !important; + border-top-left-radius:6px !important; + } + .message.user .bubble{ + background:#2a6df4 !important; + color:#fff !important; + border-color:#2a6df4 !important; + border-top-right-radius:6px !important; + } + `; + document.head.appendChild(s); + })(); + + // 聊天消息管理 + let lastTimestamp = 0; + const TIMESTAMP_INTERVAL = 5 * 60 * 1000; // 5分钟 + + function shouldShowTimestamp() { + const now = Date.now(); + if (now - lastTimestamp > TIMESTAMP_INTERVAL) { + lastTimestamp = now; + return true; + } + return false; + } + + function formatTime(timestamp = Date.now()) { + const date = new Date(timestamp); + const hours = date.getHours().toString().padStart(2, '0'); + const minutes = date.getMinutes().toString().padStart(2, '0'); + return `${hours}:${minutes}`; + } + + function addTimestamp() { + const container = ensureChatContainer(); + const timestampDiv = document.createElement('div'); + timestampDiv.className = 'timestamp'; + timestampDiv.textContent = formatTime(); + container.appendChild(timestampDiv); + } + + function addMessage(text, isUser = false) { + // 时间戳 + if (shouldShowTimestamp()) addTimestamp(); + + const container = ensureChatContainer(); + + // 行容器 + const messageDiv = document.createElement('div'); + messageDiv.className = `message ${isUser ? 'user' : 'ai'}`; + + // 左侧头像(AI) + const avatar = document.createElement('div'); + avatar.className = 'avatar'; + avatar.textContent = isUser ? 
'' : 'AI'; + + // 气泡 + const bubbleDiv = document.createElement('div'); + bubbleDiv.className = 'bubble'; + bubbleDiv.textContent = text; + + if (isUser) { + // 右侧:气泡在右 + messageDiv.appendChild(bubbleDiv); + } else { + // 左侧:头像 + 气泡 + messageDiv.appendChild(avatar); + messageDiv.appendChild(bubbleDiv); + } + + container.appendChild(messageDiv); + + // 滚动到底部 + container.scrollTop = container.scrollHeight; + } + + // Day 20: 更新 badge 样式,支持 connecting 状态动画 + function setBadge(el, status, text) { + el.textContent = text; + // status: 'ok', 'err', 'connecting' + if (status === true) status = 'ok'; + if (status === false) status = 'err'; + el.className = 'badge ' + (status || ''); + } + + function navLabelAndText(raw) { + // 去掉前缀 “[导航] ” + const t = raw.startsWith('[导航]') ? raw.substring(4).trim() : raw; + // 粗略判断:含“斑马线/绿灯/红灯/黄灯/过马路”归为斑马线导航,否则盲道导航 + const crossHints = ['斑马线', '绿灯', '红灯', '黄灯', '过马路']; + const isCross = crossHints.some(k => t.includes(k)); + const label = isCross ? '【斑马线导航】' : '【盲道导航】'; + return { label, text: `${label} ${t}` }; + } + + // 改进的 fitCanvas: 支持移动端尺寸计算 + function fitCanvas() { + const rect = canvas.getBoundingClientRect(); + // 使用容器实际宽高,添加最小值保护 + const w = Math.max(320, Math.floor(rect.width) || 320); + let h = Math.floor(rect.height) || 0; + // 如果容器高度太小或为0,使用4:3比例回退 + if (h < 100) { + h = Math.max(240, Math.floor(w * 3 / 4)); + } + if (canvas.width !== w || canvas.height !== h) { + canvas.width = w; + canvas.height = h; + console.log('[Canvas] 尺寸调整:', w, 'x', h); + } + } + window.addEventListener('resize', fitCanvas); + // 延迟初始化,确保布局完成 + setTimeout(fitCanvas, 100); + fitCanvas(); + + let wsCam, wsUI, frames = 0, fpsTimer = 0; + + function drawBlob(buf) { + const blob = new Blob([buf], { type: 'image/jpeg' }); + if ('createImageBitmap' in window) { + createImageBitmap(blob).then(bmp => { + fitCanvas(); + ctx.drawImage(bmp, 0, 0, canvas.width, canvas.height); + }).catch(() => { }); + } else { + const img = new Image(); + img.onload = () => { 
fitCanvas(); ctx.drawImage(img, 0, 0, canvas.width, canvas.height); URL.revokeObjectURL(img.src); }; + img.src = URL.createObjectURL(blob); + } + frames++; + const now = performance.now(); + if (!fpsTimer) fpsTimer = now; + if (now - fpsTimer >= 1000) { + $fps.textContent = 'FPS: ' + frames; + frames = 0; fpsTimer = now; + } + } + + function connectCamera() { + try { if (wsCam) wsCam.close(); } catch (e) { } + const proto = location.protocol === 'https:' ? 'wss' : 'ws'; + wsCam = new WebSocket(`${proto}://${location.host}/ws/viewer`); + setBadge($camStatus, 'connecting', '📷 连接中...'); + wsCam.binaryType = 'arraybuffer'; + wsCam.onopen = () => setBadge($camStatus, 'ok', '📷 已连接'); + wsCam.onclose = () => setBadge($camStatus, 'err', '📷 已断开'); + wsCam.onerror = () => setBadge($camStatus, 'err', '📷 连接错误'); + wsCam.onmessage = (ev) => drawBlob(ev.data); + } + + function connectASR() { + try { if (wsUI) wsUI.close(); } catch (e) { } + const proto = location.protocol === 'https:' ? 'wss' : 'ws'; + wsUI = new WebSocket(`${proto}://${location.host}/ws_ui`); + setBadge($asrStatus, 'connecting', '🎤 连接中...'); + wsUI.onopen = () => setBadge($asrStatus, 'ok', '🎤 已连接'); + wsUI.onclose = () => setBadge($asrStatus, 'err', '🎤 已断开'); + wsUI.onerror = () => setBadge($asrStatus, 'err', '🎤 连接错误'); + wsUI.onmessage = (ev) => { + const s = ev.data || ''; + if (s.startsWith('INIT:')) { + try { + const data = JSON.parse(s.slice(5)); + $partial.textContent = data.partial || '(等待音频…)'; + + // 初始化时加载历史消息(识别 [AI] 与 [导航]) + if (data.finals && data.finals.length > 0) { + data.finals.forEach(text => { + if (text.startsWith('[AI]')) { + addMessage(text.substring(4).trim(), false); + } else if (text.startsWith('[导航]')) { + const { text: show } = navLabelAndText(text); + addMessage(show, false); + } else { + addMessage(text, true); + } + }); + } + } catch (e) { } + return; + } + if (s.startsWith('PARTIAL:')) { + $partial.textContent = s.slice(8); + return; + } + if (s.startsWith('FINAL:')) { + const 
text = s.slice(6); + if (text.startsWith('[AI]')) { + addMessage(text.substring(4).trim(), false); + } else if (text.startsWith('[导航]')) { + const { text: show } = navLabelAndText(text); + addMessage(show, false); // 左侧 AI + } else { + addMessage(text, true); // 其它仍按右侧 + } + $partial.textContent = '(等待音频…)'; + return; + } + } + } + + $btnClear.onclick = () => { + const container = ensureChatContainer(); + // 清空聊天记录 + const messages = container.querySelectorAll('.message, .timestamp'); + messages.forEach(msg => msg.remove()); + lastTimestamp = 0; // 重置时间戳计数 + }; + $btnRe.onclick = () => { connectCamera(); connectASR(); }; + + connectCamera(); + connectASR(); +})(); + + +// ================= IMU 3D(无虚线框、无滚动条、上下对齐、自适应) ================= +import * as THREE from 'three'; +import { GLTFLoader } from 'https://unpkg.com/three@0.155.0/examples/jsm/loaders/GLTFLoader.js'; + +// Day 20: IMU 浮窗折叠功能 - 修复:兼容模块延迟加载 +// Day 23: 移动端优化 - 默认折叠 +function initImuToggle() { + const imuFloat = document.getElementById('imuFloat'); + const imuToggle = document.getElementById('imuToggle'); + console.log('[IMU] 初始化折叠功能, imuFloat:', !!imuFloat, 'imuToggle:', !!imuToggle); + + if (imuFloat && imuToggle) { + // 检测移动端 - 默认折叠 + const isMobile = window.innerWidth < 1100; + if (isMobile) { + imuFloat.classList.add('collapsed'); + imuFloat.classList.remove('expanded'); + imuToggle.textContent = '+'; + console.log('[IMU] 移动端检测,默认折叠'); + } else { + imuFloat.classList.add('expanded'); + } + + imuToggle.onclick = function (e) { + e.preventDefault(); + e.stopPropagation(); + const isCollapsed = imuFloat.classList.toggle('collapsed'); + imuFloat.classList.toggle('expanded', !isCollapsed); + this.textContent = isCollapsed ? 
'+' : '−'; + console.log('[IMU] 折叠状态:', isCollapsed); + }; + console.log('[IMU] 折叠按钮事件已绑定'); + } +} + +// 确保 DOM 加载后执行(兼容模块延迟加载) +if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', initImuToggle); +} else { + // DOM 已加载完成,直接执行 + initImuToggle(); +} + +(() => { + const container = document.getElementById('imu_view'); // 左侧3D容器 + const hud = document.getElementById('imu_hud'); // 右侧IMU容器 + + // 左右窗口统一半透明底色 + if (container) container.style.background = 'rgba(0,0,0,0.2)'; + if (hud) { + // 关键:右侧容器作为定位参考,同时禁止滚动、清理边框 + Object.assign(hud.style, { + position: 'relative', + overflow: 'hidden', + border: 'none', + outline: 'none', + background: 'rgba(0,0,0,0.2)', // 右侧也给统一底色(整块),干净无额外面板底色 + borderRadius: '10px' + }); + } + + // —— 彻底去掉“虚线框”和一切边框/阴影(含可能的外层壳)—— + (function killFraming() { + const s = document.createElement('style'); + s.textContent = ` + #imu_view, #imu_hud, #data-panel, #imu_dock, + .imu-card, .imu-wrap, .panel, .card, .window { + border: none !important; + outline: none !important; + box-shadow: none !important; + background-image: none !important; + } + /* 兜底:清除任何内联 dashed/ dotted */ + [style*="dashed"], [style*="dotted"] { + border-style: none !important; + outline: none !important; + } + `; + document.head.appendChild(s); + + // 同时清理父级(最多向上两层)里的边框与滚动条,避免外层虚线框和滚动条 + [container, hud].forEach(el => { + let p = el ? 
el.parentElement : null; + for (let i = 0; i < 2 && p; i++, p = p.parentElement) { + p.style.border = 'none'; + p.style.outline = 'none'; + p.style.boxShadow = 'none'; + p.style.overflow = 'hidden'; + p.style.backgroundImage = 'none'; + } + }); + })(); + + // 右侧:不再额外创建 dock 背板(直接用 hud 当整块背景) + // 数据面板只负责显示文字,不再自带背景与边框 + + // three.js 渲染器 + const renderer = new THREE.WebGLRenderer({ antialias: true, alpha: true }); + const scene = new THREE.Scene(); + const camera = new THREE.PerspectiveCamera(70, 1, 0.1, 1000); + + // 画质相关 + renderer.shadowMap.enabled = true; + renderer.shadowMap.type = THREE.PCFSoftShadowMap; + renderer.outputColorSpace = THREE.SRGBColorSpace; + renderer.toneMapping = THREE.ACESFilmicToneMapping; + renderer.toneMappingExposure = 1.0; + renderer.setClearColor(0x000000, 0); // 透明背景 + + // ——— 核心:左右窗口“上下齐+自适应等比” ——— + let syncRaf = 0; + function syncHeights() { + if (!container || !hud) return; + const w = container.clientWidth || 300; + + // 恢复合理高度设置 + const padding = 20; + const contentH = (document.getElementById('data-panel')?.offsetHeight || 0) + padding; + let targetH = Math.max(180, contentH); // 最小高度 180px + + hud.style.height = `${targetH}px`; + hud.style.maxHeight = 'none'; + hud.style.overflow = 'hidden'; + + container.style.height = `${targetH}px`; + renderer.setSize(w, targetH); + camera.aspect = w / targetH; + camera.updateProjectionMatrix(); + } + + function requestSync() { + cancelAnimationFrame(syncRaf); + syncRaf = requestAnimationFrame(syncHeights); + } + + // 初次与窗口变化时,同步左右高度 + requestSync(); + window.addEventListener('resize', requestSync); + + // 数据变化时也同步(放在 updateDataPanel 内) + function updateDataPanel(roll, pitch, yaw, gx, gy, gz, ax, ay, az) { + document.getElementById('panel-roll').textContent = roll.toFixed(1) + '°'; + document.getElementById('panel-pitch').textContent = pitch.toFixed(1) + '°'; + document.getElementById('panel-yaw').textContent = yaw.toFixed(1) + '°'; + document.getElementById('panel-gx').textContent = 
gx.toFixed(1); + document.getElementById('panel-gy').textContent = gy.toFixed(1); + document.getElementById('panel-gz').textContent = gz.toFixed(1); + document.getElementById('panel-ax').textContent = ax.toFixed(2); + document.getElementById('panel-ay').textContent = ay.toFixed(2); + document.getElementById('panel-az').textContent = az.toFixed(2); + + requestSync(); // 数据刷新后同步高度 + } + + + container.appendChild(renderer.domElement); + + // ========== 场景 ========== + const group = new THREE.Group(); + scene.add(group); + + const axesHelper = new THREE.AxesHelper(4); + scene.add(axesHelper); + + function createAxisLabel(text, position, color) { + const c = document.createElement('canvas'); + const ctx = c.getContext('2d'); + c.width = 128; c.height = 64; + ctx.fillStyle = color; + ctx.font = 'Bold 24px Arial'; + ctx.textAlign = 'center'; + ctx.textBaseline = 'middle'; + ctx.fillText(text, 64, 32); + const tex = new THREE.CanvasTexture(c); + const mat = new THREE.SpriteMaterial({ map: tex }); + const spr = new THREE.Sprite(mat); + spr.position.copy(position); + spr.scale.set(0.8, 0.4, 1); + return spr; + } + scene.add(createAxisLabel('X', new THREE.Vector3(4.5, 0, 0), '#ff0000')); + scene.add(createAxisLabel('Y', new THREE.Vector3(0, 4.5, 0), '#00ff00')); + scene.add(createAxisLabel('Z', new THREE.Vector3(0, 0, 4.5), '#0000ff')); + + function createScale() { + const g = new THREE.Group(); + for (let i = 1; i <= 4; i++) { + const geo = new THREE.SphereGeometry(0.05, 8, 6); + const mk = (c) => new THREE.Mesh(geo, new THREE.MeshBasicMaterial({ color: c })); + const mx = mk(0xff4444); mx.position.set(i, 0, 0); g.add(mx); + const my = mk(0x44ff44); my.position.set(0, i, 0); g.add(my); + const mz = mk(0x4444ff); mz.position.set(0, 0, i); g.add(mz); + } + return g; + } + scene.add(createScale()); + + function createDirectionLabels() { + [ + { t: '前', p: new THREE.Vector3(0, 0, 5), c: '#00ffff' }, + { t: '后', p: new THREE.Vector3(0, 0, -5), c: '#00ffff' }, + { t: '左', p: new 
THREE.Vector3(-5, 0, 0), c: '#ffff00' }, + { t: '右', p: new THREE.Vector3(5, 0, 0), c: '#ffff00' }, + { t: '上', p: new THREE.Vector3(0, 5, 0), c: '#ff00ff' }, + { t: '下', p: new THREE.Vector3(0, -5, 0), c: '#ff00ff' }, + ].forEach(d => scene.add(createAxisLabel(d.t, d.p, d.c))); + } + createDirectionLabels(); + + camera.position.set(4, 4, 6); + camera.lookAt(0, 0, 0); + + // ========== 右侧 IMU 数据展示 ========== + function createDataPanel() { + const panel = document.createElement('div'); + panel.id = 'data-panel'; + panel.style.cssText = ` + position: relative; + background: transparent; + border: none; + padding: 10px; + width: 100%; + color: #e6edf3; + font-family: 'Consolas','Monaco',monospace; + font-size: 11px; + box-shadow: none; + overflow: hidden; + `; + panel.innerHTML = ` +
<div class="imu-title" style="font-weight:bold;margin-bottom:6px;">IMU 实时数据</div>
+      <div class="imu-grid" style="display:flex;flex-wrap:wrap;gap:8px;">
+        <div class="imu-cell">
+          <span class="imu-label">翻滚角 (Roll)</span>
+          <span class="imu-value">
+            <span id="panel-roll">0.0°</span>
+          </span>
+          <span class="imu-label">俯仰角 (Pitch)</span>
+          <span class="imu-value">
+            <span id="panel-pitch">0.0°</span>
+          </span>
+        </div>
+        <div class="imu-cell">
+          <span class="imu-label">偏航角 (Yaw)</span>
+          <span class="imu-value">
+            <span id="panel-yaw">0.0°</span>
+          </span>
+        </div>
+        <div class="imu-cell">
+          <span class="imu-label">角速度 (°/s)</span>
+          <span class="imu-row">
+            <span>
+              gX:<span id="panel-gx">0.0</span>
+            </span><span>
+              gY:<span id="panel-gy">0.0</span>
+            </span><span>
+              gZ:<span id="panel-gz">0.0</span>
+            </span>
+          </span>
+        </div>
+        <div class="imu-cell">
+          <span class="imu-label">加速度 (m/s²)</span>
+          <span class="imu-row">
+            <span>
+              aX:<span id="panel-ax">0.0</span>
+            </span><span>
+              aY:<span id="panel-ay">0.0</span>
+            </span><span>
+              aZ:<span id="panel-az">0.0</span>
+            </span></span>
+        </div></div>
+ `; + hud.appendChild(panel); + return panel; + } + const dataPanel = createDataPanel(); + + function updateDataPanel(roll, pitch, yaw, gx, gy, gz, ax, ay, az) { + document.getElementById('panel-roll').textContent = roll.toFixed(1) + '°'; + document.getElementById('panel-pitch').textContent = pitch.toFixed(1) + '°'; + document.getElementById('panel-yaw').textContent = yaw.toFixed(1) + '°'; + document.getElementById('panel-gx').textContent = gx.toFixed(1); + document.getElementById('panel-gy').textContent = gy.toFixed(1); + document.getElementById('panel-gz').textContent = gz.toFixed(1); + document.getElementById('panel-ax').textContent = ax.toFixed(2); + document.getElementById('panel-ay').textContent = ay.toFixed(2); + document.getElementById('panel-az').textContent = az.toFixed(2); + } + + // ========== 灯光 ========== + const ambientLight = new THREE.AmbientLight(0x404080, 0.3); + scene.add(ambientLight); + + const mainLight = new THREE.DirectionalLight(0x00aaff, 1.2); + mainLight.position.set(5, 8, 5); + mainLight.castShadow = true; + mainLight.shadow.mapSize.width = 2048; + mainLight.shadow.mapSize.height = 2048; + mainLight.shadow.camera.near = 0.5; + mainLight.shadow.camera.far = 50; + scene.add(mainLight); + + const fillLight = new THREE.DirectionalLight(0xff6633, 0.8); + fillLight.position.set(-5, 3, -3); + scene.add(fillLight); + + const rimLight = new THREE.DirectionalLight(0x66ffff, 0.6); + rimLight.position.set(0, -5, 8); + scene.add(rimLight); + + const pointLight1 = new THREE.PointLight(0x00ff88, 0.5, 20); + pointLight1.position.set(3, 2, 4); + scene.add(pointLight1); + + const pointLight2 = new THREE.PointLight(0xff3388, 0.4, 15); + pointLight2.position.set(-3, -2, 2); + scene.add(pointLight2); + + const spotLight = new THREE.SpotLight(0xffffff, 1.0, 30, Math.PI / 6, 0.3, 1); + spotLight.position.set(0, 10, 8); + spotLight.target.position.set(0, 0, 0); + spotLight.castShadow = true; + scene.add(spotLight); + scene.add(spotLight.target); + + let 
lightTime = 0; + function updateLighting() { + lightTime += 0.01; + mainLight.intensity = 1.2 + Math.sin(lightTime * 2) * 0.2; + pointLight1.intensity = 0.5 + Math.sin(lightTime * 3) * 0.2; + pointLight2.intensity = 0.4 + Math.cos(lightTime * 2.5) * 0.2; + const hue = (Math.sin(lightTime * 0.5) + 1) * 0.3; + rimLight.color.setHSL(0.5 + hue, 1.0, 0.7); + } + + // ========== 模型 ========== + let glassModel = null; + const loader = new GLTFLoader(); + loader.load( + '/static/models/aiglass.glb', + (gltf) => { + glassModel = gltf.scene; + glassModel.scale.set(2, 2, 2); + glassModel.position.set(0, 0, 0); + glassModel.traverse((child) => { + if (child.isMesh) { + child.castShadow = true; + child.receiveShadow = true; + if (child.material) { + if (child.material.transparent || child.material.opacity < 1) { + child.material.envMapIntensity = 1.5; + child.material.roughness = 0.1; + child.material.metalness = 0.8; + } + } + } + }); + group.add(glassModel); + }, + undefined, + (error) => { + console.error('GLB加载失败:', error); + const fallbackCube = new THREE.Mesh( + new THREE.BoxGeometry(2, 2, 2), + new THREE.MeshStandardMaterial({ color: 0x00aaff, metalness: 0.7, roughness: 0.3, envMapIntensity: 1.0 }) + ); + fallbackCube.castShadow = true; + fallbackCube.receiveShadow = true; + group.add(fallbackCube); + } + ); + + // 渲染循环 + (function animate() { + requestAnimationFrame(animate); + updateLighting(); + renderer.render(scene, camera); + })(); + + // ===== IMU 数学与数据通道(原逻辑保持) ===== + // 安装补偿 + const MOUNT_RX = 0, MOUNT_RY = -90, MOUNT_RZ = 0; + const qMount = new THREE.Quaternion() + .multiply(new THREE.Quaternion().setFromAxisAngle(new THREE.Vector3(0, 1, 0), THREE.MathUtils.degToRad(MOUNT_RY))) + .multiply(new THREE.Quaternion().setFromAxisAngle(new THREE.Vector3(0, 0, 1), THREE.MathUtils.degToRad(MOUNT_RZ))) + .multiply(new THREE.Quaternion().setFromAxisAngle(new THREE.Vector3(1, 0, 0), THREE.MathUtils.degToRad(MOUNT_RX))); + + const FOLLOW = 0.85; + const $ = id => 
document.getElementById(id); + const updateSlider = (idBase, v) => { const sl = $(`${idBase}_sl`), tv = $(`${idBase}_val`); if (sl) { const min = +sl.min, max = +sl.max; sl.value = Math.max(min, Math.min(max, v)); } if (tv) tv.textContent = (typeof v === 'number' ? v.toFixed(2) : '-'); }; + + let MED_N = Number($('medn').value); + $('medn').onchange = e => MED_N = Number(e.target.value); + + let STILL_W = Number($('still_w').value); + $('still_w').onchange = e => STILL_W = Number(e.target.value); + + let ANG_EMA = Number($('ang_ema').value); + $('ang_ema').onchange = e => ANG_EMA = Number(e.target.value); + + let GRAV_BETA = Number($('grav_beta').value); + $('grav_beta').onchange = e => GRAV_BETA = Number(e.target.value); + + let YAW_DB = Number($('yaw_db').value); + $('yaw_db').onchange = e => YAW_DB = Number(e.target.value); + + let YAW_LEAK = Number($('yaw_leak').value); + $('yaw_leak').onchange = e => YAW_LEAK = Number(e.target.value); + + let autoRezero = true; + $('auto_rezero').onchange = e => { autoRezero = e.target.checked; }; + + let autoBias = true; + $('auto_bias').onchange = e => { autoBias = e.target.checked; }; + + let useProj = true; + $('use_proj').onchange = e => { useProj = e.target.checked; }; + + let freezeStill = true; + $('freeze_still').onchange = e => { freezeStill = e.target.checked; }; + + const mkMed = () => ({ buf: [], push(v) { this.buf.push(v); if (this.buf.length > MED_N) this.buf.shift(); const arr = [...this.buf].sort((a, b) => a - b); const m = arr[Math.floor(arr.length / 2)]; return { median: m, valid: this.buf.length === MED_N }; } }); + const fx = mkMed(), fy = mkMed(), fz = mkMed(); + const gx = mkMed(), gy = mkMed(), gz = mkMed(); + + const rad2deg = r => r * 180 / Math.PI; + const wrap180 = a => { a %= 360; if (a >= 180) a -= 360; if (a < -180) a += 360; return a; }; + + let lastTS = 0; + let yaw = 0; + let ref = { roll: 0, pitch: 0, yaw: 0 }; + let holdStart = 0, isStill = false; + + let gLP = { x: 0, y: 0, z: 0 }; + const 
G = 9.807, A_TOL = 0.08 * G; + + let gOff = { x: 0, y: 0, z: 0 }; + const BIAS_ALPHA = 0.002; + + let Rf = 0, Pf = 0, Yf = 0; + + document.getElementById('btn_zero').onclick = () => { ref = { roll: Rf, pitch: Pf, yaw: Yf }; }; + document.getElementById('btn_reset').onclick = () => { ref = { roll: 0, pitch: 0, yaw: 0 }; yaw = 0; Rf = 0; Pf = 0; Yf = 0; }; + document.getElementById('btn_bias_now').onclick = () => { gOff = { ...lastGy }; }; + + let lastGy = { x: 0, y: 0, z: 0 }; + + const imu_ws_state = document.getElementById('imu_ws_state'); + function setImuBadge(ok, text) { + imu_ws_state.textContent = text; + imu_ws_state.className = 'badge ' + (ok ? 'ok' : 'err'); + } + + const ws = new WebSocket((location.protocol === 'https:' ? 'wss://' : 'ws://') + location.host + '/ws'); + setImuBadge(false, 'connecting…'); + ws.onopen = () => setImuBadge(true, 'connected'); + ws.onclose = () => setImuBadge(false, 'disconnected'); + ws.onerror = () => setImuBadge(false, 'error'); + ws.onmessage = (ev) => { + try { + const d = JSON.parse(ev.data); + const t = (typeof d.ts === 'number') ? d.ts : performance.now(); + let dt = (!lastTS || (t - lastTS) <= 0 || (t - lastTS) > 300) ? 
0.02 : (t - lastTS) / 1000; + lastTS = t; + + let ax = Number(d?.accel?.x) || 0, ay = Number(d?.accel?.y) || 0, az = Number(d?.accel?.z) || 0; + let wx = Number(d?.gyro?.x) || 0, wy = Number(d?.gyro?.y) || 0, wz = Number(d?.gyro?.z) || 0; + + const fxr = fx.push(ax), fyr = fy.push(ay), fzr = fz.push(az); + const gxr = gx.push(wx), gyr = gy.push(wy), gzr = gz.push(wz); + if (fxr.valid) { ax = fxr.median; ay = fyr.median; az = fzr.median; } + if (gxr.valid) { wx = gxr.median; wy = gyr.median; wz = gzr.median; } + + lastGy = { x: wx, y: wy, z: wz }; + + gLP.x = GRAV_BETA * gLP.x + (1 - GRAV_BETA) * ax; + gLP.y = GRAV_BETA * gLP.y + (1 - GRAV_BETA) * ay; + gLP.z = GRAV_BETA * gLP.z + (1 - GRAV_BETA) * az; + const gmag = Math.hypot(gLP.x, gLP.y, gLP.z) || 1; + const gHat = { x: gLP.x / gmag, y: gLP.y / gmag, z: gLP.z / gmag }; + + const roll = rad2deg(Math.atan2(az, ay)); + const pitch = rad2deg(Math.atan2(-ax, ay)); + + const aNorm = Math.hypot(ax, ay, az); + const wNorm = Math.hypot(wx, wy, wz); + const nearFlat = Math.abs(roll) < 2.0 && Math.abs(pitch) < 2.0; + const stillCond = (Math.abs(aNorm - G) < A_TOL) && (wNorm < STILL_W); + + if (stillCond) { + if (!holdStart) holdStart = t; + if (!isStill && (t - holdStart) > 350) isStill = true; + if (autoBias) { + gOff.x = (1 - BIAS_ALPHA) * gOff.x + BIAS_ALPHA * wx; + gOff.y = (1 - BIAS_ALPHA) * gOff.y + BIAS_ALPHA * wy; + gOff.z = (1 - BIAS_ALPHA) * gOff.z + BIAS_ALPHA * wz; + } + } else { holdStart = 0; isStill = false; } + + let yawdot = useProj + ? 
((wx - gOff.x) * gHat.x + (wy - gOff.y) * gHat.y + (wz - gOff.z) * gHat.z) + : (wy - gOff.y); + + if (Math.abs(yawdot) < YAW_DB) yawdot = 0; + if (freezeStill && stillCond) yawdot = 0; + + yaw = wrap180(yaw + yawdot * dt); + + if (YAW_LEAK > 0 && nearFlat && stillCond && Math.abs(yaw) > 0) { + const step = YAW_LEAK * dt * Math.sign(-yaw); + if (Math.abs(yaw) <= Math.abs(step)) yaw = 0; else yaw += step; + } + + const alpha = ANG_EMA; + Rf = alpha * roll + (1 - alpha) * Rf; + Pf = alpha * pitch + (1 - alpha) * Pf; + Yf = alpha * yaw + (1 - alpha) * Yf; + + if (autoRezero && nearFlat && wNorm < STILL_W) { + if (!holdStart) holdStart = t; + if (!isStill && (t - holdStart) > 350) { + ref = { roll: Rf, pitch: Pf, yaw: Yf }; + isStill = true; + } + } + + const R = wrap180(Rf - ref.roll); + const P = wrap180(Pf - ref.pitch); + const Y = wrap180(Yf - ref.yaw); + + const qBody = new THREE.Quaternion() + .multiply(new THREE.Quaternion().setFromAxisAngle(new THREE.Vector3(0, 1, 0), THREE.MathUtils.degToRad(Y))) + .multiply(new THREE.Quaternion().setFromAxisAngle(new THREE.Vector3(0, 0, 1), THREE.MathUtils.degToRad(P))) + .multiply(new THREE.Quaternion().setFromAxisAngle(new THREE.Vector3(1, 0, 0), THREE.MathUtils.degToRad(R))); + const q = qMount.clone().multiply(qBody); + + if (FOLLOW >= 0.999) group.setRotationFromQuaternion(q); + else group.quaternion.slerp(q, FOLLOW); + + updateSlider('roll', R); + updateSlider('pitch', P); + updateSlider('yaw', Y); + + updateSlider('gx', wx); updateSlider('gy', wy); updateSlider('gz', wz); + updateSlider('ax', ax); updateSlider('ay', ay); updateSlider('az', az); + + // 更新右侧数据 + updateDataPanel(R, P, Y, wx, wy, wz, ax, ay, az); + } catch (e) { } + }; + + // 初次与窗口改变时,保持左右上下对齐 + window.addEventListener('resize', resize); + resize(); +})(); diff --git a/static/models/aiglass.glb b/static/models/aiglass.glb new file mode 100644 index 0000000..c96e88d Binary files /dev/null and b/static/models/aiglass.glb differ diff --git a/static/vision.css 
b/static/vision.css new file mode 100644 index 0000000..87dfebc --- /dev/null +++ b/static/vision.css @@ -0,0 +1,195 @@ +/* 科技感配色方案 */ +:root { + --tech-bg: #0a0e1b; + --tech-card: #111827; + --tech-border: #1e293b; + --tech-primary: #3b82f6; + --tech-secondary: #8b5cf6; + --tech-accent: #06b6d4; + --tech-success: #10b981; + --tech-warning: #f59e0b; + --tech-text: #e0e7ff; + --tech-muted: #94a3b8; + --glow-primary: 0 0 30px rgba(59, 130, 246, 0.5); + --glow-secondary: 0 0 30px rgba(139, 92, 246, 0.5); +} + +/* 视觉识别画布容器 */ +.vision-container { + position: relative; + background: var(--tech-bg); + border: 1px solid var(--tech-border); + border-radius: 16px; + overflow: hidden; + box-shadow: 0 20px 40px rgba(0, 0, 0, 0.6); +} + +.vision-canvas { + width: 100%; + height: auto; + display: block; +} + +/* 覆盖层 */ +.vision-overlay { + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 100%; + pointer-events: none; +} + +/* HUD样式 */ +.hud-element { + position: absolute; + color: var(--tech-text); + font-family: 'Inter', 'Noto Sans SC', sans-serif; + text-shadow: 0 0 10px rgba(59, 130, 246, 0.8); +} + +/* 状态指示器 */ +.status-indicator { + position: absolute; + top: 20px; + left: 20px; + padding: 12px 24px; + background: rgba(17, 24, 39, 0.9); + border: 1px solid var(--tech-primary); + border-radius: 8px; + backdrop-filter: blur(10px); + box-shadow: var(--glow-primary); +} + +.status-main { + font-size: 18px; + font-weight: 600; + color: var(--tech-primary); + margin-bottom: 4px; +} + +.status-sub { + font-size: 12px; + color: var(--tech-muted); + text-transform: uppercase; + letter-spacing: 1px; +} + +/* 进度条 */ +.progress-container { + position: absolute; + bottom: 40px; + left: 20px; + width: 300px; +} + +.progress-item { + margin-bottom: 20px; +} + +.progress-label { + display: flex; + justify-content: space-between; + margin-bottom: 8px; +} + +.progress-label-text { + font-size: 14px; + font-weight: 500; +} + +.progress-label-sub { + font-size: 11px; + color: 
var(--tech-muted); + margin-left: 8px; +} + +.progress-bar { + height: 8px; + background: rgba(30, 41, 59, 0.8); + border-radius: 4px; + overflow: hidden; + position: relative; +} + +.progress-fill { + height: 100%; + background: linear-gradient(90deg, var(--tech-primary), var(--tech-accent)); + border-radius: 4px; + transition: width 0.3s ease; + box-shadow: 0 0 20px rgba(59, 130, 246, 0.6); +} + +/* 手部追踪样式 */ +.hand-skeleton { + stroke: var(--tech-accent); + stroke-width: 2; + fill: none; + filter: drop-shadow(0 0 6px rgba(6, 182, 212, 0.8)); +} + +.hand-joint { + fill: var(--tech-accent); + filter: drop-shadow(0 0 8px rgba(6, 182, 212, 1)); +} + +/* 目标锁定样式 */ +.target-lock { + stroke: var(--tech-success); + stroke-width: 3; + fill: none; + stroke-dasharray: 10 5; + animation: rotate 20s linear infinite; + filter: drop-shadow(0 0 10px rgba(16, 185, 129, 0.8)); +} + +@keyframes rotate { + from { transform: rotate(0deg); } + to { transform: rotate(360deg); } +} + +/* 闪烁动画 */ +.flash-overlay { + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 100%; + background: radial-gradient(circle, transparent 0%, rgba(139, 92, 246, 0.3) 100%); + animation: flash-pulse 1s ease-in-out; +} + +@keyframes flash-pulse { + 0%, 100% { opacity: 0; } + 50% { opacity: 1; } +} + +/* 数据显示面板 */ +.data-panel { + position: absolute; + top: 20px; + right: 20px; + background: rgba(17, 24, 39, 0.9); + border: 1px solid var(--tech-secondary); + border-radius: 8px; + padding: 16px; + backdrop-filter: blur(10px); + min-width: 200px; +} + +.data-item { + display: flex; + justify-content: space-between; + margin-bottom: 12px; + font-size: 14px; +} + +.data-label { + color: var(--tech-muted); +} + +.data-value { + color: var(--tech-text); + font-weight: 600; + font-family: 'JetBrains Mono', monospace; +} \ No newline at end of file diff --git a/static/vision.js b/static/vision.js new file mode 100644 index 0000000..b0209fe --- /dev/null +++ b/static/vision.js @@ -0,0 +1,291 @@ +// 
科技感视觉识别系统 +class VisionSystem { + constructor(canvasId) { + this.canvas = document.getElementById(canvasId); + this.ctx = this.canvas.getContext('2d'); + this.overlay = document.createElement('div'); + this.overlay.className = 'vision-overlay'; + this.canvas.parentElement.appendChild(this.overlay); + + // 状态 + this.mode = 'SEGMENT'; + this.fps = 0; + this.detectedObjects = []; + this.handData = null; + this.trackingData = null; + + // 初始化UI元素 + this.initUI(); + + // 连接WebSocket + this.connectVisionWS(); + } + + initUI() { + // 状态指示器 + this.statusElement = this.createStatusIndicator(); + this.overlay.appendChild(this.statusElement); + + // 进度条 + this.progressElement = this.createProgressBars(); + this.overlay.appendChild(this.progressElement); + + // 数据面板 + this.dataPanel = this.createDataPanel(); + this.overlay.appendChild(this.dataPanel); + } + + createStatusIndicator() { + const status = document.createElement('div'); + status.className = 'status-indicator'; + status.innerHTML = ` +
系统就绪 System Ready
+
等待目标 Waiting for Target
+ `; + return status; + } + + createProgressBars() { + const container = document.createElement('div'); + container.className = 'progress-container'; + container.innerHTML = ` +
+
+ 对齐度 Alignment + 0% +
+
+
+
+
+
+
+ 距离匹配 Distance Match + 0% +
+
+
+
+
+ `; + return container; + } + + createDataPanel() { + const panel = document.createElement('div'); + panel.className = 'data-panel'; + panel.innerHTML = ` +
+ FPS + -- +
+
+ 模式 Mode + 检测 +
+
+ 目标数 Objects + 0 +
+
+ 握持分 Grasp + 0.00 +
+ `; + return panel; + } + + connectVisionWS() { + const proto = location.protocol === 'https:' ? 'wss' : 'ws'; + this.ws = new WebSocket(`${proto}://${location.host}/ws/viewer`); // 改为 /ws/viewer + + this.ws.onopen = () => { + console.log('[Vision] WebSocket connected'); + // ... rest of the code + }; + + this.ws.onmessage = (event) => { + // 处理二进制图像数据 + if (event.data instanceof Blob) { + // 创建图像URL并显示 + const url = URL.createObjectURL(event.data); + const img = new Image(); + img.onload = () => { + this.ctx.drawImage(img, 0, 0, this.canvas.width, this.canvas.height); + URL.revokeObjectURL(url); + }; + img.src = url; + } + }; + + this.ws.onerror = () => { + console.error('Vision WebSocket error'); + }; + } + + updateVisualization(data) { + // 更新状态 + this.mode = data.mode || 'SEGMENT'; + this.fps = data.fps || 0; + + // 更新UI + this.updateStatus(data); + this.updateProgress(data); + this.updateDataPanel(data); + + // 绘制可视化 + if (data.frame) { + this.drawFrame(data.frame); + } + + if (data.hand) { + this.drawHand(data.hand); + } + + if (data.objects) { + this.drawObjects(data.objects); + } + + if (data.tracking) { + this.drawTracking(data.tracking); + } + } + + updateStatus(data) { + const statusMain = this.statusElement.querySelector('.status-main'); + const statusSub = this.statusElement.querySelector('.status-sub:last-child'); + + switch(this.mode) { + case 'SEGMENT': + statusMain.innerHTML = '目标检测中 Detecting'; + statusSub.textContent = data.message || '扫描环境 Scanning Environment'; + break; + case 'FLASH': + statusMain.innerHTML = '锁定中 Locking'; + statusSub.textContent = '准备追踪 Preparing to Track'; + break; + case 'TRACK': + statusMain.innerHTML = '追踪中 Tracking'; + statusSub.textContent = '保持对准 Maintain Alignment'; + break; + } + } + + updateProgress(data) { + if (data.alignScore !== undefined) { + const alignPercent = Math.round(data.alignScore * 100); + document.getElementById('align-progress').style.width = `${alignPercent}%`; + 
this.progressElement.querySelector('.progress-value').textContent = `${alignPercent}%`; + } + + if (data.distanceScore !== undefined) { + const distPercent = Math.round(data.distanceScore * 100); + document.getElementById('distance-progress').style.width = `${distPercent}%`; + this.progressElement.querySelectorAll('.progress-value')[1].textContent = `${distPercent}%`; + } + } + + updateDataPanel(data) { + document.getElementById('fps-value').textContent = Math.round(this.fps); + document.getElementById('mode-value').textContent = this.getModeText(this.mode); + document.getElementById('objects-value').textContent = data.objectCount || 0; + document.getElementById('grasp-value').textContent = (data.graspScore || 0).toFixed(2); + } + + getModeText(mode) { + const modeMap = { + 'SEGMENT': '检测 Detect', + 'FLASH': '锁定 Lock', + 'TRACK': '追踪 Track' + }; + return modeMap[mode] || mode; + } + + drawFrame(frameData) { + // 绘制基础图像 + const img = new Image(); + img.onload = () => { + this.canvas.width = img.width; + this.canvas.height = img.height; + this.ctx.drawImage(img, 0, 0); + }; + img.src = 'data:image/jpeg;base64,' + frameData; + } + + drawHand(handData) { + // 使用SVG绘制手部骨骼 + const svg = document.createElementNS('http://www.w3.org/2000/svg', 'svg'); + svg.style.position = 'absolute'; + svg.style.top = '0'; + svg.style.left = '0'; + svg.style.width = '100%'; + svg.style.height = '100%'; + svg.style.pointerEvents = 'none'; + + // 绘制连接线 + handData.connections.forEach(conn => { + const line = document.createElementNS('http://www.w3.org/2000/svg', 'line'); + line.setAttribute('x1', conn.start.x); + line.setAttribute('y1', conn.start.y); + line.setAttribute('x2', conn.end.x); + line.setAttribute('y2', conn.end.y); + line.setAttribute('class', 'hand-skeleton'); + svg.appendChild(line); + }); + + // 绘制关节点 + handData.landmarks.forEach(point => { + const circle = document.createElementNS('http://www.w3.org/2000/svg', 'circle'); + circle.setAttribute('cx', point.x); + 
circle.setAttribute('cy', point.y); + circle.setAttribute('r', '3'); + circle.setAttribute('class', 'hand-joint'); + svg.appendChild(circle); + }); + + // 添加到覆盖层 + const oldSvg = this.overlay.querySelector('svg'); + if (oldSvg) oldSvg.remove(); + this.overlay.appendChild(svg); + } + + drawObjects(objects) { + // 绘制检测到的物体 + objects.forEach((obj, index) => { + if (obj.isTarget) { + // 目标物体用特殊样式 + this.drawTargetObject(obj); + } else { + // 其他物体用普通样式 + this.drawNormalObject(obj); + } + }); + } + + drawTargetObject(obj) { + // 创建目标锁定效果 + const target = document.createElement('div'); + target.className = 'target-lock'; + target.style.position = 'absolute'; + target.style.left = `${obj.x}px`; + target.style.top = `${obj.y}px`; + target.style.width = `${obj.width}px`; + target.style.height = `${obj.height}px`; + + // 添加锁定动画 + const svg = ` + + + + `; + target.innerHTML = svg; + + this.overlay.appendChild(target); + } +} + +// 初始化 +document.addEventListener('DOMContentLoaded', () => { + const visionSystem = new VisionSystem('vision-canvas'); +}); \ No newline at end of file diff --git a/static/vision_renderer.js b/static/vision_renderer.js new file mode 100644 index 0000000..f4041ba --- /dev/null +++ b/static/vision_renderer.js @@ -0,0 +1,443 @@ +// vision_renderer.js - 前端可视化渲染器 + +class VisionRenderer { + constructor(canvasId) { + this.canvas = document.getElementById(canvasId); + this.ctx = this.canvas.getContext('2d'); + this.ws = null; + this.currentData = null; + + // UI配色方案 + this.colors = { + primaryBlue: '#00C8FF', + secondaryPurple: '#9664FF', + accentCyan: '#00FFFF', + white: '#FFFFFF', + lightGray: '#C8C8C8', + darkBg: 'rgba(40, 40, 40, 0.8)', + success: '#7FFF00', + warning: '#FFA500', + error: '#FF7272', + glassBg: 'rgba(20, 20, 20, 0.3)', + }; + + // 动画状态 + this.animations = { + flashAlpha: 0, + messageAlpha: 1, + progressAnimations: {} + }; + + this.setupCanvas(); + this.connect(); + this.startRenderLoop(); + } + + setupCanvas() { + // 设置画布大小 + const 
resizeCanvas = () => { + const rect = this.canvas.getBoundingClientRect(); + this.canvas.width = rect.width; + this.canvas.height = rect.height; + }; + + resizeCanvas(); + window.addEventListener('resize', resizeCanvas); + } + + connect() { + const proto = location.protocol === 'https:' ? 'wss' : 'ws'; + this.ws = new WebSocket(`${proto}://${location.host}/ws/vision_data`); + + this.ws.onopen = () => { + console.log('[VisionRenderer] Connected'); + this.updateConnectionStatus(true); + }; + + this.ws.onclose = () => { + console.log('[VisionRenderer] Disconnected'); + this.updateConnectionStatus(false); + // 自动重连 + setTimeout(() => this.connect(), 2000); + }; + + this.ws.onmessage = (event) => { + try { + this.currentData = JSON.parse(event.data); + } catch (e) { + console.error('[VisionRenderer] Parse error:', e); + } + }; + } + + updateConnectionStatus(connected) { + const badge = document.getElementById('visionStatus'); + if (badge) { + badge.textContent = connected ? 'Vision: connected' : 'Vision: disconnected'; + badge.className = 'badge ' + (connected ? 
'ok' : 'err'); + } + } + + startRenderLoop() { + const render = () => { + this.clearCanvas(); + + if (this.currentData) { + this.renderFrame(this.currentData); + } + + requestAnimationFrame(render); + }; + + render(); + } + + clearCanvas() { + this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height); + } + + renderFrame(data) { + const ctx = this.ctx; + const W = this.canvas.width; + const H = this.canvas.height; + + // 渲染手部骨骼 + if (data.hand_detected && data.hand_landmarks) { + this.drawHandSkeleton(data.hand_landmarks); + + // 手部边界框 + if (data.hand_box) { + this.drawBox(data.hand_box, this.colors.accentCyan, 1); + } + + // 握持评分 + this.drawTextWithBg( + `握持评分 Grasp Score: ${data.grasp_score.toFixed(2)}`, + 10, 60, 18, this.colors.accentCyan + ); + } + + // 渲染检测到的物体 + if (data.mode === 'SEGMENT' && data.objects) { + data.objects.forEach((obj, index) => { + const isSelected = index === data.selected_object_index; + const color = isSelected ? this.colors.success : this.colors.primaryBlue; + + // 绘制轮廓 + if (obj.contour) { + this.drawContour(obj.contour, color, isSelected ? 
3 : 2); + } + + // 选中物体的标记 + if (isSelected && obj.center) { + this.drawTargetMarker(obj.center.x, obj.center.y); + } + }); + + // 倒计时 + if (data.countdown !== null) { + this.drawCountdown(data.countdown); + } + } + + // 闪烁动画 + if (data.mode === 'FLASH' && data.flash_progress !== null) { + this.renderFlashAnimation(data.flash_progress); + } + + // 追踪模式 + if (data.mode === 'TRACK') { + // 追踪多边形 + if (data.tracking_polygon) { + this.drawPolygon(data.tracking_polygon, this.colors.success, 2); + } + + // 中心点 + if (data.tracking_center) { + this.drawCircle(data.tracking_center.x, data.tracking_center.y, 6, this.colors.success); + } + + // 对齐箭头 + if (data.hand_center && data.tracking_center) { + this.drawMeasureArrow( + data.hand_center, + data.tracking_center + ); + } + + // 面积比和引导 + if (data.area_ratio !== null) { + this.drawAreaRatio(data.area_ratio, data.guidance); + } + } + + // 进度条 + this.drawTechProgressBars(data.align_score, data.range_score); + + // FPS + this.drawFPS(data.fps); + + // 状态消息 + if (data.status_message) { + this.drawStatusMessage(data.status_message); + } + } + + drawHandSkeleton(landmarks) { + const ctx = this.ctx; + const color = this.colors.secondaryPurple; + + // MediaPipe手部连接 + const connections = [ + [0, 1], [1, 2], [2, 3], [3, 4], // 拇指 + [0, 5], [5, 6], [6, 7], [7, 8], // 食指 + [0, 9], [9, 10], [10, 11], [11, 12], // 中指 + [0, 13], [13, 14], [14, 15], [15, 16], // 无名指 + [0, 17], [17, 18], [18, 19], [19, 20], // 小指 + [5, 9], [9, 13], [13, 17] // 掌心 + ]; + + // 绘制连接线 + ctx.strokeStyle = color; + ctx.lineWidth = 2; + connections.forEach(([i, j]) => { + if (landmarks[i] && landmarks[j]) { + ctx.beginPath(); + ctx.moveTo(landmarks[i].x, landmarks[i].y); + ctx.lineTo(landmarks[j].x, landmarks[j].y); + ctx.stroke(); + } + }); + + // 绘制关键点 + landmarks.forEach(point => { + this.drawCircle(point.x, point.y, 3, color, true); + }); + } + + drawTextWithBg(text, x, y, fontSize = 18, color = this.colors.white, bgColor = this.colors.glassBg) { + const ctx = 
this.ctx; + const padding = 10; + + ctx.font = `${fontSize}px Arial, "Microsoft YaHei"`; + const metrics = ctx.measureText(text); + const textWidth = metrics.width; + const textHeight = fontSize; + + // 绘制背景 + ctx.fillStyle = bgColor; + ctx.fillRect(x - padding, y - textHeight - padding, + textWidth + 2 * padding, textHeight + 2 * padding); + + // 绘制边框 + ctx.strokeStyle = this.colors.primaryBlue; + ctx.lineWidth = 1; + ctx.strokeRect(x - padding, y - textHeight - padding, + textWidth + 2 * padding, textHeight + 2 * padding); + + // 绘制文字 + ctx.fillStyle = color; + ctx.fillText(text, x, y); + } + + drawCountdown(seconds) { + const text = `检测到物体 Object detected, ${seconds.toFixed(1)}s`; + const x = 10; + const y = 100; + this.drawTextWithBg(text, x, y, 22, this.colors.warning); + } + + renderFlashAnimation(progress) { + const ctx = this.ctx; + const W = this.canvas.width; + const H = this.canvas.height; + + // 计算闪烁透明度 + const cycleProgress = progress * 2; + const alpha = 0.3 + 0.3 * Math.sin(cycleProgress * Math.PI); + + // 全屏闪烁效果 + ctx.fillStyle = this.colors.accentCyan + Math.floor(alpha * 255).toString(16).padStart(2, '0'); + ctx.fillRect(0, 0, W, H); + + // 锁定文字 + this.drawTextWithBg('正在锁定目标... 
Locking target...', + W/2 - 150, H/2, 24, this.colors.accentCyan); + } + + drawTechProgressBars(alignScore, rangeScore) { + const W = this.canvas.width; + const H = this.canvas.height; + const barW = W * 0.3; + const barH = 8; + const gap = 20; + const x0 = 20; + const y0 = H - 2 * barH - gap - 60; + + // 对齐进度条 + this.drawProgressBar(x0, y0, barW, barH, alignScore, + '对齐 Align', this.colors.primaryBlue); + + // 距离进度条 + this.drawProgressBar(x0, y0 + barH + gap, barW, barH, rangeScore, + '距离(≈1) Distance(≈1)', this.colors.accentCyan); + } + + drawProgressBar(x, y, width, height, value, label, color) { + const ctx = this.ctx; + + // 背景 + ctx.fillStyle = this.colors.darkBg; + ctx.fillRect(x, y, width, height); + + // 边框 + ctx.strokeStyle = color; + ctx.lineWidth = 1; + ctx.strokeRect(x, y, width, height); + + // 填充(渐变) + const fillWidth = width * Math.max(0, Math.min(1, value)); + if (fillWidth > 0) { + const gradient = ctx.createLinearGradient(x, y, x + fillWidth, y); + gradient.addColorStop(0, this.colors.secondaryPurple); + gradient.addColorStop(1, color); + ctx.fillStyle = gradient; + ctx.fillRect(x, y, fillWidth, height); + } + + // 标签 + this.drawTextWithBg(label, x, y - 10, 14, color); + } + + drawCircle(x, y, radius, color, fill = true) { + const ctx = this.ctx; + ctx.beginPath(); + ctx.arc(x, y, radius, 0, 2 * Math.PI); + if (fill) { + ctx.fillStyle = color; + ctx.fill(); + } else { + ctx.strokeStyle = color; + ctx.lineWidth = 2; + ctx.stroke(); + } + } + + drawBox(box, color, lineWidth = 2) { + const ctx = this.ctx; + ctx.strokeStyle = color; + ctx.lineWidth = lineWidth; + ctx.strokeRect(box.x, box.y, box.width, box.height); + } + + drawContour(points, color, lineWidth = 2) { + if (!points || points.length < 3) return; + + const ctx = this.ctx; + ctx.strokeStyle = color; + ctx.lineWidth = lineWidth; + ctx.beginPath(); + ctx.moveTo(points[0].x, points[0].y); + for (let i = 1; i < points.length; i++) { + ctx.lineTo(points[i].x, points[i].y); + } + 
ctx.closePath(); + ctx.stroke(); + } + + drawPolygon(points, color, lineWidth = 2) { + this.drawContour(points, color, lineWidth); + } + + drawTargetMarker(x, y) { + // 双圆圈标记 + this.drawCircle(x, y, 8, this.colors.success, false); + this.drawCircle(x, y, 12, this.colors.success, false); + this.drawTextWithBg('目标 Target', x + 15, y - 5, 16, this.colors.success); + } + + drawMeasureArrow(p1, p2) { + const ctx = this.ctx; + const dx = p2.x - p1.x; + const dy = p2.y - p1.y; + const distance = Math.sqrt(dx * dx + dy * dy); + + // 绘制线 + ctx.strokeStyle = this.colors.white; + ctx.lineWidth = 2; + ctx.setLineDash([5, 5]); + ctx.beginPath(); + ctx.moveTo(p1.x, p1.y); + ctx.lineTo(p2.x, p2.y); + ctx.stroke(); + ctx.setLineDash([]); + + // 绘制箭头 + const angle = Math.atan2(dy, dx); + const arrowLength = 15; + const arrowAngle = Math.PI / 6; + + ctx.beginPath(); + ctx.moveTo(p2.x, p2.y); + ctx.lineTo( + p2.x - arrowLength * Math.cos(angle - arrowAngle), + p2.y - arrowLength * Math.sin(angle - arrowAngle) + ); + ctx.moveTo(p2.x, p2.y); + ctx.lineTo( + p2.x - arrowLength * Math.cos(angle + arrowAngle), + p2.y - arrowLength * Math.sin(angle + arrowAngle) + ); + ctx.stroke(); + + // 显示距离 + const midX = (p1.x + p2.x) / 2; + const midY = (p1.y + p2.y) / 2; + ctx.fillStyle = this.colors.white; + ctx.font = '14px Arial'; + ctx.fillText(`${distance.toFixed(0)}px`, midX + 10, midY - 10); + } + + drawAreaRatio(ratio, guidance) { + const y = 120; + const text = `面积比 Area Ratio: ${ratio.toFixed(2)}`; + this.drawTextWithBg(text, 10, y, 18, this.colors.lightGray); + + if (guidance) { + const guidanceText = { + 'forward': '向前靠近 Move Forward', + 'backward': '后退 Move Back', + 'maintain': '保持 Maintain' + }; + const guidanceColor = guidance === 'maintain' ? 
this.colors.success : this.colors.warning; + this.drawTextWithBg(guidanceText[guidance] || guidance, + 10, y + 40, 20, guidanceColor); + } + } + + drawFPS(fps) { + const W = this.canvas.width; + const text = `FPS: ${fps.toFixed(1)}`; + this.drawTextWithBg(text, W - 120, 30, 16, this.colors.accentCyan); + } + + drawStatusMessage(message) { + const W = this.canvas.width; + const H = this.canvas.height; + + // 根据消息类型选择颜色 + let color = this.colors.white; + if (message.includes('追踪丢失') || message.includes('lost')) { + color = this.colors.error; + } else if (message.includes('刷新') || message.includes('refreshed')) { + color = this.colors.success; + } + + this.drawTextWithBg(message, W/2 - 200, H - 50, 20, color); + } +} + +// 初始化渲染器 +document.addEventListener('DOMContentLoaded', () => { + window.visionRenderer = new VisionRenderer('canvas'); +}); \ No newline at end of file diff --git a/static/visualizer.js b/static/visualizer.js new file mode 100644 index 0000000..ac96992 --- /dev/null +++ b/static/visualizer.js @@ -0,0 +1,546 @@ +// static/visualizer.js +class TechVisualizer { + constructor(canvasId) { + this.canvas = document.getElementById(canvasId); + this.ctx = this.canvas.getContext('2d'); + this.ws = null; + this.data = {}; + + // 科技感配色方案 + this.colors = { + primary: '#00D9FF', // 青蓝色 + secondary: '#FF00FF', // 品红/紫色 + accent: '#00FF88', // 青绿色 + warning: '#FFAA00', // 橙色 + background: '#000000', // 黑色 + surface: '#0A0A0A', // 深灰 + text: '#FFFFFF', // 白色 + textMuted: '#888888', // 灰色 + grid: '#1A1A1A', // 网格色 + glow: '#00D9FF55' // 发光效果 + }; + + // 字体设置 + this.fonts = { + title: 'bold 24px "Orbitron", "Microsoft YaHei", sans-serif', + subtitle: 'bold 18px "Rajdhani", "Microsoft YaHei", sans-serif', + body: '16px "Roboto", "Microsoft YaHei", sans-serif', + small: '12px "Roboto", "Microsoft YaHei", sans-serif', + tiny: '10px "Roboto", sans-serif' + }; + + this.setupCanvas(); + this.connectWebSocket(); + } + + setupCanvas() { + // 设置画布大小 + const resizeCanvas = () => 
{ + const rect = this.canvas.getBoundingClientRect(); + this.canvas.width = rect.width * window.devicePixelRatio; + this.canvas.height = rect.height * window.devicePixelRatio; + this.ctx.scale(window.devicePixelRatio, window.devicePixelRatio); + }; + + window.addEventListener('resize', resizeCanvas); + resizeCanvas(); + } + + connectWebSocket() { + const wsUrl = `ws://${window.location.host}/ws/visualizer`; + this.ws = new WebSocket(wsUrl); + + this.ws.onmessage = (event) => { + try { + this.data = JSON.parse(event.data); + this.render(); + } catch (e) { + console.error('Failed to parse visualization data:', e); + } + }; + + this.ws.onclose = () => { + setTimeout(() => this.connectWebSocket(), 1000); + }; + } + + render() { + const ctx = this.ctx; + const width = this.canvas.width / window.devicePixelRatio; + const height = this.canvas.height / window.devicePixelRatio; + + // 清空画布 + ctx.fillStyle = this.colors.background; + ctx.fillRect(0, 0, width, height); + + // 绘制网格背景 + this.drawGrid(width, height); + + // 绘制HUD边框 + this.drawHUD(width, height); + + // 根据模式绘制内容 + if (this.data.mode === 'SEGMENT') { + this.drawSegmentMode(width, height); + } else if (this.data.mode === 'FLASH') { + this.drawFlashMode(width, height); + } else if (this.data.mode === 'TRACK') { + this.drawTrackMode(width, height); + } + + // 绘制手部骨骼 + if (this.data.hand) { + this.drawHand(this.data.hand, width, height); + } + + // 绘制FPS和状态信息 + this.drawStats(width, height); + } + + drawGrid(width, height) { + const ctx = this.ctx; + ctx.strokeStyle = this.colors.grid; + ctx.lineWidth = 0.5; + + const gridSize = 50; + for (let x = 0; x < width; x += gridSize) { + ctx.beginPath(); + ctx.moveTo(x, 0); + ctx.lineTo(x, height); + ctx.stroke(); + } + for (let y = 0; y < height; y += gridSize) { + ctx.beginPath(); + ctx.moveTo(0, y); + ctx.lineTo(width, y); + ctx.stroke(); + } + } + + drawHUD(width, height) { + const ctx = this.ctx; + const margin = 20; + + // 四角装饰 + ctx.strokeStyle = this.colors.primary; + 
ctx.lineWidth = 2; + + const cornerSize = 40; + // 左上角 + ctx.beginPath(); + ctx.moveTo(margin, margin + cornerSize); + ctx.lineTo(margin, margin); + ctx.lineTo(margin + cornerSize, margin); + ctx.stroke(); + + // 右上角 + ctx.beginPath(); + ctx.moveTo(width - margin - cornerSize, margin); + ctx.lineTo(width - margin, margin); + ctx.lineTo(width - margin, margin + cornerSize); + ctx.stroke(); + + // 左下角 + ctx.beginPath(); + ctx.moveTo(margin, height - margin - cornerSize); + ctx.lineTo(margin, height - margin); + ctx.lineTo(margin + cornerSize, height - margin); + ctx.stroke(); + + // 右下角 + ctx.beginPath(); + ctx.moveTo(width - margin - cornerSize, height - margin); + ctx.lineTo(width - margin, height - margin); + ctx.lineTo(width - margin, height - margin - cornerSize); + ctx.stroke(); + } + + drawSegmentMode(width, height) { + const ctx = this.ctx; + + // 绘制检测到的物体 + if (this.data.segments) { + this.data.segments.forEach((seg, index) => { + if (seg.contour && seg.contour.length > 0) { + // 绘制轮廓 + ctx.beginPath(); + ctx.strokeStyle = seg.is_target ? this.colors.primary : this.colors.secondary; + ctx.lineWidth = seg.is_target ? 
3 : 2; + + // 添加发光效果 + if (seg.is_target) { + ctx.shadowColor = this.colors.primary; + ctx.shadowBlur = 10; + } + + const points = this.scalePoints(seg.contour, width, height); + ctx.moveTo(points[0][0], points[0][1]); + points.forEach(p => ctx.lineTo(p[0], p[1])); + ctx.closePath(); + ctx.stroke(); + + ctx.shadowBlur = 0; + + // 如果是目标,绘制中心标记 + if (seg.is_target) { + const center = this.getContourCenter(points); + this.drawTargetMarker(center[0], center[1]); + + // 绘制面积信息 + ctx.font = this.fonts.small; + ctx.fillStyle = this.colors.primary; + ctx.fillText(`Area: ${seg.area}`, center[0] + 20, center[1] - 20); + } + } + }); + } + + // 绘制状态文字 + if (this.data.auto_lock && this.data.auto_lock.active) { + this.drawStatusText( + `目标锁定中 Locking Target`, + `${this.data.auto_lock.remaining.toFixed(1)}s`, + width / 2, + 100, + this.colors.warning + ); + } else { + this.drawStatusText( + '扫描中 Scanning', + '等待检测目标 Waiting for target', + width / 2, + 100, + this.colors.primary + ); + } + } + + drawFlashMode(width, height) { + const ctx = this.ctx; + + if (this.data.flash && this.data.flash.mask_contour) { + const progress = this.data.flash.progress || 0; + const alpha = 0.3 + 0.4 * (0.5 * (1 + Math.sin(progress * 2 * Math.PI - Math.PI/2))); + + // 绘制闪烁轮廓 + ctx.beginPath(); + ctx.strokeStyle = this.colors.accent; + ctx.lineWidth = 4; + ctx.globalAlpha = alpha; + + const points = this.scalePoints(this.data.flash.mask_contour, width, height); + ctx.moveTo(points[0][0], points[0][1]); + points.forEach(p => ctx.lineTo(p[0], p[1])); + ctx.closePath(); + + // 填充 + ctx.fillStyle = this.colors.accent + '33'; + ctx.fill(); + ctx.stroke(); + + ctx.globalAlpha = 1; + + // 绘制锁定动画 + const center = this.getContourCenter(points); + this.drawLockAnimation(center[0], center[1], progress); + } + + this.drawStatusText( + '正在锁定目标 Locking Target', + '准备追踪 Preparing to track', + width / 2, + 100, + this.colors.accent + ); + } + + drawTrackMode(width, height) { + const ctx = this.ctx; + const tracking 
= this.data.tracking; + + if (!tracking) return; + + // 绘制追踪多边形 + if (tracking.polygon && tracking.polygon.length > 0) { + ctx.beginPath(); + ctx.strokeStyle = this.colors.accent; + ctx.lineWidth = 3; + ctx.shadowColor = this.colors.accent; + ctx.shadowBlur = 15; + + const points = this.scalePoints(tracking.polygon, width, height); + ctx.moveTo(points[0][0], points[0][1]); + points.forEach(p => ctx.lineTo(p[0], p[1])); + ctx.closePath(); + ctx.stroke(); + + ctx.shadowBlur = 0; + + // 绘制中心点 + if (tracking.center) { + const center = this.scalePoint(tracking.center, width, height); + ctx.fillStyle = this.colors.accent; + ctx.beginPath(); + ctx.arc(center[0], center[1], 6, 0, Math.PI * 2); + ctx.fill(); + } + } + + // 绘制进度条 + this.drawProgressBars(tracking, width, height); + + // 绘制引导文字 + if (tracking.guidance) { + const guidanceText = { + '向前靠近': 'Move Closer', + '后退': 'Move Back', + '保持': 'Hold Position' + }; + + this.drawStatusText( + tracking.guidance, + guidanceText[tracking.guidance] || '', + width / 2, + height - 100, + this.colors.warning + ); + } + + // 如果触发了重新锁定 + if (tracking.relock_triggered) { + this.drawStatusText( + '已根据周边检测刷新追踪', + 'Tracking refreshed by peripheral detection', + width / 2, + 170, + this.colors.accent + ); + } + } + + drawHand(handData, width, height) { + const ctx = this.ctx; + + if (!handData.landmarks) return; + + // 缩放坐标 + const landmarks = handData.landmarks.map(p => + this.scalePoint([p[0], p[1]], width, height) + ); + + // 绘制手部连接线 + ctx.strokeStyle = this.colors.secondary; + ctx.lineWidth = 2; + ctx.globalAlpha = 0.8; + + // MediaPipe手部连接定义 + const connections = [ + [0, 1], [1, 2], [2, 3], [3, 4], // 拇指 + [0, 5], [5, 6], [6, 7], [7, 8], // 食指 + [5, 9], [9, 10], [10, 11], [11, 12], // 中指 + [9, 13], [13, 14], [14, 15], [15, 16], // 无名指 + [13, 17], [17, 18], [18, 19], [19, 20], // 小指 + [0, 17] // 手腕连接 + ]; + + connections.forEach(([start, end]) => { + ctx.beginPath(); + ctx.moveTo(landmarks[start][0], landmarks[start][1]); + 
ctx.lineTo(landmarks[end][0], landmarks[end][1]); + ctx.stroke(); + }); + + // 绘制关键点 + landmarks.forEach((point, i) => { + ctx.fillStyle = this.colors.secondary; + ctx.beginPath(); + ctx.arc(point[0], point[1], 3, 0, Math.PI * 2); + ctx.fill(); + }); + + ctx.globalAlpha = 1; + + // 绘制握持评分 + if (handData.grasp_score !== undefined) { + ctx.font = this.fonts.body; + ctx.fillStyle = this.colors.text; + ctx.fillText( + `握持评分 Grasp Score: ${handData.grasp_score.toFixed(2)}`, + 20, + 80 + ); + } + } + + drawProgressBars(tracking, width, height) { + const ctx = this.ctx; + const barWidth = width * 0.25; + const barHeight = 12; + const x = 20; + const y = height - 80; + + // 对齐进度条 + this.drawProgressBar( + x, y - 30, + barWidth, barHeight, + tracking.align_score || 0, + '对齐 Alignment', + this.colors.primary + ); + + // 距离进度条 + this.drawProgressBar( + x, y, + barWidth, barHeight, + tracking.range_score || 0, + `距离 Distance (≈1)`, + this.colors.secondary + ); + + // 显示比率 + if (tracking.ratio !== null && tracking.ratio !== undefined) { + ctx.font = this.fonts.small; + ctx.fillStyle = this.colors.text; + ctx.fillText( + `面积比 Ratio: ${tracking.ratio.toFixed(2)}`, + x + barWidth + 20, + y + 8 + ); + } + } + + drawProgressBar(x, y, width, height, value, label, color) { + const ctx = this.ctx; + + // 背景 + ctx.fillStyle = this.colors.surface; + ctx.fillRect(x, y, width, height); + + // 边框 + ctx.strokeStyle = color + '44'; + ctx.lineWidth = 1; + ctx.strokeRect(x, y, width, height); + + // 填充 + const fillWidth = width * Math.max(0, Math.min(1, value)); + const gradient = ctx.createLinearGradient(x, y, x + fillWidth, y); + gradient.addColorStop(0, color + 'AA'); + gradient.addColorStop(1, color); + ctx.fillStyle = gradient; + ctx.fillRect(x, y, fillWidth, height); + + // 标签 + ctx.font = this.fonts.small; + ctx.fillStyle = this.colors.textMuted; + ctx.fillText(label, x, y - 5); + } + + drawStats(width, height) { + const ctx = this.ctx; + + // FPS显示 + ctx.font = this.fonts.body; + 
ctx.fillStyle = this.colors.accent; + ctx.fillText(`FPS: ${(this.data.fps || 0).toFixed(1)}`, 20, 40); + + // 模式显示 + const modeText = { + 'SEGMENT': '分割模式 Segmentation', + 'FLASH': '锁定模式 Locking', + 'TRACK': '追踪模式 Tracking' + }; + + ctx.fillStyle = this.colors.text; + ctx.fillText(modeText[this.data.mode] || this.data.mode, width - 200, 40); + } + + // 辅助函数 + scalePoint(point, width, height) { + if (!this.data.frame_size) return [0, 0]; + return [ + point[0] * width / this.data.frame_size.width, + point[1] * height / this.data.frame_size.height + ]; + } + + scalePoints(points, width, height) { + return points.map(p => this.scalePoint(p, width, height)); + } + + getContourCenter(points) { + const sum = points.reduce((acc, p) => [acc[0] + p[0], acc[1] + p[1]], [0, 0]); + return [sum[0] / points.length, sum[1] / points.length]; + } + + drawTargetMarker(x, y) { + const ctx = this.ctx; + ctx.strokeStyle = this.colors.primary; + ctx.lineWidth = 2; + + // 十字准星 + const size = 20; + ctx.beginPath(); + ctx.moveTo(x - size, y); + ctx.lineTo(x - size/2, y); + ctx.moveTo(x + size/2, y); + ctx.lineTo(x + size, y); + ctx.moveTo(x, y - size); + ctx.lineTo(x, y - size/2); + ctx.moveTo(x, y + size/2); + ctx.lineTo(x, y + size); + ctx.stroke(); + + // 圆圈 + ctx.beginPath(); + ctx.arc(x, y, 10, 0, Math.PI * 2); + ctx.stroke(); + } + + drawLockAnimation(x, y, progress) { + const ctx = this.ctx; + const radius = 30 + 10 * Math.sin(progress * Math.PI * 2); + + ctx.strokeStyle = this.colors.accent; + ctx.lineWidth = 3; + ctx.globalAlpha = 0.8; + + // 旋转的锁定环 + ctx.save(); + ctx.translate(x, y); + ctx.rotate(progress * Math.PI * 2); + + // 绘制4个弧形 + for (let i = 0; i < 4; i++) { + ctx.beginPath(); + ctx.arc(0, 0, radius, i * Math.PI/2 + 0.1, i * Math.PI/2 + Math.PI/2 - 0.1); + ctx.stroke(); + } + + ctx.restore(); + ctx.globalAlpha = 1; + } + + drawStatusText(mainText, subText, x, y, color) { + const ctx = this.ctx; + + // 主文字(中文) + ctx.font = this.fonts.subtitle; + ctx.fillStyle = color; + 
ctx.textAlign = 'center'; + ctx.fillText(mainText, x, y); + + // 副文字(英文) + if (subText) { + ctx.font = this.fonts.small; + ctx.fillStyle = this.colors.textMuted; + ctx.fillText(subText, x, y + 20); + } + + ctx.textAlign = 'left'; + } +} + +// 初始化 +window.addEventListener('DOMContentLoaded', () => { + window.visualizer = new TechVisualizer('tech-canvas'); +}); \ No newline at end of file diff --git a/sync_recorder.py b/sync_recorder.py new file mode 100644 index 0000000..e455188 --- /dev/null +++ b/sync_recorder.py @@ -0,0 +1,322 @@ +# sync_recorder.py +# 同步录制ESP32视频流和音频指令 +# 自动确保视频和音频时间轴对齐 + +import os +import cv2 +import wave +import numpy as np +import threading +import time +from datetime import datetime +from collections import deque +import struct + +class SyncRecorder: + """同步录制器 - 视频+音频时间对齐""" + + def __init__(self, output_dir="recordings", fps=15.0): + """ + 初始化录制器 + :param output_dir: 输出目录 + :param fps: 视频帧率(默认15fps) + """ + self.output_dir = output_dir + self.fps = fps + self.frame_duration = 1.0 / fps # 每帧时长(秒) + + # 创建输出目录 + os.makedirs(output_dir, exist_ok=True) + + # 录制状态 + self.is_recording = False + self.start_time = None + + # 视频写入器 + self.video_writer = None + self.video_path = None + self.last_frame = None + self.frame_count = 0 + + # 音频写入器 + self.audio_writer = None + self.audio_path = None + self.audio_buffer = bytearray() + self.last_audio_time = 0.0 + + # 音频参数(ESP32标准:16kHz, 16bit, Mono) + self.sample_rate = 16000 + self.sample_width = 2 # 16bit = 2 bytes + self.channels = 1 + + # 线程安全 + self.lock = threading.Lock() + + # 性能监控 + self.frames_written = 0 + self.audio_bytes_written = 0 + self.last_log_time = time.time() + + print(f"[RECORDER] 录制器初始化完成 - FPS={fps}, 输出目录={output_dir}") + + def start_recording(self): + """开始新的录制会话""" + if self.is_recording: + print("[RECORDER] 警告:已经在录制中") + return False + + # 生成文件名(时间戳) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + self.video_path = os.path.join(self.output_dir, f"video_{timestamp}.avi") 
+ self.audio_path = os.path.join(self.output_dir, f"audio_{timestamp}.wav") + + # 重置状态 + self.start_time = time.time() + self.last_audio_time = 0.0 + self.frame_count = 0 + self.frames_written = 0 + self.audio_bytes_written = 0 + self.audio_buffer.clear() + self.last_frame = None + + # 初始化音频文件 + try: + self.audio_writer = wave.open(self.audio_path, 'wb') + self.audio_writer.setnchannels(self.channels) + self.audio_writer.setsampwidth(self.sample_width) + self.audio_writer.setframerate(self.sample_rate) + except Exception as e: + print(f"[RECORDER] 音频文件初始化失败: {e}") + return False + + self.is_recording = True + print(f"[RECORDER] 开始录制") + print(f" 视频: {self.video_path}") + print(f" 音频: {self.audio_path}") + return True + + def add_frame(self, jpeg_data: bytes): + """ + 添加一帧视频(原始JPEG数据) + :param jpeg_data: JPEG格式的图像数据 + """ + if not self.is_recording: + return + + try: + with self.lock: + # 解码JPEG + arr = np.frombuffer(jpeg_data, dtype=np.uint8) + frame = cv2.imdecode(arr, cv2.IMREAD_COLOR) + + if frame is None: + print(f"[RECORDER] 警告:帧解码失败") + return + + # 首帧:初始化视频写入器 + if self.video_writer is None: + height, width = frame.shape[:2] + # 使用XVID编码器(Windows兼容性好) + fourcc = cv2.VideoWriter_fourcc(*'XVID') + self.video_writer = cv2.VideoWriter( + self.video_path, + fourcc, + self.fps, + (width, height) + ) + + if not self.video_writer.isOpened(): + print(f"[RECORDER] 错误:视频写入器初始化失败") + self.is_recording = False + return + + print(f"[RECORDER] 视频写入器初始化:{width}x{height} @ {self.fps}fps") + + # 写入帧 + self.video_writer.write(frame) + self.frame_count += 1 + self.frames_written += 1 + self.last_frame = frame + + # 计算当前视频时长(秒) + current_video_time = self.frame_count * self.frame_duration + + # 音频同步:填充静音到视频时长 + self._sync_audio_to_video(current_video_time) + + # 性能日志(每10秒) + now = time.time() + if now - self.last_log_time > 10.0: + elapsed = now - self.start_time + avg_fps = self.frames_written / elapsed if elapsed > 0 else 0 + audio_duration = self.audio_bytes_written / 
(self.sample_rate * self.sample_width) + print(f"[RECORDER] 录制中 - 帧数={self.frames_written}, " + f"实际FPS={avg_fps:.1f}, " + f"视频时长={current_video_time:.1f}s, " + f"音频时长={audio_duration:.1f}s") + self.last_log_time = now + + except Exception as e: + print(f"[RECORDER] 添加帧失败: {e}") + import traceback + traceback.print_exc() + + def add_audio(self, pcm_data: bytes, text: str = ""): + """ + 添加音频数据(PCM 16bit) + :param pcm_data: PCM格式音频数据 + :param text: 语音文本(用于日志) + """ + if not self.is_recording: + return + + try: + with self.lock: + # 当前视频时长 + current_video_time = self.frame_count * self.frame_duration + + # 在添加音频前,先填充静音到视频时长 + self._sync_audio_to_video(current_video_time) + + # 写入实际音频 + self.audio_writer.writeframes(pcm_data) + audio_duration = len(pcm_data) / (self.sample_rate * self.sample_width) + self.last_audio_time = current_video_time + audio_duration + self.audio_bytes_written += len(pcm_data) + + if text: + print(f"[RECORDER] 录制语音: {text[:30]}... (时间={current_video_time:.2f}s, 时长={audio_duration:.2f}s)") + + except Exception as e: + print(f"[RECORDER] 添加音频失败: {e}") + + def _sync_audio_to_video(self, video_time: float): + """ + 同步音频到视频时长(填充静音) + :param video_time: 当前视频时长(秒) + """ + # 计算需要填充的静音时长 + silence_duration = video_time - self.last_audio_time + + if silence_duration > 0.01: # 大于10ms才填充 + # 生成静音数据 + silence_samples = int(silence_duration * self.sample_rate) + silence_bytes = silence_samples * self.sample_width + silence_data = b'\x00' * silence_bytes + + # 写入静音 + self.audio_writer.writeframes(silence_data) + self.audio_bytes_written += len(silence_data) + self.last_audio_time = video_time + + def stop_recording(self): + """停止录制并保存文件""" + if not self.is_recording: + return + + print("[RECORDER] 正在保存录制文件...") + self.is_recording = False + + with self.lock: + # 最后一次音频同步 + try: + if self.frame_count > 0: + final_video_time = self.frame_count * self.frame_duration + self._sync_audio_to_video(final_video_time) + except Exception as e: + print(f"[RECORDER] 
最终音频同步失败: {e}") + + # 关闭视频写入器(关键步骤) + if self.video_writer is not None: + try: + print("[RECORDER] 正在关闭视频写入器...") + self.video_writer.release() + print("[RECORDER] 视频写入器已关闭") + except Exception as e: + print(f"[RECORDER] 关闭视频写入器失败: {e}") + finally: + self.video_writer = None + + # 关闭音频写入器 + if self.audio_writer is not None: + try: + print("[RECORDER] 正在关闭音频写入器...") + self.audio_writer.close() + print("[RECORDER] 音频写入器已关闭") + except Exception as e: + print(f"[RECORDER] 关闭音频写入器失败: {e}") + finally: + self.audio_writer = None + + # 统计信息 + try: + elapsed = time.time() - self.start_time if self.start_time else 0 + video_duration = self.frame_count * self.frame_duration + audio_duration = self.audio_bytes_written / (self.sample_rate * self.sample_width) + + print(f"\n{'='*60}") + print(f"[RECORDER] 录制完成") + print(f"{'='*60}") + print(f" 总耗时: {elapsed:.1f}秒") + print(f"\n 视频: {self.video_path}") + print(f" - 帧数: {self.frames_written}") + print(f" - 时长: {video_duration:.2f}秒") + if elapsed > 0: + print(f" - 平均FPS: {self.frames_written/elapsed:.1f}") + print(f"\n 音频: {self.audio_path}") + print(f" - 数据量: {self.audio_bytes_written/1024:.1f} KB") + print(f" - 时长: {audio_duration:.2f}秒") + print(f"\n 时间差: {abs(video_duration - audio_duration):.3f}秒") + + # 验证文件 + if os.path.exists(self.video_path): + video_size = os.path.getsize(self.video_path) / 1024 / 1024 + print(f" 视频文件大小: {video_size:.2f} MB ✓") + else: + print(f" ⚠ 警告:视频文件未生成") + + if os.path.exists(self.audio_path): + audio_size = os.path.getsize(self.audio_path) / 1024 + print(f" 音频文件大小: {audio_size:.2f} KB ✓") + else: + print(f" ⚠ 警告:音频文件未生成") + + print(f"{'='*60}\n") + except Exception as e: + print(f"[RECORDER] 显示统计信息失败: {e}") + + +# 全局录制器实例 +_global_recorder = None +_recorder_lock = threading.Lock() + +def get_recorder(): + """获取全局录制器实例""" + global _global_recorder + with _recorder_lock: + if _global_recorder is None: + _global_recorder = SyncRecorder() + return _global_recorder + +def start_recording(): + 
"""启动录制""" + recorder = get_recorder() + return recorder.start_recording() + +def stop_recording(): + """停止录制""" + recorder = get_recorder() + recorder.stop_recording() + +def record_frame(jpeg_data: bytes): + """记录一帧(供外部调用)""" + recorder = get_recorder() + if recorder.is_recording: + recorder.add_frame(jpeg_data) + +def record_audio(pcm_data: bytes, text: str = ""): + """记录音频(供外部调用)""" + recorder = get_recorder() + if recorder.is_recording: + recorder.add_audio(pcm_data, text) + diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..ea4c564 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,715 @@ + + + + + + + NaviGlass 导盲系统可视化 + + + + + + + + + +
+ +
+
+ + +
+ +
📊 IMU 姿态可视化 +
+
+
+
+
+ +
+ + + +
+ + + +
+ + +
+ + + + + + + + + + + + + + + + + + +
+ + + + + + + + + \ No newline at end of file diff --git a/trafficlight_detection.py b/trafficlight_detection.py new file mode 100644 index 0000000..f2da50e --- /dev/null +++ b/trafficlight_detection.py @@ -0,0 +1,627 @@ +# -*- coding: utf-8 -*- +""" +红绿灯检测模块 - 独立工作流版本 +基于YOLO模型实时检测红绿灯状态,并通过语音反馈 +可以通过语音命令"检测红绿灯"、"停止检测"来控制 +""" + +import os +import time +import threading +import cv2 +import numpy as np +from ultralytics import YOLO +import bridge_io +from audio_player import play_voice_text # 使用统一的语音播放接口 +import logging + +# Day 20: TensorRT 模型加载工具 +from model_utils import get_best_model_path + +logger = logging.getLogger(__name__) + +# ========= 配置参数 ========= +# Day 20: 优先使用 TensorRT 引擎 +YOLO_MODEL_PATH = get_best_model_path(os.path.join(os.path.dirname(__file__), "model", "trafficlight.pt")) + +# ========= 显示参数 ========= +CONF_THRESHOLD = 0.25 # 置信度阈值 +FONT_SIZE = 20 +STROKE_WIDTH = 3 + +# ========= 语音播报参数 ========= +TTS_INTERVAL_SEC = 2.0 # 语音播报间隔(避免频繁播报) +ENABLE_TTS = False # 【禁用】红绿灯检测模块不播报,由 workflow_crossstreet.py 统一处理 + +# ========= 线程控制 ========= +_detection_thread = None +_stop_event = None +_detection_running = False + +# ========= 单帧处理模式(新增)========= +_model = None # 全局模型实例 +_last_tts_ts = 0.0 +_last_detected_light = None +_detection_history = [] + +# ========= 前端配色(BGR) ========= +FRONTEND_COLORS = { + "text": (230, 237, 243), # 白色文字 + "red": (0, 0, 255), # 红色 + "yellow": (0, 255, 255), # 黄色 + "green": (0, 255, 0), # 绿色 + "muted": (159, 176, 195), # 灰色 +} + +# 红绿灯状态到颜色的映射 +LIGHT_COLORS = { + "stop": FRONTEND_COLORS["red"], + "countdown_go": FRONTEND_COLORS["yellow"], + "go": FRONTEND_COLORS["green"], +} + +# 【修正】红绿灯状态到中文的映射 +# 只包含真正的红绿灯类别,排除斑马线(crossing)和空白 +LIGHT_NAMES = { + "stop": "红灯", # 机动车红灯 + "go": "绿灯", # 机动车绿灯 + "countdown_go": "黄灯", # 绿灯倒计时(用黄灯提示) + "countdown_stop": "红灯", # 红灯倒计时 +} + +# 红绿灯状态到语音文件的映射 +LIGHT_VOICE_MAP = { + "stop": "红灯", # → voice/红灯.WAV + "go": "绿灯", # → voice/绿灯.WAV + "countdown_go": "黄灯", # → voice/黄灯.WAV(绿灯倒计时用黄灯提示) + 
"countdown_stop": "红灯", # → voice/红灯.WAV +} + +# 需要过滤的类别(不检测、不显示) +FILTERED_CLASSES = { + "crossing", # 斑马线(不需要) + "blank", # 空白 + "countdown_blank" # 倒计时空白 +} + +# UI文本管理 +_UI_LINE = 0 +_UI_H = 0 +_UI_TR_LINE = 0 +_UI_TOP_MARGIN = 12 +_UI_RIGHT_MARGIN = 12 +UNIFIED_FONT_PX = 12 + +def ui_reset_overlay(img_h: int): + """每帧调用一次,重置叠加行计数""" + global _UI_LINE, _UI_H, _UI_TR_LINE + _UI_LINE = 0 + _UI_TR_LINE = 0 + _UI_H = int(img_h) + +def _ui_next_y_top(font_size: int) -> int: + """返回右上角下一行的y坐标""" + global _UI_TR_LINE + line_gap = max(4, int(font_size * 0.25)) + y_top = _UI_TOP_MARGIN + (_UI_TR_LINE * (font_size + line_gap)) + _UI_TR_LINE += 1 + return y_top + +# ======== 中文文本绘制 ======== +_PIL_OK = False +_FONT_PATH = None + +def _init_font(): + global _PIL_OK, _FONT_PATH + try: + from PIL import ImageFont + _PIL_OK = True + except Exception: + _PIL_OK = False + return + candidates = [ + # Linux 中文字体路径 (Ubuntu/Debian) + "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc", + "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", + "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", + "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc", + "/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf", + ] + for p in candidates: + if os.path.exists(p): + _FONT_PATH = p + return + _PIL_OK = False + +_init_font() + +def draw_text_cn(img_bgr, text, xy, font_size=20, color=(255,255,255), ui_hint=True): + """统一的中文文本绘制""" + color = (255, 255, 255) + font_size = int(UNIFIED_FONT_PX) + + H, W = img_bgr.shape[:2] + y_top = _ui_next_y_top(font_size) if ui_hint else xy[1] + tw = th = 0 + font_obj = None + + if _PIL_OK and _FONT_PATH: + try: + from PIL import Image, ImageDraw, ImageFont + font_obj = ImageFont.truetype(_FONT_PATH, font_size) + bbox = ImageDraw.Draw(Image.new('RGB', (1,1))).textbbox((0,0), text, font=font_obj) + tw = max(1, bbox[2] - bbox[0]) + th = max(1, bbox[3] - bbox[1]) + except Exception: + pass + + if _PIL_OK and _FONT_PATH and font_obj is not None: + try: + from 
PIL import Image, ImageDraw + img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) + pil_img = Image.fromarray(img_rgb) + draw = ImageDraw.Draw(pil_img) + if ui_hint: + x = max(8, W - _UI_RIGHT_MARGIN - tw) + y = y_top + else: + x = xy[0] + y = xy[1] + draw.text((x, y), text, fill=color, font=font_obj) + img_bgr[:] = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR) + return + except Exception: + pass + + # OpenCV 回退 + if tw <= 0 or th <= 0: + scale = font_size/24.0 + (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scale, 2) + if ui_hint: + x = max(8, W - _UI_RIGHT_MARGIN - int(tw)) + y_baseline = int(y_top + th) + else: + x = xy[0] + y_baseline = xy[1] + int(th) + cv2.putText(img_bgr, text, (x, y_baseline), cv2.FONT_HERSHEY_SIMPLEX, font_size/24.0, color, 2, cv2.LINE_AA) + +def main(headless: bool = True, stop_event=None): + """ + 红绿灯检测主函数 + + 参数: + headless: 是否无头模式(不显示OpenCV窗口) + stop_event: threading.Event,用于停止检测 + """ + + print("[TRAFFIC] 加载 YOLO 红绿灯检测模型...") + try: + model = YOLO(YOLO_MODEL_PATH) + print(f"[TRAFFIC] 模型加载成功: {YOLO_MODEL_PATH}") + except Exception as e: + print(f"[TRAFFIC] 模型加载失败: {e}") + return + + # 获取类别名称 + class_names = model.names if hasattr(model, 'names') else {} + print(f"[TRAFFIC] 模型类别: {class_names}") + + # 状态跟踪 + last_tts_ts = 0.0 + last_detected_light = None + fps_hist = [] + + # 【优化】状态稳定性判断 - 使用多数表决而非连续帧 + detection_history = [] # 保存最近N帧的检测结果 + HISTORY_SIZE = 5 # 保存最近5帧 + MAJORITY_THRESHOLD = 3 # 5帧中至少3帧相同才认为稳定 + + # 【新增】帧统计 + frame_count = 0 + frame_received_count = 0 + frame_none_count = 0 + last_frame_log_time = time.time() + + print("[TRAFFIC] 等待 ESP32 画面...") + + try: + while True: + # 检查停止事件 + if stop_event and stop_event.is_set(): + print("[TRAFFIC] 停止事件触发,退出检测") + break + + # 【优化】从bridge_io获取原始BGR帧 - 增加超时时间 + frame = bridge_io.wait_raw_bgr(timeout_sec=2.0) # 从0.5秒增加到2秒 + + frame_count += 1 + + if frame is None: + frame_none_count += 1 + # 每3秒打印一次帧统计 + current_time = time.time() + if current_time - 
last_frame_log_time > 3.0: + print(f"[TRAFFIC] 帧统计: 总={frame_count}, 收到={frame_received_count}, " + f"丢失={frame_none_count}, 丢失率={frame_none_count/frame_count*100:.1f}%") + last_frame_log_time = current_time + + if headless: + cv2.waitKey(1) + continue + + frame_received_count += 1 + + # 重置UI叠加 + H, W = frame.shape[:2] + ui_reset_overlay(H) + + vis = frame.copy() + t_now = time.time() + + # 【优化】YOLO推理 - 添加计时 + inference_start = time.time() + results = model(frame, conf=CONF_THRESHOLD, verbose=False) + inference_time = (time.time() - inference_start) * 1000 + + # 监控推理时间 + if inference_time > 100: + print(f"[TRAFFIC] WARNING: 推理耗时 {inference_time:.0f}ms") + + # 处理检测结果 + detected_light = None + max_conf = 0.0 + + if results and len(results) > 0: + r = results[0] + if r.boxes is not None and len(r.boxes) > 0: + # 【过滤】遍历所有检测框,找到置信度最高的红绿灯(排除斑马线) + for box in r.boxes: + cls_id = int(box.cls[0]) + conf = float(box.conf[0]) + class_name = class_names.get(cls_id, f"class_{cls_id}") + class_name_lower = class_name.lower() + + # 跳过不需要的类别 + if class_name_lower in FILTERED_CLASSES: + continue + + if conf > max_conf: + max_conf = conf + detected_light = class_name_lower + + # 【过滤】绘制检测框(只绘制红绿灯) + for box in r.boxes: + cls_id = int(box.cls[0]) + conf = float(box.conf[0]) + class_name = class_names.get(cls_id, f"class_{cls_id}") + class_name_lower = class_name.lower() + + # 跳过不需要的类别 + if class_name_lower in FILTERED_CLASSES: + continue + + # 获取边界框坐标 + x1, y1, x2, y2 = map(int, box.xyxy[0]) + + # 确定颜色 + color = LIGHT_COLORS.get(class_name_lower, FRONTEND_COLORS["text"]) + + # 绘制边界框 + cv2.rectangle(vis, (x1, y1), (x2, y2), color, STROKE_WIDTH) + + # 绘制中文标签(使用PIL) + label = f"{LIGHT_NAMES.get(class_name.lower(), class_name)}: {conf:.2f}" + + if _PIL_OK and _FONT_PATH: + try: + from PIL import Image, ImageDraw, ImageFont + # 使用较大的字体绘制标签 + font_obj = ImageFont.truetype(_FONT_PATH, 20) + # 转换为PIL图像 + img_rgb = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB) + pil_img = Image.fromarray(img_rgb) + 
draw = ImageDraw.Draw(pil_img) + + # 计算文本尺寸 + bbox = draw.textbbox((0, 0), label, font=font_obj) + text_w = bbox[2] - bbox[0] + text_h = bbox[3] - bbox[1] + + # 标签位置 + label_y = max(y1 - text_h - 8, text_h) + + # 绘制背景矩形 + bg_x1 = x1 + bg_y1 = label_y - text_h - 4 + bg_x2 = x1 + text_w + 8 + bg_y2 = label_y + 4 + cv2.rectangle(vis, (bg_x1, bg_y1), (bg_x2, bg_y2), color, -1) + + # 重新转换(因为矩形是用OpenCV画的) + img_rgb = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB) + pil_img = Image.fromarray(img_rgb) + draw = ImageDraw.Draw(pil_img) + + # 【删除】绘制文字 + # draw.text((x1 + 4, label_y - text_h), label, fill=(0, 0, 0), font=font_obj) + + # 转换回OpenCV格式 + vis[:] = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR) + except Exception as e: + # 【删除】PIL失败时的文本标签 + pass + else: + # 【删除】文本标签 + pass + + # 【优化】状态稳定性判断:使用多数表决而非连续帧 + detection_history.append(detected_light) + if len(detection_history) > HISTORY_SIZE: + detection_history.pop(0) + + # 判断状态是否稳定(多数表决) + stable_light = None + if len(detection_history) >= MAJORITY_THRESHOLD: + # 统计最近N帧中每个状态出现的次数 + valid_detections = [d for d in detection_history if d and d in LIGHT_NAMES] + if len(valid_detections) >= MAJORITY_THRESHOLD: + # 找出现次数最多的状态 + from collections import Counter + counter = Counter(valid_detections) + most_common = counter.most_common(1) + if most_common and most_common[0][1] >= MAJORITY_THRESHOLD: + stable_light = most_common[0][0] + # 打印调试信息 + if frame_received_count % 30 == 0: + print(f"[TRAFFIC] 检测历史: {detection_history[-5:]}, 稳定状态: {stable_light}") + + # 【禁用语音播报】只检测不播报,由调用者(workflow_crossstreet.py)统一处理语音 + # 只更新状态跟踪 + if stable_light: + # 状态改变时记录(但不播报) + if stable_light != last_detected_light: + last_detected_light = stable_light + print(f"[TRAFFIC] 检测到稳定状态改变: {LIGHT_NAMES[stable_light]}(不播报)") + last_tts_ts = t_now + # 超过间隔时间,更新时间戳(但不播报) + elif (t_now - last_tts_ts) > TTS_INTERVAL_SEC: + print(f"[TRAFFIC] 稳定状态持续: {LIGHT_NAMES[stable_light]}(不播报)") + last_tts_ts = t_now + + # 【删除】显示当前检测状态 + # if detected_light and 
detected_light in LIGHT_NAMES: + # status_text = f"检测: {LIGHT_NAMES[detected_light]} ({max_conf:.2f})" + # color = LIGHT_COLORS[detected_light] + # else: + # status_text = "检测: 无" + # color = FRONTEND_COLORS["muted"] + # draw_text_cn(vis, status_text, (10, 40), font_size=18, color=color) + + # 【删除】显示稳定状态 + # if stable_light: + # stable_text = f"稳定状态: {LIGHT_NAMES[stable_light]}" + # stable_color = LIGHT_COLORS[stable_light] + # else: + # stable_text = f"稳定状态: 等待中 ({len(detection_history)}/{HISTORY_SIZE})" + # stable_color = FRONTEND_COLORS["muted"] + # draw_text_cn(vis, stable_text, (10, 60), font_size=18, color=stable_color) + + # 【删除】FPS计算和显示 + # fps_hist.append(t_now) + # if len(fps_hist) > 30: + # fps_hist.pop(0) + # fps = 0.0 if len(fps_hist) < 2 else (len(fps_hist)-1)/(fps_hist[-1]-fps_hist[0]) + # draw_text_cn(vis, f"FPS: {fps:.1f}", (10, 20), font_size=16, color=FRONTEND_COLORS["text"]) + + # 发送可视化结果到前端 + bridge_io.send_vis_bgr(vis) + + # 非headless模式下显示窗口 + if not headless: + cv2.imshow("Traffic Light Detection", vis) + key = cv2.waitKey(1) & 0xFF + if key in (27, ord('q')): + break + else: + cv2.waitKey(1) + + except Exception as e: + print(f"[TRAFFIC] 检测过程出错: {e}") + finally: + if not headless: + cv2.destroyAllWindows() + print("[TRAFFIC] 红绿灯检测已停止") + + +def start_detection(): + """启动红绿灯检测(在后台线程中运行)""" + global _detection_thread, _stop_event, _detection_running + + if _detection_running: + print("[TRAFFIC] 红绿灯检测已在运行中") + return False + + _stop_event = threading.Event() + _detection_thread = threading.Thread( + target=main, + args=(True, _stop_event), # headless=True, stop_event + daemon=True, + name="TrafficLightDetection" + ) + _detection_thread.start() + _detection_running = True + print("[TRAFFIC] 红绿灯检测已启动(后台线程)") + return True + +def stop_detection(): + """停止红绿灯检测""" + global _detection_thread, _stop_event, _detection_running + + if not _detection_running: + print("[TRAFFIC] 红绿灯检测未运行") + return False + + print("[TRAFFIC] 正在停止红绿灯检测...") + if 
_stop_event: + _stop_event.set() + + if _detection_thread: + _detection_thread.join(timeout=2.0) + _detection_thread = None + + _stop_event = None + _detection_running = False + print("[TRAFFIC] 红绿灯检测已停止") + return True + +def is_detection_running(): + """检查红绿灯检测是否正在运行""" + return _detection_running + +def init_model(): + """初始化YOLO模型(单帧处理模式)""" + global _model + if _model is not None: + print("[TRAFFIC] 模型已加载") + return True + + try: + print("[TRAFFIC] 加载 YOLO 红绿灯检测模型...") + _model = YOLO(YOLO_MODEL_PATH) + print(f"[TRAFFIC] 模型加载成功: {YOLO_MODEL_PATH}") + class_names = _model.names if hasattr(_model, 'names') else {} + print(f"[TRAFFIC] 模型类别: {class_names}") + return True + except Exception as e: + print(f"[TRAFFIC] 模型加载失败: {e}") + _model = None + return False + +def process_single_frame(image: np.ndarray, ui_broadcast_callback=None) -> dict: + """ + 处理单帧图像(主线程模式,避免掉帧) + 参数: + image: 输入图像 + ui_broadcast_callback: 前端广播回调函数(用于显示红绿灯状态) + 返回:{'vis_image': 可视化图像, 'detected_light': 检测到的灯, 'stable_light': 稳定状态} + """ + global _model, _last_tts_ts, _last_detected_light, _detection_history + + if _model is None: + if not init_model(): + return {'vis_image': image, 'detected_light': None, 'stable_light': None} + + vis = image.copy() + t_now = time.time() + + # YOLO推理 + results = _model(image, conf=CONF_THRESHOLD, verbose=False) + + # 处理检测结果 + detected_light = None + max_conf = 0.0 + class_names = _model.names if hasattr(_model, 'names') else {} + + if results and len(results) > 0: + r = results[0] + if r.boxes is not None and len(r.boxes) > 0: + # 遍历所有检测框,找到置信度最高的红绿灯(过滤掉crossing等) + for box in r.boxes: + cls_id = int(box.cls[0]) + conf = float(box.conf[0]) + class_name = class_names.get(cls_id, f"class_{cls_id}") + class_name_lower = class_name.lower() + + # 【过滤】跳过不需要的类别(斑马线、空白等) + if class_name_lower in FILTERED_CLASSES: + continue + + if conf > max_conf: + max_conf = conf + detected_light = class_name_lower + + # 绘制检测框(只绘制红绿灯,不绘制斑马线) + for box in r.boxes: + cls_id = 
int(box.cls[0]) + conf = float(box.conf[0]) + class_name = class_names.get(cls_id, f"class_{cls_id}") + class_name_lower = class_name.lower() + + # 【过滤】跳过不需要的类别 + if class_name_lower in FILTERED_CLASSES: + continue + + # 获取边界框坐标 + x1, y1, x2, y2 = map(int, box.xyxy[0]) + + # 确定颜色 + color = LIGHT_COLORS.get(class_name_lower, FRONTEND_COLORS["text"]) + + # 绘制边界框 + cv2.rectangle(vis, (x1, y1), (x2, y2), color, STROKE_WIDTH) + + # 【放宽】状态稳定性判断(多数表决) - 降低要求 + _detection_history.append(detected_light) + if len(_detection_history) > 5: + _detection_history.pop(0) + + stable_light = None + if len(_detection_history) >= 2: # 从3帧降低到2帧 + from collections import Counter + valid_detections = [d for d in _detection_history if d and d in LIGHT_NAMES] + if len(valid_detections) >= 2: # 从3帧降低到2帧 + counter = Counter(valid_detections) + most_common = counter.most_common(1) + if most_common and most_common[0][1] >= 2: # 从3次降低到2次 + stable_light = most_common[0][0] + + # 【调试】打印检测结果(已禁用) + # print(f"[TRAFFIC-DEBUG] detected={detected_light}, stable={stable_light}, history={_detection_history}") + + # 【禁用语音播报】只检测不播报,由 workflow_crossstreet.py 统一处理语音 + # 只更新状态跟踪,不调用 play_voice_text + if stable_light: + # 更新状态跟踪(用于检测状态变化) + if stable_light != _last_detected_light: + _last_detected_light = stable_light + print(f"[TRAFFIC] 检测到稳定状态改变: {LIGHT_NAMES[stable_light]}(不播报)") + _last_tts_ts = t_now + elif (t_now - _last_tts_ts) > TTS_INTERVAL_SEC: + # 超过间隔时间,更新时间戳(但不播报) + print(f"[TRAFFIC] 稳定状态持续: {LIGHT_NAMES[stable_light]}(不播报)") + _last_tts_ts = t_now + + # 【删除】状态文本显示 + # if detected_light and detected_light in LIGHT_NAMES: + # status_text = f"{LIGHT_NAMES[detected_light]} ({max_conf:.2f})" + # else: + # status_text = "无检测" + # + # if stable_light: + # stable_text = f"稳定: {LIGHT_NAMES[stable_light]}" + # else: + # stable_text = f"等待稳定 ({len(_detection_history)}/5)" + # + # # 添加简单的文本显示 + # cv2.putText(vis, status_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2) + # cv2.putText(vis, 
stable_text, (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2) + + return { + 'vis_image': vis, + 'detected_light': detected_light, + 'stable_light': stable_light + } + +def reset_detection_state(): + """重置检测状态""" + global _last_tts_ts, _last_detected_light, _detection_history + _last_tts_ts = 0.0 + _last_detected_light = None + _detection_history = [] + print("[TRAFFIC] 检测状态已重置") + +if __name__ == "__main__": + main(headless=False) + + + diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..9fef671 --- /dev/null +++ b/utils.py @@ -0,0 +1,307 @@ +# utils.py +# -*- coding: utf-8 -*- +import cv2 +import numpy as np +import logging + +logger = logging.getLogger(__name__) + +# 物品名称映射 +ITEM_TO_CLASS_MAP = { + "红牛": "Red_Bull", + "AD钙奶": "AD_milk", + "ad钙奶": "AD_milk", + "钙奶": "AD_milk", +} + +# 英文类别名到中文的映射 +_OBSTACLE_NAME_CN = { + 'person': '人', + 'bicycle': '自行车', + 'car': '车', + 'motorcycle': '摩托车', + 'bus': '公交车', + 'truck': '卡车', + 'animal': '动物', + 'scooter': '电瓶车', + 'stroller': '婴儿车', + 'dog': '狗', +} + +# 动态类别名称列表 +DYNAMIC_CLASS_NAMES = {'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'animal', 'dog'} + +def extract_english_label(item_cn: str) -> tuple: + """ + 提取中文物品名称对应的英文标签 + :param item_cn: 中文物品名称 + :return: (英文标签, 来源) + """ + # 先查找本地映射 + if item_cn in ITEM_TO_CLASS_MAP: + return ITEM_TO_CLASS_MAP[item_cn], "local" + + # 如果没有找到,返回原始名称 + return item_cn, "direct" + +def _to_cn_obstacle(name: str) -> str: + """ + 将英文障碍物名称转换为中文 + :param name: 英文名称 + :return: 中文名称 + """ + try: + key = (name or '').strip().lower() + return _OBSTACLE_NAME_CN.get(key, '障碍物') + except Exception: + return '障碍物' + +def estimate_global_affine(prev_gray, curr_gray, mask=None): + """ + 估计两帧之间的全局仿射变换 + :param prev_gray: 前一帧灰度图 + :param curr_gray: 当前帧灰度图 + :param mask: 可选的掩码,只在掩码区域内计算 + :return: (仿射矩阵, 内点数) + """ + try: + # 提取特征点 + detector = cv2.ORB_create(nfeatures=500) + kp1, des1 = detector.detectAndCompute(prev_gray, mask) + kp2, des2 = 
detector.detectAndCompute(curr_gray, mask) + + if des1 is None or des2 is None or len(kp1) < 10 or len(kp2) < 10: + return np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32), 0 + + # 匹配特征点 + matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + matches = matcher.match(des1, des2) + + if len(matches) < 4: + return np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32), 0 + + # 提取匹配的点对 + src_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2) + dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2) + + # 使用RANSAC估计仿射变换 + M, inliers = cv2.estimateAffinePartial2D(src_pts, dst_pts, method=cv2.RANSAC, + ransacReprojThreshold=3.0) + + if M is None: + return np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32), 0 + + inlier_count = np.sum(inliers) if inliers is not None else 0 + return M, inlier_count + + except Exception as e: + logger.warning(f"estimate_global_affine failed: {e}") + return np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32), 0 + +def warp_mask(mask, M, output_shape): + """ + 使用仿射变换对掩码进行变换 + :param mask: 输入掩码 + :param M: 2x3的仿射变换矩阵 + :param output_shape: 输出形状 (width, height) + :return: 变换后的掩码 + """ + try: + if mask is None or M is None: + return None + + W, H = output_shape + warped = cv2.warpAffine(mask, M, (W, H), + flags=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_CONSTANT, + borderValue=0) + return warped + + except Exception as e: + logger.warning(f"warp_mask failed: {e}") + return None + +def estimate_translation_flow(prev_gray, curr_gray, mask=None): + """ + 估计两帧之间的平移光流 + :param prev_gray: 前一帧灰度图 + :param curr_gray: 当前帧灰度图 + :param mask: 可选的掩码 + :return: (中位光流幅度, 平移矩阵) + """ + try: + # 计算稀疏光流 + corners = cv2.goodFeaturesToTrack(prev_gray, maxCorners=100, + qualityLevel=0.3, minDistance=7, + mask=mask) + + if corners is None or len(corners) < 10: + return 0.0, np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32) + + # 计算光流 + next_pts, status, _ = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, + corners, 
None) + + # 筛选有效点 + valid_old = corners[status == 1] + valid_new = next_pts[status == 1] + + if len(valid_old) < 5: + return 0.0, np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32) + + # 计算位移 + flow_vectors = valid_new - valid_old + flow_magnitudes = np.linalg.norm(flow_vectors, axis=1) + median_flow = np.median(flow_magnitudes) + + # 估计平均平移 + mean_translation = np.mean(flow_vectors, axis=0) + M = np.array([[1, 0, mean_translation[0]], + [0, 1, mean_translation[1]]], dtype=np.float32) + + return median_flow, M + + except Exception as e: + logger.warning(f"estimate_translation_flow failed: {e}") + return 0.0, np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32) + +def is_stationary_frame(prev_gray, curr_gray, mask=None, threshold=0.35): + """ + 判断用户是否静止 + :param prev_gray: 前一帧灰度图 + :param curr_gray: 当前帧灰度图 + :param mask: 可选的掩码 + :param threshold: 静止判定阈值 + :return: True表示静止,False表示运动 + """ + try: + median_flow, _ = estimate_translation_flow(prev_gray, curr_gray, mask) + return median_flow < threshold + except: + return False + +def compute_approach_metrics(prev_obstacles, curr_obstacles, M, H, W): + """ + 计算障碍物的接近度量 + :param prev_obstacles: 前一帧障碍物列表 + :param curr_obstacles: 当前帧障碍物列表 + :param M: 仿射变换矩阵 + :param H: 图像高度 + :param W: 图像宽度 + :return: 接近度量列表 + """ + metrics = [] + + for curr_obs in curr_obstacles: + # 寻找最佳匹配的前一帧障碍物 + best_match = None + best_iou = 0.0 + + curr_mask = curr_obs.get('mask') + if curr_mask is None: + metrics.append(None) + continue + + for prev_obs in prev_obstacles: + prev_mask = prev_obs.get('mask') + if prev_mask is None: + continue + + # 将前一帧掩码变换到当前帧 + warped_prev = warp_mask(prev_mask, M, (W, H)) + if warped_prev is None: + continue + + # 计算IoU + intersection = np.logical_and(curr_mask > 0, warped_prev > 0).sum() + union = np.logical_or(curr_mask > 0, warped_prev > 0).sum() + iou = intersection / union if union > 0 else 0.0 + + if iou > best_iou: + best_iou = iou + best_match = prev_obs + + if best_match is None: + metrics.append(None) + 
continue + + # 计算度量 + curr_area = curr_obs.get('area', 0) + prev_area = best_match.get('area', 0) + area_growth = (curr_area - prev_area) / prev_area if prev_area > 0 else 0.0 + + curr_bottom_y = curr_obs.get('bottom_y_ratio', 0) + prev_bottom_y = best_match.get('bottom_y_ratio', 0) + v_forward = curr_bottom_y - prev_bottom_y + + metrics.append({ + 'area_growth': area_growth, + 'v_forward': v_forward, + 'iou': best_iou + }) + + return metrics + +def compute_risk_scores(obstacles, prev_obstacles, M, path_mask, image_shape, + stop_th=0.6, avoid_th=0.56): + """ + 计算障碍物的风险评分 + :param obstacles: 当前障碍物列表 + :param prev_obstacles: 前一帧障碍物列表 + :param M: 仿射变换矩阵 + :param path_mask: 路径掩码 + :param image_shape: 图像形状 + :param stop_th: 停止阈值 + :param avoid_th: 避让阈值 + :return: (评分后的障碍物列表, 是否需要停止, 是否需要避让, 可视化元素) + """ + H, W = image_shape[:2] + has_stop = False + has_avoid = False + risk_vis = [] + + # 计算接近度量 + metrics = compute_approach_metrics(prev_obstacles, obstacles, M, H, W) + + for obs, met in zip(obstacles, metrics): + risk_score = 0.0 + + if met is not None: + # 基于接近速度和面积增长计算风险 + if met['v_forward'] > 0.004: # 向下移动 + risk_score += 0.3 + if met['area_growth'] > 0.01: # 面积增长 + risk_score += 0.3 + + # 基于距离的风险 + bottom_y = obs.get('bottom_y_ratio', 0) + area_ratio = obs.get('area_ratio', 0) + + if bottom_y > 0.8 or area_ratio > 0.15: + risk_score += 0.3 + + # 动态物体额外风险 + name_lower = str(obs.get('name', '')).lower() + if name_lower in DYNAMIC_CLASS_NAMES: + risk_score *= 1.2 + + obs['risk_score'] = risk_score + + # 更新标志 + if risk_score >= stop_th: + has_stop = True + elif risk_score >= avoid_th: + has_avoid = True + + # 添加风险可视化 + if risk_score > 0.3: + risk_color = "rgba(255, 0, 0, 0.3)" if risk_score >= stop_th else "rgba(255, 165, 0, 0.3)" + risk_vis.append({ + "type": "risk_indicator", + "score": risk_score, + "color": risk_color, + "position": [int(obs.get('center_x', W/2)), int(obs.get('center_y', H/2))] + }) + + return obstacles, has_stop, has_avoid, risk_vis + diff --git 
a/voice/map.zh-CN.json b/voice/map.zh-CN.json new file mode 100644 index 0000000..8f2b4d1 --- /dev/null +++ b/voice/map.zh-CN.json @@ -0,0 +1,714 @@ +{ + "丢失路径,重新搜索。": { + "files": [ + "丢失路径,重新搜索。.wav" + ], + "duration_ms": 1653 + }, + "保持直行": { + "files": [ + "保持直行.wav" + ], + "duration_ms": 587 + }, + "保持直行,靠近盲道。": { + "files": [ + "保持直行,靠近盲道。.wav" + ], + "duration_ms": 1493 + }, + "切换到盲道导航。": { + "files": [ + "切换到盲道导航。.wav" + ], + "duration_ms": 1013 + }, + "到达转弯处,向右平移。": { + "files": [ + "到达转弯处,向右平移。.wav" + ], + "duration_ms": 1973 + }, + "到达转弯处,向左平移。": { + "files": [ + "到达转弯处,向左平移。.wav" + ], + "duration_ms": 1680 + }, + "前方有人,停一下。": { + "files": [ + "前方有人,停一下。.wav" + ], + "duration_ms": 1547 + }, + "前方有人,注意避让。": { + "files": [ + "前方有人,注意避让。.wav" + ], + "duration_ms": 1467 + }, + "前方有公交车,停一下。": { + "files": [ + "前方有公交车,停一下。.wav" + ], + "duration_ms": 1573 + }, + "前方有动物,停一下。": { + "files": [ + "前方有动物,停一下。.wav" + ], + "duration_ms": 1333 + }, + "前方有卡车,停一下。": { + "files": [ + "前方有卡车,停一下。.wav" + ], + "duration_ms": 1440 + }, + "前方有婴儿车,停一下。": { + "files": [ + "前方有婴儿车,停一下。.wav" + ], + "duration_ms": 1573 + }, + "前方有左转弯,继续直行。": { + "files": [ + "前方有左转弯,继续直行。.wav" + ], + "duration_ms": 1920 + }, + "前方有摩托车,停一下。": { + "files": [ + "前方有摩托车,停一下。.wav" + ], + "duration_ms": 1520 + }, + "前方有狗,停一下。": { + "files": [ + "前方有狗,停一下。.wav" + ], + "duration_ms": 1520 + }, + "前方有电瓶车,停一下。": { + "files": [ + "前方有电瓶车,停一下。.wav" + ], + "duration_ms": 1653 + }, + "前方有自行车,停一下。": { + "files": [ + "前方有自行车,停一下。.wav" + ], + "duration_ms": 1467 + }, + "前方有车,停一下。": { + "files": [ + "前方有车,停一下。.wav" + ], + "duration_ms": 804 + }, + "前方有车,注意避让。": { + "files": [ + "前方有车,注意避让。.wav" + ], + "duration_ms": 1520 + }, + "前方有障碍物,停一下。": { + "files": [ + "前方有障碍物,停一下。.wav" + ], + "duration_ms": 1573 + }, + "前方有障碍物,注意避让。": { + "files": [ + "前方有障碍物,注意避让。.wav" + ], + "duration_ms": 1893 + }, + "发现斑马线,对准方向。": { + "files": [ + "发现斑马线,对准方向。.wav" + ], + "duration_ms": 1413 + }, + "右侧有人,停一下。": { + "files": [ + 
"右侧有人,停一下。.wav" + ], + "duration_ms": 1600 + }, + "右侧有公交车,停一下。": { + "files": [ + "右侧有公交车,停一下。.wav" + ], + "duration_ms": 1467 + }, + "右侧有动物,停一下。": { + "files": [ + "右侧有动物,停一下。.wav" + ], + "duration_ms": 1467 + }, + "右侧有卡车,停一下。": { + "files": [ + "右侧有卡车,停一下。.wav" + ], + "duration_ms": 1413 + }, + "右侧有婴儿车,停一下。": { + "files": [ + "右侧有婴儿车,停一下。.wav" + ], + "duration_ms": 1440 + }, + "右侧有摩托车,停一下。": { + "files": [ + "右侧有摩托车,停一下。.wav" + ], + "duration_ms": 1467 + }, + "右侧有狗,停一下。": { + "files": [ + "右侧有狗,停一下。.wav" + ], + "duration_ms": 1467 + }, + "右侧有电瓶车,停一下。": { + "files": [ + "右侧有电瓶车,停一下。.wav" + ], + "duration_ms": 1657 + }, + "右侧有自行车,停一下。": { + "files": [ + "右侧有自行车,停一下。.wav" + ], + "duration_ms": 1680 + }, + "右侧有车,停一下。": { + "files": [ + "右侧有车,停一下。.wav" + ], + "duration_ms": 1440 + }, + "右侧有障碍物,停一下。": { + "files": [ + "右侧有障碍物,停一下。.wav" + ], + "duration_ms": 1573 + }, + "右移": { + "files": [ + "右移.wav" + ], + "duration_ms": 378 + }, + "右转": { + "files": [ + "右转.wav" + ], + "duration_ms": 507 + }, + "右转一点": { + "files": [ + "右转一点.wav" + ], + "duration_ms": 560 + }, + "向前直行几步越过障碍物。然后说‘好了’。": { + "files": [ + "向前直行几步越过障碍物。然后说‘好了’。.wav" + ], + "duration_ms": 2667 + }, + "向右平移": { + "files": [ + "向右平移.wav" + ], + "duration_ms": 720 + }, + "向右平移,对准盲道": { + "files": [ + "向右平移,对准盲道.wav" + ], + "duration_ms": 1733 + }, + "向左平移": { + "files": [ + "向左平移.wav" + ], + "duration_ms": 720 + }, + "向左平移,对准盲道": { + "files": [ + "向左平移,对准盲道.wav" + ], + "duration_ms": 1573 + }, + "启动过马路模式失败,请稍后重试。": { + "files": [ + "启动过马路模式失败,请稍后重试。.wav" + ], + "duration_ms": 2187 + }, + "好的,请停下侧移。": { + "files": [ + "好的,请停下侧移。.wav" + ], + "duration_ms": 1658 + }, + "寻物任务完成。": { + "files": [ + "寻物任务完成。.wav" + ], + "duration_ms": 827 + }, + "导航已被取消。": { + "files": [ + "导航已被取消。.wav" + ], + "duration_ms": 827 + }, + "左侧有人,停一下。": { + "files": [ + "左侧有人,停一下。.wav" + ], + "duration_ms": 1013 + }, + "左侧有公交车,停一下。": { + "files": [ + "左侧有公交车,停一下。.wav" + ], + "duration_ms": 1493 + }, + "左侧有动物,停一下。": { + "files": [ + 
"左侧有动物,停一下。.wav" + ], + "duration_ms": 1360 + }, + "左侧有卡车,停一下。": { + "files": [ + "左侧有卡车,停一下。.wav" + ], + "duration_ms": 1467 + }, + "左侧有婴儿车,停一下。": { + "files": [ + "左侧有婴儿车,停一下。.wav" + ], + "duration_ms": 1600 + }, + "左侧有摩托车,停一下。": { + "files": [ + "左侧有摩托车,停一下。.wav" + ], + "duration_ms": 1520 + }, + "左侧有狗,停一下。": { + "files": [ + "左侧有狗,停一下。.wav" + ], + "duration_ms": 1627 + }, + "左侧有电瓶车,停一下。": { + "files": [ + "左侧有电瓶车,停一下。.wav" + ], + "duration_ms": 1733 + }, + "左侧有自行车,停一下。": { + "files": [ + "左侧有自行车,停一下。.wav" + ], + "duration_ms": 1520 + }, + "左侧有车,停一下。": { + "files": [ + "左侧有车,停一下。.wav" + ], + "duration_ms": 1467 + }, + "左侧有障碍物,停一下。": { + "files": [ + "左侧有障碍物,停一下。.wav" + ], + "duration_ms": 1627 + }, + "左移": { + "files": [ + "左移.wav" + ], + "duration_ms": 378 + }, + "左转": { + "files": [ + "左转.wav" + ], + "duration_ms": 378 + }, + "左转一点": { + "files": [ + "左转一点.wav" + ], + "duration_ms": 613 + }, + "已停止导航。": { + "files": [ + "已停止导航。.wav" + ], + "duration_ms": 827 + }, + "已到盲道跟前,切换到盲道导航。": { + "files": [ + "已到盲道跟前,切换到盲道导航。.wav" + ], + "duration_ms": 2080 + }, + "已到达目标前方,请注意。": { + "files": [ + "已到达目标前方,请注意。.wav" + ], + "duration_ms": 1760 + }, + "已到达目标,引导结束。": { + "files": [ + "已到达目标,引导结束。.wav" + ], + "duration_ms": 1733 + }, + "已回到盲道。": { + "files": [ + "已回到盲道。.wav" + ], + "duration_ms": 773 + }, + "已对准, 准备切换过马路模式。": { + "files": [ + "已对准, 准备切换过马路模式。.wav" + ], + "duration_ms": 1893 + }, + "已对准新路径,请向前直行。": { + "files": [ + "已对准新路径,请向前直行。.wav" + ], + "duration_ms": 1973 + }, + "引导超时,自动结束。": { + "files": [ + "引导超时,自动结束。.wav" + ], + "duration_ms": 1657 + }, + "收到,准备回归盲道。": { + "files": [ + "收到,准备回归盲道。.wav" + ], + "duration_ms": 1547 + }, + "斑马线已对准,继续前行。": { + "files": [ + "斑马线已对准,继续前行。.wav" + ], + "duration_ms": 1733 + }, + "方向已对正!现在校准位置。": { + "files": [ + "方向已对正!现在校准位置。.wav" + ], + "duration_ms": 1760 + }, + "方向正确,请直行。": { + "files": [ + "方向正确,请直行。.wav" + ], + "duration_ms": 1573 + }, + "方向正确,请继续前进。": { + "files": [ + "方向正确,请继续前进。.wav" + ], + "duration_ms": 1867 + }, 
+ "校准完成!您已在盲道上,开始前行。": { + "files": [ + "校准完成!您已在盲道上,开始前行。.wav" + ], + "duration_ms": 2613 + }, + "检测到已移动,开始对准新方向。": { + "files": [ + "检测到已移动,开始对准新方向。.wav" + ], + "duration_ms": 2000 + }, + "正在接近斑马线,为您对准方向。": { + "files": [ + "正在接近斑马线,为您对准方向。.wav" + ], + "duration_ms": 2000 + }, + "正在等待绿灯…": { + "files": [ + "正在等待绿灯….wav" + ], + "duration_ms": 640 + }, + "没看到盲道,请向右侧小幅移动。": { + "files": [ + "没看到盲道,请向右侧小幅移动。.wav" + ], + "duration_ms": 2240 + }, + "没看到盲道,请向左侧小幅移动。": { + "files": [ + "没看到盲道,请向左侧小幅移动。.wav" + ], + "duration_ms": 2267 + }, + "目标在您的左前方,请右转一点。": { + "files": [ + "目标在您的左前方,请右转一点。.wav" + ], + "duration_ms": 1947 + }, + "目标在您的左前方,请左转一点。": { + "files": [ + "目标在您的左前方,请左转一点。.wav" + ], + "duration_ms": 2000 + }, + "目标就在前方,请慢慢靠近。": { + "files": [ + "目标就在前方,请慢慢靠近。.wav" + ], + "duration_ms": 2160 + }, + "目标消失,请原地小幅转动。": { + "files": [ + "目标消失,请原地小幅转动。.wav" + ], + "duration_ms": 2080 + }, + "目标消失,请原地等待。": { + "files": [ + "目标消失,请原地等待。.wav" + ], + "duration_ms": 1733 + }, + "盲道已接近,开始对准盲道。": { + "files": [ + "盲道已接近,开始对准盲道。.wav" + ], + "duration_ms": 1973 + }, + "稍微向右调整,继续前进。": { + "files": [ + "稍微向右调整,继续前进。.wav" + ], + "duration_ms": 1840 + }, + "稍微向左调整,继续前进。": { + "files": [ + "稍微向左调整,继续前进。.wav" + ], + "duration_ms": 2053 + }, + "绿灯稳定,开始通行。": { + "files": [ + "绿灯稳定,开始通行。.wav" + ], + "duration_ms": 1547 + }, + "绿灯快没了": { + "files": [ + "绿灯快没了.wav" + ], + "duration_ms": 1200 + }, + "开始通行": { + "files": [ + "绿灯稳定,开始通行。.wav" + ], + "duration_ms": 1547 + }, + "斑马线已在跟前,进入红绿灯判定模式": { + "files": [ + "正在等待绿灯….wav" + ], + "duration_ms": 640 + }, + "请向右平移。": { + "files": [ + "请向右平移。.wav" + ], + "duration_ms": 747 + }, + "请向右微调,对准盲道。": { + "files": [ + "请向右微调,对准盲道。.wav" + ], + "duration_ms": 1573 + }, + "请向右转动。": { + "files": [ + "请向右转动。.wav" + ], + "duration_ms": 773 + }, + "请向左平移。": { + "files": [ + "请向左平移。.wav" + ], + "duration_ms": 933 + }, + "请向左微调,对准盲道。": { + "files": [ + "请向左微调,对准盲道。.wav" + ], + "duration_ms": 1573 + }, + "请向左转动。": { + "files": [ + "请向左转动。.wav" + ], + 
"duration_ms": 667 + }, + "请继续向右平移。": { + "files": [ + "请继续向右平移。.wav" + ], + "duration_ms": 1413 + }, + "请继续向左平移。": { + "files": [ + "请继续向左平移。.wav" + ], + "duration_ms": 1360 + }, + "请问完成了吗?": { + "files": [ + "请问完成了吗?.wav" + ], + "duration_ms": 773 + }, + "路径太远,请继续靠近": { + "files": [ + "路径太远,请继续靠近.wav" + ], + "duration_ms": 1520 + }, + "路径被挡住,请向右侧平移。": { + "files": [ + "路径被挡住,请向右侧平移。.wav" + ], + "duration_ms": 2000 + }, + "路径被挡住,请向左侧平移。": { + "files": [ + "路径被挡住,请向左侧平移。.wav" + ], + "duration_ms": 1760 + }, + "过马路模式已启动。": { + "files": [ + "过马路模式已启动。.wav" + ], + "duration_ms": 1360 + }, + "过马路结束,准备上人行道。": { + "files": [ + "过马路结束,准备上人行道。.wav" + ], + "duration_ms": 2053 + }, + "远处发现斑马线,继续直行。": { + "files": [ + "远处发现斑马线,继续直行。.wav" + ], + "duration_ms": 1920 + }, + "远处有盲道,继续前行。": { + "files": [ + "远处有盲道,继续前行。.wav" + ], + "duration_ms": 1733 + }, + + "避让完成,已回到盲道。": { + "files": [ + "避让完成,已回到盲道。.wav" + ], + "duration_ms": 1600 + }, + + "前方有右转弯,继续直行。": { + "files": [ + "前方有右转弯,继续直行。.wav" + ], + "duration_ms": 1657 + }, + "红灯": { + "files": [ + "红灯.WAV" + ], + "duration_ms": 400 + }, + "绿灯": { + "files": [ + "绿灯.WAV" + ], + "duration_ms": 400 + }, + "黄灯": { + "files": [ + "黄灯.WAV" + ], + "duration_ms": 400 + }, + "远处发现斑马线": { + "files": [ + "../music/远处发现斑马线.WAV" + ], + "duration_ms": 1600 + }, + "正在靠近斑马线": { + "files": [ + "../music/正在靠近斑马线.WAV" + ], + "duration_ms": 1600 + }, + "接近斑马线": { + "files": [ + "../music/接近斑马线.WAV" + ], + "duration_ms": 1200 + }, + "斑马线到了可以过马路": { + "files": [ + "../music/斑马线到了可以过马路.WAV" + ], + "duration_ms": 2000 + }, + "在画面左侧": { + "files": [ + "../music/在画面左侧.WAV" + ], + "duration_ms": 1200 + }, + "在画面中间": { + "files": [ + "../music/在画面中间.WAV" + ], + "duration_ms": 1200 + }, + "在画面右侧": { + "files": [ + "../music/在画面右侧.WAV" + ], + "duration_ms": 1200 + } +} + + diff --git a/voice/丢失路径,重新搜索。.wav b/voice/丢失路径,重新搜索。.wav new file mode 100644 index 0000000..a682114 Binary files /dev/null and b/voice/丢失路径,重新搜索。.wav differ diff --git a/voice/保持直行.wav 
b/voice/保持直行.wav new file mode 100644 index 0000000..5ad19fb Binary files /dev/null and b/voice/保持直行.wav differ diff --git a/voice/保持直行,靠近盲道。.wav b/voice/保持直行,靠近盲道。.wav new file mode 100644 index 0000000..0becc06 Binary files /dev/null and b/voice/保持直行,靠近盲道。.wav differ diff --git a/voice/切换到盲道导航。.wav b/voice/切换到盲道导航。.wav new file mode 100644 index 0000000..eb75924 Binary files /dev/null and b/voice/切换到盲道导航。.wav differ diff --git a/voice/到达转弯处,向右平移。.wav b/voice/到达转弯处,向右平移。.wav new file mode 100644 index 0000000..a45c286 Binary files /dev/null and b/voice/到达转弯处,向右平移。.wav differ diff --git a/voice/到达转弯处,向左平移。.wav b/voice/到达转弯处,向左平移。.wav new file mode 100644 index 0000000..f057229 Binary files /dev/null and b/voice/到达转弯处,向左平移。.wav differ diff --git a/voice/前方有人,停一下。.wav b/voice/前方有人,停一下。.wav new file mode 100644 index 0000000..02ef0f4 Binary files /dev/null and b/voice/前方有人,停一下。.wav differ diff --git a/voice/前方有人,注意避让。.wav b/voice/前方有人,注意避让。.wav new file mode 100644 index 0000000..76f2237 Binary files /dev/null and b/voice/前方有人,注意避让。.wav differ diff --git a/voice/前方有公交车,停一下。.wav b/voice/前方有公交车,停一下。.wav new file mode 100644 index 0000000..84448f6 Binary files /dev/null and b/voice/前方有公交车,停一下。.wav differ diff --git a/voice/前方有动物,停一下。.wav b/voice/前方有动物,停一下。.wav new file mode 100644 index 0000000..02e35b3 Binary files /dev/null and b/voice/前方有动物,停一下。.wav differ diff --git a/voice/前方有卡车,停一下。.wav b/voice/前方有卡车,停一下。.wav new file mode 100644 index 0000000..680f7e5 Binary files /dev/null and b/voice/前方有卡车,停一下。.wav differ diff --git a/voice/前方有右转弯,继续直行。.wav b/voice/前方有右转弯,继续直行。.wav new file mode 100644 index 0000000..f007c04 Binary files /dev/null and b/voice/前方有右转弯,继续直行。.wav differ diff --git a/voice/前方有婴儿车,停一下。.wav b/voice/前方有婴儿车,停一下。.wav new file mode 100644 index 0000000..0d7b019 Binary files /dev/null and b/voice/前方有婴儿车,停一下。.wav differ diff --git a/voice/前方有左转弯,继续直行。.wav b/voice/前方有左转弯,继续直行。.wav new file mode 100644 index 0000000..e4b4aea Binary files /dev/null and 
b/voice/前方有左转弯,继续直行。.wav differ diff --git a/voice/前方有摩托车,停一下。.wav b/voice/前方有摩托车,停一下。.wav new file mode 100644 index 0000000..954e8ff Binary files /dev/null and b/voice/前方有摩托车,停一下。.wav differ diff --git a/voice/前方有狗,停一下。.wav b/voice/前方有狗,停一下。.wav new file mode 100644 index 0000000..4d8d29d Binary files /dev/null and b/voice/前方有狗,停一下。.wav differ diff --git a/voice/前方有电瓶车,停一下。.wav b/voice/前方有电瓶车,停一下。.wav new file mode 100644 index 0000000..713ba64 Binary files /dev/null and b/voice/前方有电瓶车,停一下。.wav differ diff --git a/voice/前方有自行车,停一下。.wav b/voice/前方有自行车,停一下。.wav new file mode 100644 index 0000000..6dfabac Binary files /dev/null and b/voice/前方有自行车,停一下。.wav differ diff --git a/voice/前方有车,停一下。.wav b/voice/前方有车,停一下。.wav new file mode 100644 index 0000000..5270b48 Binary files /dev/null and b/voice/前方有车,停一下。.wav differ diff --git a/voice/前方有车,注意避让。.wav b/voice/前方有车,注意避让。.wav new file mode 100644 index 0000000..036e945 Binary files /dev/null and b/voice/前方有车,注意避让。.wav differ diff --git a/voice/前方有障碍物,停一下。.wav b/voice/前方有障碍物,停一下。.wav new file mode 100644 index 0000000..13f0adc Binary files /dev/null and b/voice/前方有障碍物,停一下。.wav differ diff --git a/voice/前方有障碍物,注意避让。.wav b/voice/前方有障碍物,注意避让。.wav new file mode 100644 index 0000000..09d0ea7 Binary files /dev/null and b/voice/前方有障碍物,注意避让。.wav differ diff --git a/voice/发现斑马线,对准方向。.wav b/voice/发现斑马线,对准方向。.wav new file mode 100644 index 0000000..22a2072 Binary files /dev/null and b/voice/发现斑马线,对准方向。.wav differ diff --git a/voice/右侧有人,停一下。.wav b/voice/右侧有人,停一下。.wav new file mode 100644 index 0000000..af80608 Binary files /dev/null and b/voice/右侧有人,停一下。.wav differ diff --git a/voice/右侧有公交车,停一下。.wav b/voice/右侧有公交车,停一下。.wav new file mode 100644 index 0000000..f3fae0a Binary files /dev/null and b/voice/右侧有公交车,停一下。.wav differ diff --git a/voice/右侧有动物,停一下。.wav b/voice/右侧有动物,停一下。.wav new file mode 100644 index 0000000..eaf2995 Binary files /dev/null and b/voice/右侧有动物,停一下。.wav differ diff --git a/voice/右侧有卡车,停一下。.wav b/voice/右侧有卡车,停一下。.wav 
new file mode 100644 index 0000000..110dc5b Binary files /dev/null and b/voice/右侧有卡车,停一下。.wav differ diff --git a/voice/右侧有婴儿车,停一下。.wav b/voice/右侧有婴儿车,停一下。.wav new file mode 100644 index 0000000..5670c33 Binary files /dev/null and b/voice/右侧有婴儿车,停一下。.wav differ diff --git a/voice/右侧有摩托车,停一下。.wav b/voice/右侧有摩托车,停一下。.wav new file mode 100644 index 0000000..4da98bc Binary files /dev/null and b/voice/右侧有摩托车,停一下。.wav differ diff --git a/voice/右侧有狗,停一下。.wav b/voice/右侧有狗,停一下。.wav new file mode 100644 index 0000000..0f32e13 Binary files /dev/null and b/voice/右侧有狗,停一下。.wav differ diff --git a/voice/右侧有电瓶车,停一下。.wav b/voice/右侧有电瓶车,停一下。.wav new file mode 100644 index 0000000..18edc78 Binary files /dev/null and b/voice/右侧有电瓶车,停一下。.wav differ diff --git a/voice/右侧有自行车,停一下。.wav b/voice/右侧有自行车,停一下。.wav new file mode 100644 index 0000000..c40102c Binary files /dev/null and b/voice/右侧有自行车,停一下。.wav differ diff --git a/voice/右侧有车,停一下。.wav b/voice/右侧有车,停一下。.wav new file mode 100644 index 0000000..c64bc94 Binary files /dev/null and b/voice/右侧有车,停一下。.wav differ diff --git a/voice/右侧有障碍物,停一下。.wav b/voice/右侧有障碍物,停一下。.wav new file mode 100644 index 0000000..f882471 Binary files /dev/null and b/voice/右侧有障碍物,停一下。.wav differ diff --git a/voice/右移.wav b/voice/右移.wav new file mode 100644 index 0000000..65fc54a Binary files /dev/null and b/voice/右移.wav differ diff --git a/voice/右转.wav b/voice/右转.wav new file mode 100644 index 0000000..b3b6be9 Binary files /dev/null and b/voice/右转.wav differ diff --git a/voice/右转一点.wav b/voice/右转一点.wav new file mode 100644 index 0000000..4e4386c Binary files /dev/null and b/voice/右转一点.wav differ diff --git a/voice/向前直行几步越过障碍物。然后说‘好了’。.wav b/voice/向前直行几步越过障碍物。然后说‘好了’。.wav new file mode 100644 index 0000000..ee9623a Binary files /dev/null and b/voice/向前直行几步越过障碍物。然后说‘好了’。.wav differ diff --git a/voice/向右平移.wav b/voice/向右平移.wav new file mode 100644 index 0000000..bb56764 Binary files /dev/null and b/voice/向右平移.wav differ diff --git a/voice/向右平移,对准盲道.wav 
b/voice/向右平移,对准盲道.wav new file mode 100644 index 0000000..04ff64b Binary files /dev/null and b/voice/向右平移,对准盲道.wav differ diff --git a/voice/向左平移.wav b/voice/向左平移.wav new file mode 100644 index 0000000..4b3156a Binary files /dev/null and b/voice/向左平移.wav differ diff --git a/voice/向左平移,对准盲道.wav b/voice/向左平移,对准盲道.wav new file mode 100644 index 0000000..346b297 Binary files /dev/null and b/voice/向左平移,对准盲道.wav differ diff --git a/voice/启动过马路模式失败,请稍后重试。.wav b/voice/启动过马路模式失败,请稍后重试。.wav new file mode 100644 index 0000000..464a08c Binary files /dev/null and b/voice/启动过马路模式失败,请稍后重试。.wav differ diff --git a/voice/好的,请停下侧移。.wav b/voice/好的,请停下侧移。.wav new file mode 100644 index 0000000..625573c Binary files /dev/null and b/voice/好的,请停下侧移。.wav differ diff --git a/voice/寻物任务完成。.wav b/voice/寻物任务完成。.wav new file mode 100644 index 0000000..184c2d9 Binary files /dev/null and b/voice/寻物任务完成。.wav differ diff --git a/voice/导航已被取消。.wav b/voice/导航已被取消。.wav new file mode 100644 index 0000000..ddf6773 Binary files /dev/null and b/voice/导航已被取消。.wav differ diff --git a/voice/左侧有人,停一下。.wav b/voice/左侧有人,停一下。.wav new file mode 100644 index 0000000..653e845 Binary files /dev/null and b/voice/左侧有人,停一下。.wav differ diff --git a/voice/左侧有公交车,停一下。.wav b/voice/左侧有公交车,停一下。.wav new file mode 100644 index 0000000..7a3d637 Binary files /dev/null and b/voice/左侧有公交车,停一下。.wav differ diff --git a/voice/左侧有动物,停一下。.wav b/voice/左侧有动物,停一下。.wav new file mode 100644 index 0000000..7099472 Binary files /dev/null and b/voice/左侧有动物,停一下。.wav differ diff --git a/voice/左侧有卡车,停一下。.wav b/voice/左侧有卡车,停一下。.wav new file mode 100644 index 0000000..cb5dfcc Binary files /dev/null and b/voice/左侧有卡车,停一下。.wav differ diff --git a/voice/左侧有婴儿车,停一下。.wav b/voice/左侧有婴儿车,停一下。.wav new file mode 100644 index 0000000..c26ccb8 Binary files /dev/null and b/voice/左侧有婴儿车,停一下。.wav differ diff --git a/voice/左侧有摩托车,停一下。.wav b/voice/左侧有摩托车,停一下。.wav new file mode 100644 index 0000000..49ec8cd Binary files /dev/null and b/voice/左侧有摩托车,停一下。.wav differ 
diff --git a/voice/左侧有狗,停一下。.wav b/voice/左侧有狗,停一下。.wav new file mode 100644 index 0000000..e53ec61 Binary files /dev/null and b/voice/左侧有狗,停一下。.wav differ diff --git a/voice/左侧有电瓶车,停一下。.wav b/voice/左侧有电瓶车,停一下。.wav new file mode 100644 index 0000000..13b6ede Binary files /dev/null and b/voice/左侧有电瓶车,停一下。.wav differ diff --git a/voice/左侧有自行车,停一下。.wav b/voice/左侧有自行车,停一下。.wav new file mode 100644 index 0000000..66b681e Binary files /dev/null and b/voice/左侧有自行车,停一下。.wav differ diff --git a/voice/左侧有车,停一下。.wav b/voice/左侧有车,停一下。.wav new file mode 100644 index 0000000..cd72629 Binary files /dev/null and b/voice/左侧有车,停一下。.wav differ diff --git a/voice/左侧有障碍物,停一下。.wav b/voice/左侧有障碍物,停一下。.wav new file mode 100644 index 0000000..f86af6b Binary files /dev/null and b/voice/左侧有障碍物,停一下。.wav differ diff --git a/voice/左移.wav b/voice/左移.wav new file mode 100644 index 0000000..b7bd8de Binary files /dev/null and b/voice/左移.wav differ diff --git a/voice/左转.wav b/voice/左转.wav new file mode 100644 index 0000000..10e6f4a Binary files /dev/null and b/voice/左转.wav differ diff --git a/voice/左转一点.wav b/voice/左转一点.wav new file mode 100644 index 0000000..6a8a27a Binary files /dev/null and b/voice/左转一点.wav differ diff --git a/voice/已停止导航。.wav b/voice/已停止导航。.wav new file mode 100644 index 0000000..fda991f Binary files /dev/null and b/voice/已停止导航。.wav differ diff --git a/voice/已到盲道跟前,切换到盲道导航。.wav b/voice/已到盲道跟前,切换到盲道导航。.wav new file mode 100644 index 0000000..402756e Binary files /dev/null and b/voice/已到盲道跟前,切换到盲道导航。.wav differ diff --git a/voice/已到达目标前方,请注意。.wav b/voice/已到达目标前方,请注意。.wav new file mode 100644 index 0000000..8ae1ef6 Binary files /dev/null and b/voice/已到达目标前方,请注意。.wav differ diff --git a/voice/已到达目标,引导结束。.wav b/voice/已到达目标,引导结束。.wav new file mode 100644 index 0000000..107ac0d Binary files /dev/null and b/voice/已到达目标,引导结束。.wav differ diff --git a/voice/已回到盲道。.wav b/voice/已回到盲道。.wav new file mode 100644 index 0000000..333ab65 Binary files /dev/null and b/voice/已回到盲道。.wav differ diff 
--git a/voice/已对准, 准备切换过马路模式。.wav b/voice/已对准, 准备切换过马路模式。.wav new file mode 100644 index 0000000..3f23ee2 Binary files /dev/null and b/voice/已对准, 准备切换过马路模式。.wav differ diff --git a/voice/已对准新路径,请向前直行。.wav b/voice/已对准新路径,请向前直行。.wav new file mode 100644 index 0000000..4f6e867 Binary files /dev/null and b/voice/已对准新路径,请向前直行。.wav differ diff --git a/voice/引导超时,自动结束。.wav b/voice/引导超时,自动结束。.wav new file mode 100644 index 0000000..f808ec8 Binary files /dev/null and b/voice/引导超时,自动结束。.wav differ diff --git a/voice/收到,准备回归盲道。.wav b/voice/收到,准备回归盲道。.wav new file mode 100644 index 0000000..f6801b6 Binary files /dev/null and b/voice/收到,准备回归盲道。.wav differ diff --git a/voice/斑马线已对准,继续前行。.wav b/voice/斑马线已对准,继续前行。.wav new file mode 100644 index 0000000..2f566a0 Binary files /dev/null and b/voice/斑马线已对准,继续前行。.wav differ diff --git a/voice/方向已对正!现在校准位置。.wav b/voice/方向已对正!现在校准位置。.wav new file mode 100644 index 0000000..19d638f Binary files /dev/null and b/voice/方向已对正!现在校准位置。.wav differ diff --git a/voice/方向正确,请直行。.wav b/voice/方向正确,请直行。.wav new file mode 100644 index 0000000..fddd765 Binary files /dev/null and b/voice/方向正确,请直行。.wav differ diff --git a/voice/方向正确,请继续前进。.wav b/voice/方向正确,请继续前进。.wav new file mode 100644 index 0000000..a2f9510 Binary files /dev/null and b/voice/方向正确,请继续前进。.wav differ diff --git a/voice/校准完成!您已在盲道上,开始前行。.wav b/voice/校准完成!您已在盲道上,开始前行。.wav new file mode 100644 index 0000000..9b16fd3 Binary files /dev/null and b/voice/校准完成!您已在盲道上,开始前行。.wav differ diff --git a/voice/检测到已移动,开始对准新方向。.wav b/voice/检测到已移动,开始对准新方向。.wav new file mode 100644 index 0000000..9d64665 Binary files /dev/null and b/voice/检测到已移动,开始对准新方向。.wav differ diff --git a/voice/正在接近斑马线,为您对准方向。.wav b/voice/正在接近斑马线,为您对准方向。.wav new file mode 100644 index 0000000..f984c60 Binary files /dev/null and b/voice/正在接近斑马线,为您对准方向。.wav differ diff --git a/voice/正在等待绿灯….wav b/voice/正在等待绿灯….wav new file mode 100644 index 0000000..6ebf2d0 Binary files /dev/null and b/voice/正在等待绿灯….wav differ diff --git 
a/voice/没看到盲道,请向右侧小幅移动。.wav b/voice/没看到盲道,请向右侧小幅移动。.wav new file mode 100644 index 0000000..2b3e2dd Binary files /dev/null and b/voice/没看到盲道,请向右侧小幅移动。.wav differ diff --git a/voice/没看到盲道,请向左侧小幅移动。.wav b/voice/没看到盲道,请向左侧小幅移动。.wav new file mode 100644 index 0000000..1f66f4e Binary files /dev/null and b/voice/没看到盲道,请向左侧小幅移动。.wav differ diff --git a/voice/目标在您的左前方,请右转一点。.wav b/voice/目标在您的左前方,请右转一点。.wav new file mode 100644 index 0000000..e7497fe Binary files /dev/null and b/voice/目标在您的左前方,请右转一点。.wav differ diff --git a/voice/目标在您的左前方,请左转一点。.wav b/voice/目标在您的左前方,请左转一点。.wav new file mode 100644 index 0000000..79509f4 Binary files /dev/null and b/voice/目标在您的左前方,请左转一点。.wav differ diff --git a/voice/目标就在前方,请慢慢靠近。.wav b/voice/目标就在前方,请慢慢靠近。.wav new file mode 100644 index 0000000..b31ca15 Binary files /dev/null and b/voice/目标就在前方,请慢慢靠近。.wav differ diff --git a/voice/目标消失,请原地小幅转动。.wav b/voice/目标消失,请原地小幅转动。.wav new file mode 100644 index 0000000..2dac313 Binary files /dev/null and b/voice/目标消失,请原地小幅转动。.wav differ diff --git a/voice/目标消失,请原地等待。.wav b/voice/目标消失,请原地等待。.wav new file mode 100644 index 0000000..8db3ce9 Binary files /dev/null and b/voice/目标消失,请原地等待。.wav differ diff --git a/voice/盲道已接近,开始对准盲道。.wav b/voice/盲道已接近,开始对准盲道。.wav new file mode 100644 index 0000000..d5d4779 Binary files /dev/null and b/voice/盲道已接近,开始对准盲道。.wav differ diff --git a/voice/稍微向右调整,继续前进。.wav b/voice/稍微向右调整,继续前进。.wav new file mode 100644 index 0000000..db0706b Binary files /dev/null and b/voice/稍微向右调整,继续前进。.wav differ diff --git a/voice/稍微向左调整,继续前进。.wav b/voice/稍微向左调整,继续前进。.wav new file mode 100644 index 0000000..92aecc0 Binary files /dev/null and b/voice/稍微向左调整,继续前进。.wav differ diff --git a/voice/红灯.WAV b/voice/红灯.WAV new file mode 100644 index 0000000..59c34f9 Binary files /dev/null and b/voice/红灯.WAV differ diff --git a/voice/红灯_原始.WAV b/voice/红灯_原始.WAV new file mode 100644 index 0000000..f53ff7f Binary files /dev/null and b/voice/红灯_原始.WAV differ diff --git a/voice/绿灯.WAV b/voice/绿灯.WAV new file 
mode 100644 index 0000000..75981ec Binary files /dev/null and b/voice/绿灯.WAV differ diff --git a/voice/绿灯_原始.WAV b/voice/绿灯_原始.WAV new file mode 100644 index 0000000..1a5725c Binary files /dev/null and b/voice/绿灯_原始.WAV differ diff --git a/voice/绿灯快没了.wav b/voice/绿灯快没了.wav new file mode 100644 index 0000000..4d63baa Binary files /dev/null and b/voice/绿灯快没了.wav differ diff --git a/voice/绿灯稳定,开始通行。.wav b/voice/绿灯稳定,开始通行。.wav new file mode 100644 index 0000000..6c175de Binary files /dev/null and b/voice/绿灯稳定,开始通行。.wav differ diff --git a/voice/请向右平移。.wav b/voice/请向右平移。.wav new file mode 100644 index 0000000..48cb226 Binary files /dev/null and b/voice/请向右平移。.wav differ diff --git a/voice/请向右微调,对准盲道。.wav b/voice/请向右微调,对准盲道。.wav new file mode 100644 index 0000000..eb197ec Binary files /dev/null and b/voice/请向右微调,对准盲道。.wav differ diff --git a/voice/请向右转动。.wav b/voice/请向右转动。.wav new file mode 100644 index 0000000..5a4b8fb Binary files /dev/null and b/voice/请向右转动。.wav differ diff --git a/voice/请向左平移。.wav b/voice/请向左平移。.wav new file mode 100644 index 0000000..60fe6e6 Binary files /dev/null and b/voice/请向左平移。.wav differ diff --git a/voice/请向左微调,对准盲道。.wav b/voice/请向左微调,对准盲道。.wav new file mode 100644 index 0000000..54c5cb3 Binary files /dev/null and b/voice/请向左微调,对准盲道。.wav differ diff --git a/voice/请向左转动。.wav b/voice/请向左转动。.wav new file mode 100644 index 0000000..381281e Binary files /dev/null and b/voice/请向左转动。.wav differ diff --git a/voice/请继续向右平移。.wav b/voice/请继续向右平移。.wav new file mode 100644 index 0000000..03f0bf8 Binary files /dev/null and b/voice/请继续向右平移。.wav differ diff --git a/voice/请继续向左平移。.wav b/voice/请继续向左平移。.wav new file mode 100644 index 0000000..946fc82 Binary files /dev/null and b/voice/请继续向左平移。.wav differ diff --git a/voice/请问完成了吗?.wav b/voice/请问完成了吗?.wav new file mode 100644 index 0000000..b6ce528 Binary files /dev/null and b/voice/请问完成了吗?.wav differ diff --git a/voice/路径太远,请继续靠近.wav b/voice/路径太远,请继续靠近.wav new file mode 100644 index 0000000..db0947c Binary 
files /dev/null and b/voice/路径太远,请继续靠近.wav differ diff --git a/voice/路径被挡住,请向右侧平移。.wav b/voice/路径被挡住,请向右侧平移。.wav new file mode 100644 index 0000000..0296ec4 Binary files /dev/null and b/voice/路径被挡住,请向右侧平移。.wav differ diff --git a/voice/路径被挡住,请向左侧平移。.wav b/voice/路径被挡住,请向左侧平移。.wav new file mode 100644 index 0000000..d6bd1f0 Binary files /dev/null and b/voice/路径被挡住,请向左侧平移。.wav differ diff --git a/voice/过马路模式已启动。.wav b/voice/过马路模式已启动。.wav new file mode 100644 index 0000000..3ef4eff Binary files /dev/null and b/voice/过马路模式已启动。.wav differ diff --git a/voice/过马路结束,准备上人行道。.wav b/voice/过马路结束,准备上人行道。.wav new file mode 100644 index 0000000..f665f3d Binary files /dev/null and b/voice/过马路结束,准备上人行道。.wav differ diff --git a/voice/远处发现斑马线,继续直行。.wav b/voice/远处发现斑马线,继续直行。.wav new file mode 100644 index 0000000..f5c9a06 Binary files /dev/null and b/voice/远处发现斑马线,继续直行。.wav differ diff --git a/voice/远处有盲道,继续前行。.wav b/voice/远处有盲道,继续前行。.wav new file mode 100644 index 0000000..d461e89 Binary files /dev/null and b/voice/远处有盲道,继续前行。.wav differ diff --git a/voice/避让完成,已回到盲道。.wav b/voice/避让完成,已回到盲道。.wav new file mode 100644 index 0000000..2048fed Binary files /dev/null and b/voice/避让完成,已回到盲道。.wav differ diff --git a/voice/黄灯.WAV b/voice/黄灯.WAV new file mode 100644 index 0000000..c44a249 Binary files /dev/null and b/voice/黄灯.WAV differ diff --git a/voice/黄灯_原始.WAV b/voice/黄灯_原始.WAV new file mode 100644 index 0000000..e4051d3 Binary files /dev/null and b/voice/黄灯_原始.WAV differ diff --git a/workflow_blindpath.py b/workflow_blindpath.py new file mode 100644 index 0000000..ef4a98b --- /dev/null +++ b/workflow_blindpath.py @@ -0,0 +1,3235 @@ +# workflow_blindpath.py +# -*- coding: utf-8 -*- +""" +盲道导航工作流 - 纯净版 +移除了所有 Redis、Celery 依赖,可以直接集成到任何 Python 应用中 +""" +import os +import time +import cv2 +import numpy as np +import logging +from typing import Dict, List, Optional, Tuple, Any +from dataclasses import dataclass +from collections import deque +import torch # 添加这行 +from obstacle_detector_client 
import ObstacleDetectorClient +# 【移除】从这里播放音频会导致线程池中asyncio无法工作 +# from audio_player import play_voice_text +# 语音由 app_main.py 统一处理 +from crosswalk_awareness import CrosswalkAwarenessMonitor, split_combined_voice # 斑马线感知 +# 尝试导入 Pillow,用于中文显示 +try: + from PIL import Image, ImageDraw, ImageFont + PIL_AVAILABLE = True +except ImportError: + PIL_AVAILABLE = False + Image, ImageDraw, ImageFont = None, None, None + +logger = logging.getLogger(__name__) + +# ========== 状态常量定义 ========== +STATE_ONBOARDING = "ONBOARDING" +STATE_NAVIGATING = "NAVIGATING" +STATE_MANEUVERING_TURN = "MANEUVERING_TURN" +STATE_AVOIDING_OBSTACLE = "AVOIDING_OBSTACLE" +STATE_LOCKING_ON = "LOCKING_ON" + +# ONBOARDING子步骤 +ONBOARDING_STEP_ROTATION = "ROTATION" +ONBOARDING_STEP_TRANSLATION = "TRANSLATION" + +# 转向子步骤 +MANEUVER_STEP_1_ISSUE_COMMAND = "ISSUE_COMMAND" +MANEUVER_STEP_2_WAIT_FOR_SHIFT = "WAIT_FOR_SHIFT" +MANEUVER_STEP_3_ALIGN_ON_NEW_PATH = "ALIGN_ON_NEW_PATH" + +# 颜色定义 (BGR格式) +VIS_COLORS = { + "blind_path": (0, 255, 0), # 绿色 + "obstacle": (0, 0, 255), # 红色 + "crosswalk": (0, 165, 255), # 橙色 + "centerline": (0, 255, 255), # 黄色 + "target_point": (255, 0, 0), # 蓝色 + "turn_point": (128, 0, 128), # 紫色 + "pulse_effect": (100, 100, 255) # 淡红色 +} + +# 障碍物名称映射 +_OBSTACLE_NAME_CN = { + 'person': '人', + 'bicycle': '自行车', + 'car': '车', + 'motorcycle': '摩托车', + 'bus': '公交车', + 'truck': '卡车', + 'animal': '动物', + 'scooter': '电瓶车', + 'stroller': '婴儿车', + 'dog': '狗', +} + +# 动态类别名称列表 +DYNAMIC_CLASS_NAMES = {'person', 'bicycle', 'car', 'motorcycle', 'bus', 'truck', 'animal', 'dog'} + +@dataclass +class ProcessingResult: + """处理结果数据类""" + guidance_text: str # 语音引导文本 + visualizations: List[Dict[str, Any]] # 可视化元素列表 + annotated_image: Optional[np.ndarray] = None # 标注后的图像 + state_info: Dict[str, Any] = None # 状态信息 + + def __post_init__(self): + if self.state_info is None: + self.state_info = {} + + +class BlindPathNavigator: + """盲道导航处理器 - 无外部依赖版本""" + + def __init__(self, yolo_model=None, 
obstacle_detector=None): + """ + 初始化导航器 + :param yolo_model: YOLO分割模型(可选) + :param obstacle_detector: 障碍物检测器(可选) + """ + self.yolo_model = yolo_model + self.obstacle_detector = obstacle_detector + + # 状态变量 + self.current_state = STATE_ONBOARDING + self.onboarding_step = ONBOARDING_STEP_ROTATION + self.maneuver_step = MANEUVER_STEP_1_ISSUE_COMMAND + self.maneuver_target_info = None + + + # 光流追踪参数 + self.lk_params = dict( + winSize=(15, 15), + maxLevel=2, + criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03) + ) + + # 特征检测参数 + self.feature_params = dict( + maxCorners=100, + qualityLevel=0.05, + minDistance=10, + blockSize=7, + useHarrisDetector=False, + k=0.04 + ) + + # 光流追踪点缓存 + self.flow_points = {} # {mask_type: points} + self.flow_grace = {} # {mask_type: grace_count} + self.FLOW_GRACE_MAX = 3 # 【修改】从8帧降低到3帧,快速清除光流遗留 + + # 中心线平滑缓存 + self.centerline_history = [] # 历史中心线数据 + self.centerline_history_max = 5 # 保留最近5帧用于平滑 + + # 多项式系数平滑缓存 + self.poly_coeffs_history = [] # 历史多项式系数 + self.poly_coeffs_history_max = 8 # 保留最近8帧系数用于平滑 + + # 转弯检测追踪器 + self.turn_detection_tracker = { + 'direction': None, + 'consecutive_hits': 0, + 'last_seen_frame': 0, + 'corner_info': None + } + + # 转弯冷却 + self.turn_cooldown_frames = 0 + self.TURN_COOLDOWN_DURATION = 50 + + # 避障相关 + self.avoidance_plan = None + self.avoidance_step_index = 0 + self.lock_on_data = None + + # 斑马线追踪 + self.crosswalk_tracker = { + 'stage': 'not_detected', + 'consecutive_frames': 0, + 'last_area_ratio': 0.0, + 'last_bottom_y_ratio': 0.0, + 'last_center_x_ratio': 0.5, + 'position_announced': False, + 'alignment_status': 'not_aligned', + 'last_seen_frame': 0, + 'last_angle': 0.0 + } + + # 帧计数器 + self.frame_counter = 0 + + # 直行提示配置 - 支持环境变量 + self.guide_interval = float(os.getenv("AIGLASS_STRAIGHT_INTERVAL", "4.0")) # 播报间隔(秒) + self.last_guide_time = 0.0 + self.straight_continuous_mode = os.getenv("AIGLASS_STRAIGHT_CONTINUOUS", "1") == "1" # 持续播报模式 + self.straight_repeat_limit = 
int(os.getenv("AIGLASS_STRAIGHT_LIMIT", "2")) # 限制模式下的最大次数 + self.straight_repeat_count = 0 + + # 【新增】方向指令持续播报配置 + self.direction_interval = float(os.getenv("AIGLASS_DIRECTION_INTERVAL", "3.0")) # 方向指令间隔(秒) + self.last_direction_time = 0.0 + self.last_direction_message = "" + + # 打印配置信息 + logger.info(f"[BlindPath] 直行播报配置: 间隔={self.guide_interval}秒, " + f"持续模式={self.straight_continuous_mode}, " + f"限制次数={self.straight_repeat_limit}") + logger.info(f"[BlindPath] 方向播报配置: 间隔={self.direction_interval}秒") + + # 缓存变量 + self.prev_gray = None + self.prev_blind_path_mask = None + self.prev_crosswalk_mask = None + self.prev_obstacle_cache = [] + self.last_guidance_message = "" + self.last_detected_obstacles = [] + self.last_obstacle_detection_frame = 0 + self.last_any_speech_time = 0 + + # 斑马线准备状态标志 + self.crosswalk_ready_announced = False + self.crosswalk_ready_time = 0 + + # 障碍物语音待播报 + self.pending_obstacle_voice = None + + # 红绿灯检测 + self.traffic_light_detector = None + self.init_traffic_light_detector() + self.traffic_light_history = deque(maxlen=8) # 用于多数表决 + self.last_traffic_light_state = "unknown" + self.green_light_announced = False + + # 阈值设置 + self.CLASS_CONF_THRESHOLDS = { + 1: 0.20, # blind_path + 0: 0.30 # crosswalk + } + + # 导航阈值 + # 导航阈值 + self.ONBOARDING_ALIGN_THRESHOLD_RATIO = 0.1 + self.VP_FIT_ERROR_THRESHOLD = 8.0 + + self.ONBOARDING_ORIENTATION_THRESHOLD_RAD = np.deg2rad(10) + self.ONBOARDING_CENTER_OFFSET_THRESHOLD_RATIO = 0.15 + self.NAV_ORIENTATION_THRESHOLD_RAD = np.deg2rad(10) + self.NAV_CENTER_OFFSET_THRESHOLD_RATIO = 0.15 + self.CURVATURE_PROXY_THRESHOLD = 5e-5 + + # 斑马线切换阈值 + self.CROSSWALK_SWITCH_AREA_RATIO = 0.22 + self.CROSSWALK_SWITCH_BOTTOM_RATIO = 0.9 + self.CROSSWALK_SWITCH_CONSECUTIVE_FRAMES = 10 + + # 障碍物检测间隔 + # 障碍物检测优化参数 - Day 22 优化: 增加间隔减少卡顿 + self.OBSTACLE_DETECTION_INTERVAL = int(os.getenv("AIGLASS_OBS_INTERVAL", "18")) # 从15帧增加到18帧 + self.OBSTACLE_CACHE_DURATION_FRAMES = int(os.getenv("AIGLASS_OBS_CACHE_FRAMES", "20")) # Day 21: 
缓存20帧减少GPU负载 + + # 障碍物播报管理 + self.last_obstacle_speech = "" + self.last_obstacle_speech_time = 0 + self.obstacle_speech_cooldown = 5.0 # 相同障碍物3秒内不重复播报 + + # 掩码稳定化参数(已禁用光流外推,这些参数不再使用) + self.MASK_STAB_MIN_AREA = int(os.getenv("AIGLASS_MASK_MIN_AREA", "1500")) + self.MASK_STAB_KERNEL = int(os.getenv("AIGLASS_MASK_MORPH", "3")) + self.MASK_MISS_TTL = 0 # 【修改为0】禁用光流外推,完全实时 + self.blind_miss_ttl = 0 + self.cross_miss_ttl = 0 + + # 光流跟踪参数 + self.flow_iou_threshold = 0.3 # IoU低于此值时重新初始化光流点 + + # 【新增】盲道YOLO检测间隔 - Day 22 优化: 增加间隔减少卡顿 + self.BLINDPATH_DETECTION_INTERVAL = int(os.getenv("AIGLASS_BLINDPATH_INTERVAL", "10")) # 从8帧增加到10帧 + self.last_blindpath_detection_frame = 0 + self.last_blindpath_mask = None + self.last_crosswalk_mask = None + + # 【新增】斑马线感知监控器 + self.crosswalk_monitor = CrosswalkAwarenessMonitor() + logger.info("[BlindPath] 斑马线感知监控器已初始化") + logger.info(f"[BlindPath] 盲道检测间隔: 每{self.BLINDPATH_DETECTION_INTERVAL}帧") + + def init_traffic_light_detector(self): + """初始化红绿灯检测器""" + try: + # 首先尝试使用 YOLO 模型检测红绿灯 + self.traffic_light_yolo = None + # 如果你有专门的红绿灯模型,在这里加载 + # self.traffic_light_yolo = YOLO('path/to/traffic_light_model.pt') + except Exception as e: + logger.info(f"未加载红绿灯YOLO模型: {e}") + + def detect_traffic_light(self, image: np.ndarray) -> str: + """检测红绿灯状态 + 返回: 'red', 'green', 'yellow', 'unknown' + """ + # 模拟模式(用于测试) + if os.getenv("AIGLASS_SIMULATE_TRAFFIC_LIGHT", "0") == "1": + # 根据帧数模拟红绿灯变化 + cycle = (self.frame_counter // 100) % 3 + if cycle == 0: + return "red" + elif cycle == 1: + return "yellow" + else: + return "green" + + # 如果有 YOLO 模型,优先使用 + if self.traffic_light_yolo: + try: + results = self.traffic_light_yolo.predict(image, verbose=False, conf=0.3) + # TODO: 解析 YOLO 结果,判断红绿灯颜色 + pass + except: + pass + + # 使用 HSV 颜色检测作为后备方案 + return self._detect_traffic_light_by_color(image) + + def _detect_traffic_light_by_color(self, image: np.ndarray) -> str: + """基于 HSV 颜色空间检测红绿灯""" + h, w = image.shape[:2] + # 检测图像上半部分和中间部分(红绿灯可能在不同高度) + roi = 
image[:int(h * 0.7), :] # 扩大检测范围到70% + hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV) + + # 提高亮度的图像用于检测(有助于检测较暗的红绿灯) + hsv_bright = hsv.copy() + hsv_bright[:, :, 2] = cv2.add(hsv_bright[:, :, 2], 30) # 增加亮度 + + # 定义颜色范围(优化后的参数) + # 红色(两个范围,因为红色在 HSV 中跨越 0 度) + lower_red1 = np.array([0, 120, 100]) + upper_red1 = np.array([10, 255, 255]) + lower_red2 = np.array([170, 120, 100]) + upper_red2 = np.array([180, 255, 255]) + + # 绿色(调整为更宽的范围以适应不同灯光) + lower_green = np.array([40, 60, 60]) + upper_green = np.array([90, 255, 255]) + + # 黄色 + lower_yellow = np.array([15, 100, 100]) + upper_yellow = np.array([40, 255, 255]) + + # 创建掩码(同时在原图和增亮图上检测) + mask_red1 = cv2.inRange(hsv, lower_red1, upper_red1) + mask_red2 = cv2.inRange(hsv, lower_red2, upper_red2) + mask_red1_bright = cv2.inRange(hsv_bright, lower_red1, upper_red1) + mask_red2_bright = cv2.inRange(hsv_bright, lower_red2, upper_red2) + mask_red = cv2.bitwise_or(cv2.bitwise_or(mask_red1, mask_red2), + cv2.bitwise_or(mask_red1_bright, mask_red2_bright)) + + mask_green = cv2.bitwise_or(cv2.inRange(hsv, lower_green, upper_green), + cv2.inRange(hsv_bright, lower_green, upper_green)) + mask_yellow = cv2.bitwise_or(cv2.inRange(hsv, lower_yellow, upper_yellow), + cv2.inRange(hsv_bright, lower_yellow, upper_yellow)) + + # 形态学操作去噪 + kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) + mask_red = cv2.morphologyEx(mask_red, cv2.MORPH_OPEN, kernel) + mask_green = cv2.morphologyEx(mask_green, cv2.MORPH_OPEN, kernel) + mask_yellow = cv2.morphologyEx(mask_yellow, cv2.MORPH_OPEN, kernel) + + # 计算每种颜色的面积 + area_red = cv2.countNonZero(mask_red) + area_green = cv2.countNonZero(mask_green) + area_yellow = cv2.countNonZero(mask_yellow) + + # 设置最小面积阈值(降低阈值使检测更敏感) + min_area = 30 # 进一步降低阈值 + + # 添加更详细的调试信息 + if hasattr(self, 'frame_counter') and self.frame_counter % 30 == 0: + logger.info(f"[HSV检测] 红:{area_red}, 绿:{area_green}, 黄:{area_yellow}") + # 保存调试图像 + if os.getenv("AIGLASS_DEBUG_TRAFFIC_LIGHT", "0") == "1": + debug_dir = 
"traffic_light_debug" + os.makedirs(debug_dir, exist_ok=True) + cv2.imwrite(f"{debug_dir}/frame_{self.frame_counter}_roi.jpg", roi) + cv2.imwrite(f"{debug_dir}/frame_{self.frame_counter}_red.jpg", mask_red) + cv2.imwrite(f"{debug_dir}/frame_{self.frame_counter}_green.jpg", mask_green) + cv2.imwrite(f"{debug_dir}/frame_{self.frame_counter}_yellow.jpg", mask_yellow) + + # 判断颜色(优先级:绿 > 红 > 黄) + if area_green > min_area and area_green > area_red * 0.8: # 绿灯优先 + return "green" + elif area_red > min_area and area_red > area_green: + return "red" + elif area_yellow > min_area: + return "yellow" + else: + return "unknown" + + def _get_voice_priority(self, guidance_text): + """获取语音指令的优先级 + 优先级:障碍物(100) > 转向/平移(50) > 保持直行(10) + """ + if not guidance_text: + return 0 + + # 障碍物播报 - 最高优先级 + obstacle_keywords = ['前方有', '左侧有', '右侧有', '停一下', '注意避让', '障碍物'] + for keyword in obstacle_keywords: + if keyword in guidance_text: + return 100 + + # 转向和平移 - 中等优先级 + direction_keywords = ['左转', '右转', '左移', '右移', '向左', '向右', '平移', '微调'] + for keyword in direction_keywords: + if keyword in guidance_text: + return 50 + + # 保持直行 - 最低优先级 + if '保持直行' in guidance_text or '继续前进' in guidance_text or '方向正确' in guidance_text: + return 10 + + # 其他指令 - 默认中等优先级 + return 30 + + def process_frame(self, image: np.ndarray) -> ProcessingResult: + """ + 处理单帧图像 + :param image: BGR格式的图像 + :return: 处理结果 + """ + # 【Day 15 性能诊断】帧处理计时 + import time as perf_time + frame_start_time = perf_time.perf_counter() + timing_log = {} + + self.frame_counter += 1 + + # 更新冷却期 + if self.turn_cooldown_frames > 0: + self.turn_cooldown_frames -= 1 + + image_height, image_width = image.shape[:2] + image_center_x = image_width / 2 + + # 转换为灰度图 + t0 = perf_time.perf_counter() + curr_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + timing_log['grayscale'] = (perf_time.perf_counter() - t0) * 1000 + + # 可视化元素列表 + frame_visualizations = [] + guidance_text = "" + + # Day 20 优化: 并行检测 - 盲道和障碍物同时检测 + # 统一检测间隔,减少总体延迟 + UNIFIED_DETECTION_INTERVAL 
= int(os.getenv("AIGLASS_UNIFIED_INTERVAL", "20")) # Day 21: 从10帧增加到20帧减少卡顿 + + t0 = perf_time.perf_counter() + if self.frame_counter % UNIFIED_DETECTION_INTERVAL == 0: + # 尝试使用 GPU 并行检测 + try: + from gpu_parallel import ParallelDetector + if not hasattr(self, '_parallel_detector'): + self._parallel_detector = ParallelDetector(self.yolo_model, self.obstacle_detector) + logger.info("[Day20] GPU 并行检测器已初始化") + + # 并行执行盲道检测和障碍物检测 + blind_path_mask, crosswalk_mask, detected_obstacles = self._parallel_detector.detect_all( + image, self.last_blindpath_mask + ) + + self.last_blindpath_mask = blind_path_mask + self.last_crosswalk_mask = crosswalk_mask + self.last_detected_obstacles = detected_obstacles + self.last_obstacle_detection_frame = self.frame_counter + + timing_log['yolo'] = (perf_time.perf_counter() - t0) * 1000 + timing_log['obstacle'] = 0 # 并行执行,计入 yolo 时间 + + except ImportError: + # 回退到串行检测 + logger.warning("[Day20] gpu_parallel 模块不可用,使用串行检测") + blind_path_mask, crosswalk_mask = self._detect_path_and_crosswalk(image) + self.last_blindpath_mask = blind_path_mask + self.last_crosswalk_mask = crosswalk_mask + timing_log['yolo'] = (perf_time.perf_counter() - t0) * 1000 + + t0 = perf_time.perf_counter() + detected_obstacles = self._detect_obstacles(image, blind_path_mask) + self.last_detected_obstacles = detected_obstacles + self.last_obstacle_detection_frame = self.frame_counter + timing_log['obstacle'] = (perf_time.perf_counter() - t0) * 1000 + else: + # 使用缓存 + blind_path_mask = self.last_blindpath_mask + crosswalk_mask = self.last_crosswalk_mask + timing_log['yolo'] = 0 + + if self.frame_counter - self.last_obstacle_detection_frame < self.OBSTACLE_CACHE_DURATION_FRAMES: + detected_obstacles = self.last_detected_obstacles + else: + detected_obstacles = [] + timing_log['obstacle'] = 0 # 未执行检测 + + # 添加所有障碍物的可视化(不只是近距离的) + t0 = perf_time.perf_counter() + for i, obs in enumerate(detected_obstacles): + self._add_obstacle_visualization(obs, frame_visualizations) + 
timing_log['obstacle_viz'] = (perf_time.perf_counter() - t0) * 1000 + + # 【新增】检查近距离障碍物并设置语音 + self._check_and_set_obstacle_voice(detected_obstacles) + + # 【新增】斑马线感知处理 + # 【Day 15 优化】减少每帧日志输出,只在每 30 帧输出一次 + if crosswalk_mask is not None and self.frame_counter % 30 == 0: + cross_pixels = np.sum(crosswalk_mask > 0) + if cross_pixels > 0: + logger.info(f"[斑马线] monitor: pixels={cross_pixels}, area={cross_pixels/crosswalk_mask.size*100:.2f}%") + elif crosswalk_mask is None and self.frame_counter % 30 == 0: + logger.info(f"[斑马线] crosswalk_mask为None") + + crosswalk_guidance = self.crosswalk_monitor.process_frame(crosswalk_mask, blind_path_mask) + if crosswalk_guidance: + logger.info(f"[斑马线感知] 检测结果: area={crosswalk_guidance.get('area', 0):.3f}, " + f"should_broadcast={crosswalk_guidance.get('should_broadcast', False)}, " + f"voice={crosswalk_guidance.get('voice_text', 'None')}") + if crosswalk_guidance and crosswalk_guidance['should_broadcast']: + # 将斑马线语音加入待播报列表(通过pending机制) + if not hasattr(self, 'pending_crosswalk_voice'): + self.pending_crosswalk_voice = None + self.pending_crosswalk_voice = crosswalk_guidance + logger.info(f"[斑马线语音] 已设置待播报语音: {crosswalk_guidance['voice_text']}, 优先级{crosswalk_guidance['priority']}") + + # 【新增】添加斑马线可视化 + if crosswalk_mask is not None: + # 计算可视化数据 + total_pixels = crosswalk_mask.size + crosswalk_pixels = np.sum(crosswalk_mask > 0) + area_ratio = crosswalk_pixels / total_pixels + + y_coords, x_coords = np.where(crosswalk_mask > 0) + if len(y_coords) > 0: + center_x_ratio = np.mean(x_coords) / crosswalk_mask.shape[1] + center_y_ratio = np.mean(y_coords) / crosswalk_mask.shape[0] + has_occlusion = self.crosswalk_monitor._check_occlusion(crosswalk_mask, blind_path_mask) + + # 获取可视化数据 + viz_data = self.crosswalk_monitor.get_visualization_data( + crosswalk_mask, area_ratio, center_x_ratio, center_y_ratio, has_occlusion + ) + + # 添加斑马线mask可视化 + self._add_mask_visualization(crosswalk_mask, frame_visualizations, + "crosswalk_mask", 
viz_data['stage_color']) + + # 添加斑马线检测信息可视化 + self._add_crosswalk_info_visualization(viz_data, image_height, image_width, + frame_visualizations) + + # 【已禁用】4. 更新斑马线追踪器 - 盲道导航不再跳转到斑马线 + # self._update_crosswalk_tracker(crosswalk_mask, image_height, image_width) + + # 5. 添加路径可视化 + # 【恢复】盲道mask可视化 + self._add_mask_visualization(blind_path_mask, frame_visualizations, "blind_path_mask", "rgba(0, 255, 0, 0.4)") + # 【斑马线可视化由crosswalk_monitor处理,不在这里添加】 + + + # 【已禁用】5. 根据状态执行不同的导航逻辑 - 盲道导航不再处理斑马线 + current_stage = 'not_detected' # 固定为不检测斑马线 + # current_stage = self.crosswalk_tracker['stage'] # 已禁用 + + # 直接进行盲道导航,不检查斑马线状态 + if False: # current_stage == 'ready': + # 检查是否已经播报过准备提示 + if not hasattr(self, 'crosswalk_ready_announced'): + self.crosswalk_ready_announced = False + self.crosswalk_ready_time = 0 + + current_time = time.time() + + # 检测红绿灯 + traffic_light_color = self.detect_traffic_light(image) + self.traffic_light_history.append(traffic_light_color) + + # 调试信息 + if self.frame_counter % 30 == 0: # 每30帧打印一次 + logger.info(f"[红绿灯检测] 当前颜色: {traffic_light_color}, 历史: {list(self.traffic_light_history)}") + + # 多数表决,获得稳定的红绿灯状态 + if len(self.traffic_light_history) >= 3: + color_counts = {} + for color in self.traffic_light_history: + color_counts[color] = color_counts.get(color, 0) + 1 + # 获取出现次数最多的颜色 + stable_color = max(color_counts.items(), key=lambda x: x[1])[0] + else: + stable_color = "unknown" + + # 添加红绿灯状态可视化 + self._add_traffic_light_visualization( + stable_color, frame_visualizations, image_height, image_width + ) + + # 决定语音播报 + if not self.crosswalk_ready_announced: + guidance_text = "已对准, 准备切换过马路模式。" + self.crosswalk_ready_announced = True + self.crosswalk_ready_time = current_time + elif stable_color == "green" and not self.green_light_announced: + guidance_text = "绿灯稳定,开始通行。" + self.green_light_announced = True + elif stable_color == "red": + # 红灯时定期提醒 + if current_time - self.crosswalk_ready_time > 5.0: + guidance_text = "正在等待绿灯…" + self.crosswalk_ready_time = 
current_time + else: + guidance_text = "" + else: + guidance_text = "" + + # 添加状态信息 + frame_visualizations.append({ + "type": "data_panel", + "data": { + "状态": "等待过马路", + "红绿灯": stable_color, + "检测历史": len(self.traffic_light_history) + }, + "position": (25, image_height - 120) + }) + + elif False: # current_stage == 'approaching': + guidance_text = self._handle_crosswalk_approaching( + frame_visualizations, image_height, image_width, image + ) + + # elif current_stage in ['far', 'not_detected']: + else: # 总是执行盲道导航 + # 【已禁用】斑马线提示 + # if current_stage == 'far' and not self.crosswalk_tracker['position_announced']: + # guidance_text = "远处发现斑马线,继续直行。" + # self.crosswalk_tracker['position_announced'] = True + + if blind_path_mask is None: + guidance_text = "" + # 【移除左上角文字,改为右上角数据面板】 + frame_visualizations.append({ + "type": "data_panel", + "data": { + "状态": "等待盲道识别" + }, + "position": (image_width - 180, 20) + }) + else: + guidance_text = self._execute_state_machine( + blind_path_mask, image, frame_visualizations, + image_height, image_width, curr_gray + ) + + # 6. 更新缓存 + self.prev_gray = curr_gray + if blind_path_mask is not None: + self.prev_blind_path_mask = blind_path_mask.copy() + if crosswalk_mask is not None: + self.prev_crosswalk_mask = crosswalk_mask.copy() + + # 【改进】语音优先级管理系统 + current_time = time.time() + + # 收集所有可能的语音指令 + voice_candidates = [] + + # 1. 添加主要导航语音 + if guidance_text: + voice_candidates.append({ + 'text': guidance_text, + 'priority': self._get_voice_priority(guidance_text), + 'source': 'navigation' + }) + + # 2. 
检查是否有障碍物语音(独立检查,确保最高优先级) + if hasattr(self, 'pending_obstacle_voice'): + if self.pending_obstacle_voice: + voice_candidates.append({ + 'text': self.pending_obstacle_voice, + 'priority': 100, # 障碍物始终最高优先级 + 'source': 'obstacle' + }) + self.pending_obstacle_voice = None # 清除已处理的障碍物语音 + + # 【新增】检查是否有斑马线语音 + if hasattr(self, 'pending_crosswalk_voice'): + if self.pending_crosswalk_voice: + voice_candidates.append({ + 'text': self.pending_crosswalk_voice['voice_text'], + 'priority': self.pending_crosswalk_voice['priority'], + 'source': 'crosswalk' + }) + self.pending_crosswalk_voice = None # 清除已处理的斑马线语音 + + # 3. 选择优先级最高的语音 + if voice_candidates: + # 按优先级排序,取最高的 + voice_candidates.sort(key=lambda x: x['priority'], reverse=True) + selected_voice = voice_candidates[0] + final_guidance_text = selected_voice['text'] + + # 全局播报冷却(避免任何语音重叠)- Day 22 优化: 降低冷却 + MIN_SPEECH_INTERVAL = 0.8 # 任意两条语音间隔至少0.8秒 (从1.2降低) + if hasattr(self, 'last_any_speech_time'): + if current_time - self.last_any_speech_time < MIN_SPEECH_INTERVAL: + final_guidance_text = "" # 太快了,跳过这次播报 + + # 特殊处理保持直行的节流 + if final_guidance_text == "保持直行": + if self.straight_continuous_mode: + # 持续播报模式:只检查时间间隔 + if current_time - self.last_guide_time >= self.guide_interval: + self.last_guide_time = current_time + self.straight_repeat_count += 1 + self.last_any_speech_time = current_time + else: + final_guidance_text = "" + else: + # 原有的限制模式 + if (current_time - self.last_guide_time >= self.guide_interval) and \ + (self.straight_repeat_count < self.straight_repeat_limit): + self.last_guide_time = current_time + self.straight_repeat_count += 1 + self.last_any_speech_time = current_time + else: + final_guidance_text = "" + elif final_guidance_text and selected_voice['source'] != 'obstacle': + # 【修改】非直行、非障碍物指令 - 支持方向指令持续播报 + # 判断是否是方向指令 + direction_keywords = ["左转", "右转", "左移", "右移", "向左", "向右", "平移", "微调"] + is_direction = any(keyword in final_guidance_text for keyword in direction_keywords) + + if is_direction: + # 
方向指令:支持持续播报 + if final_guidance_text == self.last_direction_message: + # 同一个方向指令,检查时间间隔 + if current_time - self.last_direction_time >= self.direction_interval: + self.last_direction_time = current_time + self.last_any_speech_time = current_time + self.straight_repeat_count = 0 + else: + final_guidance_text = "" # 时间间隔不够,跳过 + else: + # 新的方向指令,立即播报 + self.last_direction_message = final_guidance_text + self.last_direction_time = current_time + self.last_any_speech_time = current_time + self.straight_repeat_count = 0 + else: + # 其他指令:只播报一次 + if final_guidance_text != self.last_guidance_message: + self.last_guidance_message = final_guidance_text + self.straight_repeat_count = 0 + self.last_any_speech_time = current_time + else: + final_guidance_text = "" + elif final_guidance_text and selected_voice['source'] == 'obstacle': + # 障碍物语音总是播报 + self.last_any_speech_time = current_time + elif final_guidance_text and selected_voice['source'] == 'crosswalk': + # 斑马线语音总是播报(不受重复检查限制) + self.last_any_speech_time = current_time + + # 播报选中的语音 + if final_guidance_text: + try: + # 【优化】组合语音只播第一部分,避免队列积压 + if selected_voice.get('source') == 'crosswalk' and ',' in final_guidance_text: + voice_parts = split_combined_voice(final_guidance_text) + logger.info(f"[斑马线语音] 组合播报检测到{len(voice_parts)}部分,只播第一部分保持实时") + # 只播放第一部分,后续部分丢弃以保持实时性 + if voice_parts: + # 【移除】play_voice_text(voice_parts[0]) - 由app_main统一处理 + final_guidance_text = voice_parts[0] # 只保留第一部分 + logger.info(f"[语音待播] 优先级{selected_voice['priority']}: {voice_parts[0]}") + else: + # 【移除】play_voice_text(final_guidance_text) - 由app_main统一处理 + logger.info(f"[语音待播] 优先级{selected_voice['priority']}: {final_guidance_text}") + except Exception as e: + logger.error(f"[语音播报] 播放失败: {e}") + else: + final_guidance_text = "" + + # 7. 
生成标注图像 + # Day 20 优化:移除 image.copy(),直接在原图上绘制(输入图像是临时的) + t0 = perf_time.perf_counter() + + if frame_visualizations: + annotated_image = self._draw_visualizations(image, frame_visualizations) + else: + annotated_image = image + + # 添加底部指令按钮(显示当前实际播报的语音) + current_instruction = final_guidance_text if final_guidance_text else "等待中..." + annotated_image = self._draw_command_button(annotated_image, current_instruction) + timing_log['visualization'] = (perf_time.perf_counter() - t0) * 1000 + + # 【Day 15 性能诊断】每 30 帧输出一次详细性能报告 + frame_total_time = (perf_time.perf_counter() - frame_start_time) * 1000 + if self.frame_counter % 30 == 0: + logger.info(f"[PERF] Frame={self.frame_counter} 总耗时={frame_total_time:.1f}ms | " + f"YOLO={timing_log.get('yolo', 0):.1f}ms, " + f"障碍物={timing_log.get('obstacle', 0):.1f}ms, " + f"可视化={timing_log.get('visualization', 0):.1f}ms, " + f"灰度={timing_log.get('grayscale', 0):.1f}ms") + + # 8. 返回结果 + # 【修改】返回 final_guidance_text(经过节流的),由 app_main 统一播放 + return ProcessingResult( + guidance_text=final_guidance_text, + visualizations=frame_visualizations, + annotated_image=annotated_image, + state_info={ + "state": self.current_state, + "crosswalk_stage": current_stage, + "frame_count": self.frame_counter + } + ) + + def _detect_path_and_crosswalk(self, image: np.ndarray) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]: + """检测盲道和斑马线 - Day 22 性能优化版本""" + if self.yolo_model is None: + # 【新增】没有模型时返回模拟数据用于测试 + logger.warning("YOLO模型未加载,返回模拟数据") + h, w = image.shape[:2] + # 创建一个模拟的盲道掩码(垂直居中的条带) + blind_path_mask = np.zeros((h, w), dtype=np.uint8) + # 在图像中央创建一个宽度为图像宽度20%的垂直条带 + strip_width = int(w * 0.2) + strip_left = (w - strip_width) // 2 + blind_path_mask[int(h*0.3):, strip_left:strip_left+strip_width] = 255 + return blind_path_mask, None + + blind_path_mask = None + crosswalk_mask = None + + try: + # Day 22 优化: 动态调整输入分辨率以提升性能 + # 可通过环境变量 AIGLASS_YOLO_IMGSZ 配置,默认480(从640降低) + imgsz = int(os.getenv("AIGLASS_YOLO_IMGSZ", "480")) + + min_conf = 
min(self.CLASS_CONF_THRESHOLDS.values()) + + # Day 22 优化: 使用 half 精度加速(如果GPU支持) + use_half = os.getenv("AIGLASS_YOLO_HALF", "1") == "1" + + results = self.yolo_model.predict( + image, + verbose=False, + conf=min_conf, + classes=[0, 1], + imgsz=imgsz, # 使用较小的输入尺寸 + half=use_half # FP16 半精度加速 + ) + + if (results and results[0] and results[0].masks is not None and + results[0].boxes is not None and len(results[0].masks.data) > 0): + + for mask_tensor, conf_tensor, cls_tensor in zip( + results[0].masks.data, results[0].boxes.conf, results[0].boxes.cls + ): + class_id = int(cls_tensor.item()) + confidence = float(conf_tensor.item()) + threshold = self.CLASS_CONF_THRESHOLDS.get(class_id, 1.0) + + if confidence >= threshold: + current_mask = self._tensor_to_mask(mask_tensor, image.shape[1], image.shape[0]) + + if class_id == 1: # 盲道 + if blind_path_mask is None: + blind_path_mask = current_mask + else: + blind_path_mask = cv2.bitwise_or(blind_path_mask, current_mask) + elif class_id == 0: # 斑马线 + if crosswalk_mask is None: + crosswalk_mask = current_mask + else: + crosswalk_mask = cv2.bitwise_or(crosswalk_mask, current_mask) + except Exception as e: + logger.error(f"YOLO检测失败: {e}") + # 【新增】检测失败时也返回模拟数据 + h, w = image.shape[:2] + blind_path_mask = np.zeros((h, w), dtype=np.uint8) + strip_width = int(w * 0.2) + strip_left = (w - strip_width) // 2 + blind_path_mask[int(h*0.3):, strip_left:strip_left+strip_width] = 255 + + return blind_path_mask, crosswalk_mask + + def _tensor_to_mask(self, mask_tensor, out_w: int, out_h: int, binarize: bool = True) -> np.ndarray: + """将张量掩码转换为numpy数组""" + try: + import torch + + if not isinstance(mask_tensor, torch.Tensor): + arr = np.asarray(mask_tensor) + if arr.dtype != np.uint8: + arr = (arr > 0.5).astype(np.uint8) * 255 if binarize else (arr * 255.0).astype(np.uint8) + mask_u8 = arr + else: + if mask_tensor.dtype in (torch.bfloat16, torch.float16): + mask_tensor = mask_tensor.to(torch.float32) + + if mask_tensor.ndim > 2: + mask_tensor 
= mask_tensor.squeeze() + + if binarize: + mask_tensor = (mask_tensor > 0.5).to(torch.uint8).mul_(255) + mask_u8 = mask_tensor.cpu().numpy() + else: + mask_u8 = (mask_tensor.mul(255).clamp_(0, 255).to(torch.uint8)).cpu().numpy() + + if mask_u8.ndim == 3: + mask_u8 = mask_u8.squeeze(-1) + + if mask_u8.shape[1] != out_w or mask_u8.shape[0] != out_h: + mask_u8 = cv2.resize(mask_u8, (out_w, out_h), interpolation=cv2.INTER_NEAREST) + + return mask_u8 + except ImportError: + # 如果没有torch,返回空掩码 + return np.zeros((out_h, out_w), dtype=np.uint8) + + def _stabilize_mask(self, prev_gray, curr_gray, raw_mask, prev_stable_mask, mask_type): + """稳定化掩码 - 使用 Lucas-Kanade 光流""" + if mask_type == 'blind_path': + ttl = self.blind_miss_ttl + min_area = self.MASK_STAB_MIN_AREA + else: # crosswalk + ttl = self.cross_miss_ttl + min_area = self.MASK_STAB_MIN_AREA + + # 调用新的光流稳定化方法 + stable_mask = self._stabilize_seg_mask( + prev_gray, curr_gray, raw_mask, prev_stable_mask, + (curr_gray.shape[1], curr_gray.shape[0]) if curr_gray is not None else (640, 480), + min_area_px=min_area, + morph_kernel=self.MASK_STAB_KERNEL, + mask_type=mask_type + ) + + if stable_mask is not None: + # 重置TTL + if mask_type == 'blind_path': + self.blind_miss_ttl = self.MASK_MISS_TTL + else: + self.cross_miss_ttl = self.MASK_MISS_TTL + return stable_mask + else: + # 减少TTL + if mask_type == 'blind_path': + self.blind_miss_ttl = max(0, self.blind_miss_ttl - 1) + else: + self.cross_miss_ttl = max(0, self.cross_miss_ttl - 1) + return None + + def _stabilize_seg_mask(self, prev_gray, curr_gray, curr_mask, prev_stable_mask, + image_wh, min_area_px=1500, morph_kernel=3, iou_high_thr=0.4, mask_type='', + fast_clear=True): + """使用 Lucas-Kanade 光流的掩码稳定化实现""" + W, H = image_wh + + def _binarize(mask): + if mask is None: + return None + if mask.dtype != np.uint8: + mask = mask.astype(np.uint8) + mask = (mask > 0).astype(np.uint8) * 255 + return mask + + def _morph_smooth(mask, kernel_size): + if mask is None: + return None + k 
= cv2.getStructuringElement(cv2.MORPH_ELLIPSE, + (max(1, kernel_size), max(1, kernel_size))) + sm = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, k, iterations=1) + sm = cv2.morphologyEx(sm, cv2.MORPH_OPEN, k, iterations=1) + return sm + + curr_mask_b = _binarize(curr_mask) + prev_mask_b = _binarize(prev_stable_mask) + + # 如果没有历史数据,直接返回当前掩码 + if prev_mask_b is None or prev_gray is None or curr_gray is None: + return _morph_smooth(curr_mask_b, morph_kernel) if curr_mask_b is not None else None + + # 当前帧有检测结果 + if curr_mask_b is not None and np.sum(curr_mask_b > 0) >= min_area_px: + # 计算与上一帧的IoU + if prev_mask_b is not None: + inter = np.logical_and(curr_mask_b > 0, prev_mask_b > 0).sum() + union = np.logical_or(curr_mask_b > 0, prev_mask_b > 0).sum() + iou = float(inter) / float(union) if union > 0 else 0.0 + + # IoU足够高,说明检测稳定,直接使用当前结果 + if iou >= iou_high_thr: + return _morph_smooth(curr_mask_b, morph_kernel) + + # IoU较低但仍有重叠,进行加权融合 + elif iou > 0.1: + # 使用光流预测的掩码 + flow_mask = self._predict_mask_with_flow(prev_mask_b, prev_gray, curr_gray) + if flow_mask is not None: + # 根据IoU动态调整权重 + # IoU越低,越依赖光流;IoU越高,越依赖当前检测 + w_curr = min(0.9, 0.4 + iou) # IoU=0.1时w_curr=0.5, IoU=0.5时w_curr=0.9 + w_flow = 1.0 - w_curr + + fused = (w_curr * curr_mask_b.astype(np.float32) + + w_flow * flow_mask.astype(np.float32)) + fused_bin = (fused >= 128).astype(np.uint8) * 255 + + # 重新初始化光流点(如果IoU过低) + if iou < self.flow_iou_threshold: + self.flow_points['blind_path'] = None + + return _morph_smooth(fused_bin, morph_kernel) + + # 没有历史或IoU太低,使用当前检测 + return _morph_smooth(curr_mask_b, morph_kernel) + + # 当前帧没有检测结果,尝试使用光流外推 + else: + # 获取对应的TTL + if mask_type == 'blind_path': + ttl = self.blind_miss_ttl + else: + ttl = self.cross_miss_ttl + + # 【修改】当前帧无检测结果,快速清除 + if fast_clear and ttl <= 1: + # TTL耗尽,立即返回None,不使用光流 + return None + + if prev_mask_b is not None and np.sum(prev_mask_b > 0) >= min_area_px and ttl > 0: + # 使用光流预测 + flow_mask = self._predict_mask_with_flow(prev_mask_b, prev_gray, 
curr_gray) + if flow_mask is not None and np.sum(flow_mask > 0) >= min_area_px * 0.5: + return _morph_smooth(flow_mask, morph_kernel) + + # 光流失败或超过TTL + return None + + def _predict_mask_with_flow(self, prev_mask, prev_gray, curr_gray): + """使用Lucas-Kanade光流预测掩码位置(改进版)""" + try: + # 方法1:尝试使用凸包方法(参考yolomedia) + if hasattr(self, 'flow_points') and 'blind_path' in self.flow_points: + p0 = self.flow_points['blind_path'] + if p0 is not None and len(p0) >= 5: + # 计算光流 + p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, p0, None, **self.lk_params) + + if p1 is not None and st is not None: + good_new = p1[st == 1] + if len(good_new) >= 5: + # 更新光流点 + self.flow_points['blind_path'] = good_new.reshape(-1, 1, 2) + + # 生成凸包掩码 + hull = cv2.convexHull(good_new.reshape(-1, 1, 2)) + poly = hull.reshape(-1, 2) + + if len(poly) >= 3: + H, W = curr_gray.shape[:2] + flow_mask = np.zeros((H, W), dtype=np.uint8) + cv2.fillPoly(flow_mask, [poly.astype(np.int32)], 255) + return flow_mask + + # 方法2:边缘特征点方法(原有方法,作为备选) + edge_mask = self._get_edge_mask(prev_mask, offset=10) + + # 检测特征点 + p0 = cv2.goodFeaturesToTrack(prev_gray, mask=edge_mask, **self.feature_params) + if p0 is None or len(p0) < 8: + return None + + # 保存特征点供下次使用 + self.flow_points['blind_path'] = p0 + + # 计算光流 + p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, p0, None, **self.lk_params) + + if p1 is None or st is None: + return None + + # 只保留成功追踪的点 + good_new = p1[st == 1] + good_old = p0[st == 1] + + if len(good_new) < 5: + return None + + # 估计变换矩阵(使用RANSAC提高鲁棒性) + M, inliers = cv2.estimateAffinePartial2D(good_old, good_new, method=cv2.RANSAC, ransacReprojThreshold=5.0) + + if M is None: + return None + + # 应用变换 + H, W = curr_gray.shape[:2] + flow_mask = cv2.warpAffine(prev_mask, M, (W, H), + flags=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_CONSTANT, + borderValue=0) + + return flow_mask + + except Exception as e: + logger.debug(f"光流预测失败: {e}") + return None + + + def _get_edge_mask(self, mask, 
offset=10): + """获取掩码的内边缘区域,用于特征点检测""" + if mask is None: + return None + + # 腐蚀得到内部掩码 + kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (offset*2, offset*2)) + inner = cv2.erode(mask, kernel, iterations=1) + + # 边缘 = 原始 - 内部 + edge = cv2.subtract(mask, inner) + + # 稍微膨胀边缘区域 + kernel_small = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) + edge = cv2.dilate(edge, kernel_small, iterations=1) + + return edge + + def _smooth_centerline(self, centerline_data): + """平滑中心线数据,减少抖动""" + if centerline_data is None or len(centerline_data) < 5: + return centerline_data + + # 保存到历史记录 + self.centerline_history.append(centerline_data.copy()) + if len(self.centerline_history) > self.centerline_history_max: + self.centerline_history.pop(0) + + # 如果历史记录不足,返回轻度平滑的当前帧数据 + if len(self.centerline_history) < 3: + # 对当前帧进行空间平滑 + smoothed_data = centerline_data.copy() + # 使用滑动窗口平均 + window_size = 5 + for i in range(len(smoothed_data)): + start_idx = max(0, i - window_size // 2) + end_idx = min(len(smoothed_data), i + window_size // 2 + 1) + window = smoothed_data[start_idx:end_idx] + if len(window) > 0: + smoothed_data[i, 1] = np.mean(window[:, 1]) # 平滑x坐标 + smoothed_data[i, 2] = np.mean(window[:, 2]) # 平滑宽度 + return smoothed_data + + # 时间平滑:使用历史帧的加权平均 + smoothed_data = centerline_data.copy() + + # 为每个y坐标找到历史帧中对应的数据 + for i, (y, x, width) in enumerate(centerline_data): + x_values = [x] + width_values = [width] + weights = [1.0] # 当前帧权重最高 + + # 从历史帧中查找相近y坐标的数据 + for hist_idx, hist_data in enumerate(self.centerline_history[-3:-1]): # 使用最近的2帧历史 + # 找到最接近的y坐标 + y_diffs = np.abs(hist_data[:, 0] - y) + if len(y_diffs) > 0: + closest_idx = np.argmin(y_diffs) + if y_diffs[closest_idx] < 10: # y坐标差异小于10像素 + x_values.append(hist_data[closest_idx, 1]) + width_values.append(hist_data[closest_idx, 2]) + # 历史帧权重递减 + weights.append(0.5 ** (len(self.centerline_history) - hist_idx - 1)) + + # 加权平均 + if len(x_values) > 1: + weights = np.array(weights) + weights = weights / np.sum(weights) + 
smoothed_data[i, 1] = np.sum(np.array(x_values) * weights) + smoothed_data[i, 2] = np.sum(np.array(width_values) * weights) + + # 空间平滑:对结果再进行一次滑动窗口平均 + window_size = 3 + final_data = smoothed_data.copy() + for i in range(len(final_data)): + start_idx = max(0, i - window_size // 2) + end_idx = min(len(final_data), i + window_size // 2 + 1) + window = smoothed_data[start_idx:end_idx] + if len(window) > 0: + final_data[i, 1] = np.mean(window[:, 1]) + final_data[i, 2] = np.mean(window[:, 2]) + + return final_data + + def _estimate_affine(self, prev_gray, curr_gray, mask=None): + """使用光流估计仿射变换(备用方法)""" + try: + # 提取特征点 + if mask is not None: + p0 = cv2.goodFeaturesToTrack(prev_gray, mask=mask, **self.feature_params) + else: + p0 = cv2.goodFeaturesToTrack(prev_gray, **self.feature_params) + + if p0 is None or len(p0) < 4: + return np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32) + + # 计算光流 + p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, p0, None, **self.lk_params) + + if p1 is None or st is None: + return np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32) + + # 只保留好的点 + good_new = p1[st == 1].reshape(-1, 2) + good_old = p0[st == 1].reshape(-1, 2) + + if len(good_new) < 4: + return np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32) + + # 估计仿射变换 + M, _ = cv2.estimateAffinePartial2D(good_old, good_new, method=cv2.RANSAC) + + if M is None: + return np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32) + + return M + + except Exception as e: + logger.debug(f"仿射估计失败: {e}") + return np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32) + + def _warp_mask(self, mask, M, output_shape): + """应用仿射变换""" + try: + W, H = output_shape + warped = cv2.warpAffine(mask, M, (W, H), + flags=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_CONSTANT, + borderValue=0) + return warped + except: + return None + + def _add_mask_visualization(self, mask, visualizations, viz_type, color, add_outline=True): + """添加掩码可视化(增加描边)""" + if mask is None: + return + + try: + contours, _ = 
cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + if contours: + main_contour = max(contours, key=cv2.contourArea) + points = main_contour.squeeze(1)[::5].tolist() + + # 添加填充 + visualizations.append({ + "type": viz_type, + "points": points, + "color": color + }) + + # 添加描边(盲道不添加描边) + if add_outline and viz_type != "blind_path_mask": + visualizations.append({ + "type": "outline", + "points": points, + "color": "rgba(255, 255, 255, 0.8)", # 白色描边 + "thickness": 3 + }) + except: + pass + + + def _update_crosswalk_tracker(self, crosswalk_mask, image_height, image_width): + """更新斑马线追踪器""" + if crosswalk_mask is not None: + self.crosswalk_tracker['consecutive_frames'] += 1 + self.crosswalk_tracker['last_seen_frame'] = self.frame_counter + + # 计算关键指标 + total_area = image_height * image_width + area_ratio = np.sum(crosswalk_mask > 0) / total_area + y_coords, x_coords = np.where(crosswalk_mask > 0) + + if len(y_coords) > 0: + bottom_y_ratio = np.max(y_coords) / image_height + center_x_ratio = np.mean(x_coords) / image_width + + self.crosswalk_tracker['last_area_ratio'] = area_ratio + self.crosswalk_tracker['last_bottom_y_ratio'] = bottom_y_ratio + self.crosswalk_tracker['last_center_x_ratio'] = center_x_ratio + + # 计算角度 + try: + contours, _ = cv2.findContours(crosswalk_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + if contours: + main_contour = max(contours, key=cv2.contourArea) + rect = cv2.minAreaRect(main_contour) + angle = rect[-1] + w, h = rect[1] + if w < h: + angle += 90 + self.crosswalk_tracker['last_angle'] = angle + except: + self.crosswalk_tracker['last_angle'] = 0.0 + + # 状态切换 + is_ready_to_switch = ( + area_ratio >= self.CROSSWALK_SWITCH_AREA_RATIO and + bottom_y_ratio >= self.CROSSWALK_SWITCH_BOTTOM_RATIO or + (self.crosswalk_tracker['consecutive_frames'] >= self.CROSSWALK_SWITCH_CONSECUTIVE_FRAMES + and area_ratio > 0.18) + ) + + if is_ready_to_switch and self.crosswalk_tracker['alignment_status'] == 'aligned': + if 
self.crosswalk_tracker['stage'] != 'ready': + self.crosswalk_tracker['stage'] = 'ready' + elif area_ratio > 0.07 or bottom_y_ratio > 0.75: + if self.crosswalk_tracker['stage'] in ['far', 'not_detected']: + self.crosswalk_tracker['stage'] = 'approaching' + elif area_ratio > 0.01: + if self.crosswalk_tracker['stage'] == 'not_detected': + self.crosswalk_tracker['stage'] = 'far' + else: + # 丢失检测 + if self.frame_counter - self.crosswalk_tracker['last_seen_frame'] > 15: + self.crosswalk_tracker['stage'] = 'not_detected' + self.crosswalk_tracker['consecutive_frames'] = 0 + self.crosswalk_tracker['position_announced'] = False + self.crosswalk_tracker['alignment_status'] = 'not_aligned' + # 重置准备状态标志 + if hasattr(self, 'crosswalk_ready_announced'): + self.crosswalk_ready_announced = False + self.crosswalk_ready_time = 0 + if hasattr(self, 'traffic_light_history'): + self.traffic_light_history.clear() + self.green_light_announced = False + + def _handle_crosswalk_approaching(self, frame_visualizations, image_height, image_width, image): + """处理接近斑马线的情况""" + # 障碍物检测 + if self.obstacle_detector and self.frame_counter % self.OBSTACLE_DETECTION_INTERVAL == 0: + detected_obstacles = self._detect_obstacles(image) + self.last_detected_obstacles = detected_obstacles + self.last_obstacle_detection_frame = self.frame_counter + + # 添加障碍物可视化 + for obs in self.last_detected_obstacles: + self._add_obstacle_visualization(obs, frame_visualizations) + + # 优先检查近距离障碍物(提高阈值,只有非常近才报警) + NEAR_DISTANCE_Y_THRESHOLD = 0.75 # 提高到0.75 + NEAR_DISTANCE_AREA_THRESHOLD = 0.12 # 提高到0.12 + near_obstacles = [ + obs for obs in self.last_detected_obstacles + if (obs.get('bottom_y_ratio', 0) > NEAR_DISTANCE_Y_THRESHOLD or + obs.get('area_ratio', 0) > NEAR_DISTANCE_AREA_THRESHOLD) + ] + + # 如果有近距离障碍物,应用相同的播报逻辑 + if near_obstacles: + main_obstacle = near_obstacles[0] + obstacle_name = main_obstacle.get('name', '') + current_time = time.time() + + # 检查是否需要播报(避免重复) + should_announce = False + if obstacle_name != 
self.last_obstacle_speech: + should_announce = True + self.last_obstacle_speech = obstacle_name + self.last_obstacle_speech_time = current_time + elif current_time - self.last_obstacle_speech_time > self.obstacle_speech_cooldown: + should_announce = True + self.last_obstacle_speech_time = current_time + + if should_announce: + return self._speech_for_obstacle(obstacle_name) + else: + # 没有障碍物时清空记录 + self.last_obstacle_speech = "" + + # 对准逻辑 + if self.crosswalk_tracker['alignment_status'] == 'not_aligned': + guidance_text = "正在接近斑马线,为您对准方向。" + self.crosswalk_tracker['alignment_status'] = 'aligning' + else: + angle = self.crosswalk_tracker['last_angle'] + center_x_ratio = self.crosswalk_tracker['last_center_x_ratio'] + + ANGLE_ALIGN_THRESHOLD = 15 + POSITION_ALIGN_THRESHOLD = 0.25 + + if abs(angle) > ANGLE_ALIGN_THRESHOLD: + guidance_text = "右转" if angle < 0 else "左转" + elif abs(center_x_ratio - 0.5) > (POSITION_ALIGN_THRESHOLD / 2): + guidance_text = "右移" if center_x_ratio < 0.5 else "左移" + else: + self.crosswalk_tracker['alignment_status'] = 'aligned' + guidance_text = "斑马线已对准,继续前行。" + + # 添加数据面板 + data_for_panel = { + "状态": "对准斑马线", + "引导": guidance_text, + "角度": f"{self.crosswalk_tracker['last_angle']:.1f}°", + "偏移": f"{(self.crosswalk_tracker['last_center_x_ratio'] - 0.5):.2f}" + } + frame_visualizations.append({ + "type": "data_panel", + "data": data_for_panel, + "position": (25, image_height - 75) + }) + + return guidance_text + + def _execute_state_machine(self, mask, image, frame_visualizations, + image_height, image_width, curr_gray): + """执行状态机逻辑""" + if self.current_state == STATE_ONBOARDING: + return self._handle_onboarding(mask, image, frame_visualizations, + image_height, image_width) + elif self.current_state == STATE_NAVIGATING: + return self._handle_navigating(mask, image, frame_visualizations, + image_height, image_width, curr_gray) + elif self.current_state == STATE_MANEUVERING_TURN: + return self._handle_maneuvering_turn(mask, image, 
frame_visualizations, + image_height, image_width) + elif self.current_state == STATE_LOCKING_ON: + return self._handle_locking_on(frame_visualizations) + elif self.current_state == STATE_AVOIDING_OBSTACLE: + return self._handle_avoiding_obstacle(mask, image, frame_visualizations, + image_height, image_width) + + return "" + + def _handle_onboarding(self, mask, image, frame_visualizations, image_height, image_width): + """处理上盲道状态""" + image_center_x = image_width / 2 + vp_features = self._get_vanishing_point_features(mask) + + if vp_features and vp_features['fit_error'] < self.VP_FIT_ERROR_THRESHOLD: + # 使用灭点法 + VP, L_center = vp_features["VP"], vp_features["L_center"] + + if self.onboarding_step == ONBOARDING_STEP_ROTATION: + if abs(VP[0] - image_center_x) < (image_width * self.ONBOARDING_ALIGN_THRESHOLD_RATIO): + guidance_text = "方向已对正!现在校准位置。" + self.onboarding_step = ONBOARDING_STEP_TRANSLATION + else: + guidance_text = "请向左转动。" if VP[0] < image_center_x else "请向右转动。" + + angle_error_px = VP[0] - image_center_x + self._add_data_panel(frame_visualizations, { + "状态": "上盲道 (方向)", + "引导": guidance_text, + "角度": f"{angle_error_px:.1f}px", + "偏移": "待校准" + }, (25, image_height - 75)) + + elif self.onboarding_step == ONBOARDING_STEP_TRANSLATION: + L_center_bottom_x = self._calculate_line_x_at_y(L_center, image_height - 1) + + if L_center_bottom_x: + center_offset_pixels = L_center_bottom_x - image_center_x + center_offset_ratio = abs(center_offset_pixels) / image_width + + if center_offset_ratio < self.ONBOARDING_CENTER_OFFSET_THRESHOLD_RATIO: + guidance_text = "校准完成!您已在盲道上,开始前行。" + self.current_state = STATE_NAVIGATING + else: + guidance_text = "请向左平移。" if L_center_bottom_x < image_center_x else "请向右平移。" + + self._add_data_panel(frame_visualizations, { + "状态": "上盲道 (位置)", + "引导": guidance_text, + "角度": "已对准", + "偏移": f"{center_offset_ratio * 100:.1f}%" + }, (25, image_height - 75)) + else: + guidance_text = "请向前移动,让盲道更清晰。" + else: + # 使用像素域方法 + pixel_features = 
self._get_pixel_domain_features(mask, image.shape) + if not pixel_features: + return "" + self._add_navigation_info_visualization(pixel_features, image_height, image_width, frame_visualizations) + guidance_text = self._handle_pixel_domain_onboarding( + pixel_features, image_height, image_width, frame_visualizations + ) + + return guidance_text + + def _handle_navigating(self, mask, image, frame_visualizations, + image_height, image_width, curr_gray): + """处理常规导航状态""" + image_center_x = image_width / 2 + + # 提取路径特征 + features = self._get_pixel_domain_features(mask, image.shape) + if not features: + return "路径特征提取失败" + self._add_navigation_info_visualization(features, image_height, image_width, frame_visualizations) + + # 转弯检测 + if self.turn_cooldown_frames == 0: + corner_info = self._detect_sharp_corner(features['centerline_data']) + if corner_info: + self._update_turn_tracker(corner_info) + + if self.turn_detection_tracker['consecutive_hits'] >= 3: + stable_corner_info = self.turn_detection_tracker['corner_info'] + corner_y = stable_corner_info['corner_point_pixel'][1] + turn_trigger_y_threshold = image_height * 0.65 + + if corner_y > turn_trigger_y_threshold: + # 触发转弯 + direction_text = '右' if self.turn_detection_tracker['direction'] == 'right' else '左' + self.current_state = STATE_MANEUVERING_TURN + self.maneuver_target_info = stable_corner_info + self.maneuver_step = MANEUVER_STEP_1_ISSUE_COMMAND + self._reset_turn_tracker() + # 不再播报"到达转弯处",直接返回空字符串,让后续逻辑处理 + return "" + else: + # 不再预告转弯,继续常规导航 + pass + + # 优先级1:障碍物检测(最高优先级) + obstacles = self._check_obstacles(image, mask, frame_visualizations) + if obstacles: + # 获取主要障碍物 + main_obstacle = obstacles[0] + obstacle_name = main_obstacle.get('name', '') + current_time = time.time() + + # 检查是否需要播报(避免重复) + should_announce = False + if obstacle_name != self.last_obstacle_speech: + # 不同障碍物,立即播报 + should_announce = True + self.last_obstacle_speech = obstacle_name + self.last_obstacle_speech_time = current_time + elif 
current_time - self.last_obstacle_speech_time > self.obstacle_speech_cooldown: + # 同一障碍物但超过冷却时间,再次播报 + should_announce = True + self.last_obstacle_speech_time = current_time + + if should_announce: + # 不进入完整的避障流程,只是警告 + # 设置待播报的障碍物语音,而不是直接返回 + self.pending_obstacle_voice = self._speech_for_obstacle(obstacle_name) + # 如果不需要播报,继续常规导航 + else: + # 没有障碍物,清空记录 + self.last_obstacle_speech = "" + self.pending_obstacle_voice = None + + # 优先级2:常规导航(左移/右移/左转/右转 > 直行) + return self._generate_navigation_guidance( + features, image_height, image_width, frame_visualizations + ) + + def _handle_maneuvering_turn(self, mask, image, frame_visualizations, + image_height, image_width): + """处理转弯状态""" + features = self._get_pixel_domain_features(mask, image.shape) + if not features: + return "丢失路径,重新搜索。" + self._add_navigation_info_visualization(features, image_height, image_width, frame_visualizations) + if self.maneuver_step == MANEUVER_STEP_1_ISSUE_COMMAND: + direction_text = '右' if self.maneuver_target_info['direction'] == 'right' else '左' + guidance_text = f"请向{direction_text}平移。" + + poly_func = features['poly_func'] + y_check = image_height * 0.7 + self.maneuver_target_info['old_path_center_x'] = poly_func(y_check) + + self.maneuver_step = MANEUVER_STEP_2_WAIT_FOR_SHIFT + + self._add_data_panel(frame_visualizations, { + "状态": "处理转弯", + "引导": guidance_text, + "步骤": "发出指令", + "方向": direction_text + }, (25, image_height - 75)) + + return guidance_text + + elif self.maneuver_step == MANEUVER_STEP_2_WAIT_FOR_SHIFT: + old_path_x = self.maneuver_target_info.get('old_path_center_x') + if old_path_x is None: + self.maneuver_step = MANEUVER_STEP_1_ISSUE_COMMAND + return "" + + poly_func = features['poly_func'] + y_check = image_height * 0.7 + current_path_x = poly_func(y_check) + shift_distance = abs(current_path_x - old_path_x) + + centerline_data = features['centerline_data'] + width_at_check_y = self._get_width_at_y(centerline_data, y_check) + + if shift_distance > (width_at_check_y * 
0.5): + guidance_text = "检测到已移动,开始对准新方向。" + self.maneuver_step = MANEUVER_STEP_3_ALIGN_ON_NEW_PATH + else: + direction_text = '右' if self.maneuver_target_info['direction'] == 'right' else '左' + guidance_text = f"请继续向{direction_text}平移。" + + self._add_data_panel(frame_visualizations, { + "状态": "处理转弯", + "引导": guidance_text, + "步骤": "等待平移", + "偏移量": f"{shift_distance:.1f}px" + }, (25, image_height - 75)) + + return guidance_text + + elif self.maneuver_step == MANEUVER_STEP_3_ALIGN_ON_NEW_PATH: + poly_func = features['poly_func'] + y_check = image_height * 0.5 + current_path_x_at_center = poly_func(y_check) + + pixel_error = current_path_x_at_center - image_width / 2 + center_offset_ratio = abs(pixel_error) / image_width + + if center_offset_ratio < self.NAV_CENTER_OFFSET_THRESHOLD_RATIO: + guidance_text = "已对准新路径,请向前直行。" + self.current_state = STATE_NAVIGATING + self.maneuver_target_info = None + self.turn_cooldown_frames = self.TURN_COOLDOWN_DURATION + else: + move_direction = "右" if pixel_error > 0 else "左" + guidance_text = f"请向{move_direction}微调,对准盲道。" + + self._add_data_panel(frame_visualizations, { + "状态": "处理转弯", + "引导": guidance_text, + "步骤": "对准新路径", + "误差": f"{center_offset_ratio * 100:.1f}%" + }, (25, image_height - 75)) + + return guidance_text + + def _handle_locking_on(self, frame_visualizations): + """处理锁定状态""" + if not self.lock_on_data: + self.current_state = STATE_NAVIGATING + return "" + + main_obstacle = self.lock_on_data['main_obstacle'] + + # 添加脉冲特效 + self._add_obstacle_visualization(main_obstacle, frame_visualizations, pulse_effect=True) + + # 检查时间 + if time.time() - self.lock_on_data['start_time'] > 0.7: + self.avoidance_plan = self.lock_on_data['avoidance_plan'] + self.avoidance_step_index = 0 + self.current_state = STATE_AVOIDING_OBSTACLE + self.lock_on_data = None + + return "" + + def _handle_avoiding_obstacle(self, mask, image, frame_visualizations, + image_height, image_width): + """处理避障状态""" + if not self.avoidance_plan or 
self.avoidance_step_index >= len(self.avoidance_plan): + self.current_state = STATE_NAVIGATING + self.avoidance_plan = None + return "避让完成,已回到盲道。" + + step = self.avoidance_plan[self.avoidance_step_index] + + if step['type'] == 'sidestep_clear': + direction = step['direction'] + + if self.obstacle_detector: + final_obstacles = self._detect_obstacles(image, mask) + else: + final_obstacles = [] + + if final_obstacles: + guidance_text = f"路径被挡住,请向{'右' if direction == 'right' else '左'}侧平移。" + else: + guidance_text = "好的,请停下侧移。" + self.avoidance_step_index += 1 + + self._add_data_panel(frame_visualizations, { + "状态": "避障中", + "引导": guidance_text, + "步骤": "侧向移出", + "方向": direction + }, (25, image_height - 75)) + + return guidance_text + + elif step['type'] == 'forward_pass': + # 简化处理,直接进入下一步 + self.avoidance_step_index += 1 + return "向前直行几步越过障碍物。然后说‘好了’。" + + elif step['type'] == 'sidestep_return': + direction = step['direction'] + features = self._get_pixel_domain_features(mask, image.shape) + + if not features: + return f"没看到盲道,请向{'右' if direction == 'right' else '左'}侧小幅移动。" + + poly_func = features['poly_func'] + y_target = image_height * 0.5 + x_target = poly_func(y_target) + + center_offset_pixels = x_target - image_width / 2 + center_offset_ratio = abs(center_offset_pixels) / image_width + + if center_offset_ratio < self.NAV_CENTER_OFFSET_THRESHOLD_RATIO: + guidance_text = "已回到盲道。" + self.avoidance_step_index += 1 + else: + guidance_text = "向右平移,对准盲道" if center_offset_pixels > 0 else "向左平移,对准盲道" + + self._add_data_panel(frame_visualizations, { + "状态": "避障中", + "引导": guidance_text, + "步骤": "回归盲道", + "偏移": f"{center_offset_ratio * 100:.1f}%" + }, (25, image_height - 75)) + + return guidance_text + + # ========== 辅助方法 ========== + + def _get_vanishing_point_features(self, mask): + """提取灭点特征""" + try: + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + if not contours: + return None + main_contour = max(contours, key=cv2.contourArea) + 
if cv2.contourArea(main_contour) < 5000: + return None + + rect = cv2.minAreaRect(main_contour) + center, _, angle = rect + angle_rad = np.deg2rad(angle) + R = np.array([[np.cos(angle_rad), -np.sin(angle_rad)], + [np.sin(angle_rad), np.cos(angle_rad)]]) + points_transformed = np.dot(main_contour.squeeze(1) - center, R) + left_points = main_contour.squeeze(1)[points_transformed[:, 0] < 0] + right_points = main_contour.squeeze(1)[points_transformed[:, 0] >= 0] + + if len(left_points) < 20 or len(right_points) < 20: + return None + + [vx_l, vy_l, x_l, y_l] = cv2.fitLine(left_points, cv2.DIST_L2, 0, 0.01, 0.01) + [vx_r, vy_r, x_r, y_r] = cv2.fitLine(right_points, cv2.DIST_L2, 0, 0.01, 0.01) + + a1, b1, c1 = vy_l, -vx_l, vx_l * y_l - vy_l * x_l + a2, b2, c2 = vy_r, -vx_r, vx_r * y_r - vy_r * x_r + determinant = a1 * b2 - a2 * b1 + + if abs(determinant) < 1e-6: + return None + + vp_x = (b1 * c2 - b2 * c1) / determinant + vp_y = (a2 * c1 - a1 * c2) / determinant + L_center = ((vx_l + vx_r) / 2, (vy_l + vy_r) / 2, (x_l + x_r) / 2, (y_l + y_r) / 2) + + total_dist = 0 + for pt in left_points: + total_dist += abs((pt[0] - x_l) * vy_l - (pt[1] - y_l) * vx_l) + for pt in right_points: + total_dist += abs((pt[0] - x_r) * vx_r - (pt[1] - y_r) * vy_r) + fit_error = total_dist / (len(left_points) + len(right_points)) + + return {"VP": (vp_x, vp_y), "L_center": L_center, "fit_error": fit_error} + except: + return None + + def _get_pixel_domain_features(self, mask, image_shape): + """提取像素域特征""" + try: + height, width = image_shape[:2] + + centerline_data = [] + for y in range(height - 1, int(height * 0.3), -5): + row = mask[y, :] + x_pixels = np.where(row > 0)[0] + if x_pixels.size > 10: + x_min, x_max = x_pixels[0], x_pixels[-1] + path_width = x_max - x_min + center_x = (x_min + x_max) / 2 + centerline_data.append([y, center_x, path_width]) + + if len(centerline_data) < 20: + return None + + data = np.array(centerline_data) + + # 应用中心线平滑 + data = self._smooth_centerline(data) + + # 
检测急转弯 + sharp_turn_index = self._find_sharp_turn(data) + if sharp_turn_index is not None: + cutoff_index = int(sharp_turn_index * 0.6) + if cutoff_index >= 10: + data = data[:cutoff_index] + + y_coords, x_coords, widths = data[:, 0], data[:, 1], data[:, 2] + weights = widths + + # 原始多项式拟合 + coeffs_raw = np.polyfit(y_coords, x_coords, 2, w=weights) + + # 【新增】对多项式系数进行时间平滑 + self.poly_coeffs_history.append(coeffs_raw.copy()) + if len(self.poly_coeffs_history) > self.poly_coeffs_history_max: + self.poly_coeffs_history.pop(0) + + # 使用指数加权移动平均平滑系数 + if len(self.poly_coeffs_history) >= 3: + # 权重:最近的帧权重更高 + weights_time = np.array([0.7 ** (len(self.poly_coeffs_history) - i - 1) + for i in range(len(self.poly_coeffs_history))]) + weights_time = weights_time / np.sum(weights_time) + + # 加权平均系数 + coeffs = np.zeros_like(coeffs_raw) + for i, hist_coeffs in enumerate(self.poly_coeffs_history): + coeffs += hist_coeffs * weights_time[i] + else: + coeffs = coeffs_raw + + poly_func = np.poly1d(coeffs) + + curvature_proxy = abs(coeffs[0]) + tangent_slope = 2 * coeffs[0] * height + coeffs[1] + tangent_angle_rad = np.arctan(tangent_slope) + + return { + "poly_func": poly_func, + "curvature_proxy": curvature_proxy, + "tangent_angle_rad": tangent_angle_rad, + "centerline_data": np.array(centerline_data) + } + except Exception as e: + logger.warning(f"Pixel domain feature calculation failed: {e}") + return None + + def _find_sharp_turn(self, data): + """查找急转弯点""" + window_size = 5 + angle_threshold = 30 + + for i in range(len(data) - 2 * window_size): + front_window = data[i:i + window_size] + back_window = data[i + window_size:i + 2 * window_size] + + front_dir = [front_window[-1, 1] - front_window[0, 1], + front_window[-1, 0] - front_window[0, 0]] + back_dir = [back_window[-1, 1] - back_window[0, 1], + back_window[-1, 0] - back_window[0, 0]] + + angle1 = np.arctan2(front_dir[1], front_dir[0]) + angle2 = np.arctan2(back_dir[1], back_dir[0]) + angle_diff = abs(np.degrees(angle2 - angle1)) 
+ + if angle_diff > 180: + angle_diff = 360 - angle_diff + + if angle_diff > angle_threshold: + return i + window_size + + return None + + def _detect_sharp_corner(self, centerline_data, angle_threshold_deg=45): + """检测急转弯""" + try: + if len(centerline_data) < 15: + return None + points_in_range = np.array(centerline_data) + num_points = len(points_in_range) + + window_size = max(5, int(num_points * 0.15)) + best_turn_info = None + max_angle_diff = 0 + + for i in range(0, num_points - 2 * window_size, 2): + front_segment = points_in_range[i:i + window_size] + back_segment = points_in_range[i + window_size:i + 2 * window_size] + + if len(front_segment) < 3 or len(back_segment) < 3: + continue + + front_y = front_segment[:, 0] + front_x = front_segment[:, 1] + front_coeffs = np.polyfit(front_y, front_x, 1) + front_slope = front_coeffs[0] + + back_y = back_segment[:, 0] + back_x = back_segment[:, 1] + back_coeffs = np.polyfit(back_y, back_x, 1) + back_slope = back_coeffs[0] + + front_angle = np.arctan(front_slope) + back_angle = np.arctan(back_slope) + + angle_diff_rad = back_angle - front_angle + angle_diff_deg = abs(np.degrees(angle_diff_rad)) + + if angle_diff_deg > max_angle_diff and angle_diff_deg > angle_threshold_deg: + max_angle_diff = angle_diff_deg + corner_point_idx = i + window_size + corner_point = points_in_range[corner_point_idx] + + direction = "right" if angle_diff_rad > 0 else "left" + + post_turn_segment = points_in_range[ + corner_point_idx:min(corner_point_idx + window_size * 2, num_points)] + if len(post_turn_segment) > 0: + post_turn_center_x = np.mean(post_turn_segment[:, 1]) + else: + post_turn_center_x = corner_point[1] + + best_turn_info = { + "corner_point_pixel": (corner_point[1], corner_point[0]), + "turn_angle": max_angle_diff, + "direction": direction, + "post_turn_center_x": post_turn_center_x, + "corner_point_idx": corner_point_idx + } + + return best_turn_info + + except Exception as e: + logger.warning(f"Corner detection error: 
{e}")
                return None

    def _update_turn_tracker(self, corner_info):
        """Update the turn tracker with the latest corner detection.

        Consecutive detections in the same direction increment
        ``consecutive_hits``; a direction change restarts the streak at 1.
        """
        detected_direction = corner_info['direction']

        if detected_direction == self.turn_detection_tracker['direction']:
            self.turn_detection_tracker['consecutive_hits'] += 1
        else:
            # Direction flipped: start a new streak.
            self.turn_detection_tracker['direction'] = detected_direction
            self.turn_detection_tracker['consecutive_hits'] = 1

        self.turn_detection_tracker['last_seen_frame'] = self.frame_counter
        self.turn_detection_tracker['corner_info'] = corner_info

    def _reset_turn_tracker(self):
        """Reset the turn tracker to its empty/initial state."""
        self.turn_detection_tracker = {
            'direction': None,
            'consecutive_hits': 0,
            'last_seen_frame': 0,
            'corner_info': None
        }

    def _calculate_line_x_at_y(self, line_params, y_target):
        """Return the x coordinate where line (vx, vy, x0, y0) crosses
        y == y_target, or None for a (near-)horizontal line."""
        vx, vy, x0, y0 = line_params
        if abs(vy) < 1e-6:
            return None
        t = (y_target - y0) / vy
        x = x0 + t * vx
        return x

    def _get_width_at_y(self, centerline_data, y_target):
        """Return the path width sampled at the row closest to y_target."""
        ys = centerline_data[:, 0]
        ws = centerline_data[:, 2]
        idx = np.abs(ys - y_target).argmin()
        return ws[idx]

    def _detect_obstacles(self, image, path_mask=None):
        """Run the obstacle detector on *image* (Day 20 performance-tuned).

        Returns a list of obstacle dicts from ``self.obstacle_detector``;
        fields that downstream code expects but the detector may omit
        (box_coords, y_position_ratio, label, center, confidence) are
        backfilled with defaults.
        """
        # Day 20: verbose per-detection logging removed; only DEBUG detail.

        if self.obstacle_detector is None:
            return []

        # Log the whitelist size once, on the very first call.
        if not hasattr(self, '_classes_printed'):
            self._classes_printed = True
            if hasattr(self.obstacle_detector, 'WHITELIST_CLASSES'):
                logger.info(f"[障碍物检测] 白名单类别数: {len(self.obstacle_detector.WHITELIST_CLASSES)}")

        try:
            detected_obstacles = self.obstacle_detector.detect(image, path_mask=path_mask)

            # Backfill fields later code relies on but the detector may omit.
            H, W = image.shape[:2]
            for obj in detected_obstacles:
                if 'mask' in obj and obj['mask'] is not None:
                    y_coords, x_coords = np.where(obj['mask'] > 0)
                    if len(y_coords) > 0 and len(x_coords) > 0:
                        x1, y1 = int(np.min(x_coords)), int(np.min(y_coords))
                        x2, y2 = int(np.max(x_coords)), int(np.max(y_coords))
                        obj['box_coords'] = (x1, y1, x2, y2)

                if
'y_position_ratio' not in obj:
                    obj['y_position_ratio'] = obj.get('center_y', 0) / H
                if 'label' not in obj:
                    obj['label'] = obj.get('name', 'unknown')
                if 'center' not in obj:
                    obj['center'] = (obj.get('center_x', 0), obj.get('center_y', 0))
                if 'confidence' not in obj:
                    obj['confidence'] = 0.5

            # Day 20: single summary line, throttled to one log every 30 frames.
            if detected_obstacles and self.frame_counter % 30 == 0:
                names = [o.get('name', '?') for o in detected_obstacles[:3]]
                logger.info(f"[障碍物] 检测到 {len(detected_obstacles)} 个: {names}")

            return detected_obstacles

        except Exception as e:
            logger.error(f"[障碍物检测] 失败: {e}")
            import traceback
            traceback.print_exc()
            return []

    def _check_and_set_obstacle_voice(self, obstacles):
        """Pick the dominant near obstacle and queue a voice announcement.

        Sets or clears ``self.pending_obstacle_voice`` and tracks the last
        announced obstacle/time so repeats are suppressed within the cooldown.
        """
        if not obstacles:
            self.last_obstacle_speech = ""
            self.pending_obstacle_voice = None
            return

        # Near-obstacle thresholds (raised so only very close objects alert):
        # obstacle bottom below 75% of the frame height, or area above 12%.
        NEAR_DISTANCE_Y_THRESHOLD = 0.75
        NEAR_DISTANCE_AREA_THRESHOLD = 0.12

        near_obstacles = []
        for obs in obstacles:
            if (obs.get('bottom_y_ratio', 0) > NEAR_DISTANCE_Y_THRESHOLD or
                    obs.get('area_ratio', 0) > NEAR_DISTANCE_AREA_THRESHOLD):
                near_obstacles.append(obs)

        if near_obstacles:
            # Dominant obstacle = the one with the largest area ratio.
            main_obstacle = max(near_obstacles, key=lambda x: x.get('area_ratio', 0))
            obstacle_name = main_obstacle.get('name', '')
            current_time = time.time()

            # Announce when the obstacle changed, or the cooldown has elapsed.
            should_announce = False
            if obstacle_name != self.last_obstacle_speech:
                # Different obstacle: announce immediately.
                should_announce = True
                self.last_obstacle_speech = obstacle_name
                self.last_obstacle_speech_time = current_time
            elif current_time - self.last_obstacle_speech_time > self.obstacle_speech_cooldown:
                # Same obstacle, but past the cooldown: announce again.
                should_announce = True
                self.last_obstacle_speech_time = current_time

            if should_announce:
                self.pending_obstacle_voice = self._speech_for_obstacle(obstacle_name)
        else:
            # No near obstacle: clear the announcement state.
            self.last_obstacle_speech = ""
            self.pending_obstacle_voice = None

    def
_check_obstacles(self, image, mask, frame_visualizations): + """检查并处理障碍物""" + # 使用缓存策略 + if self.frame_counter % self.OBSTACLE_DETECTION_INTERVAL == 0: + final_obstacles = self._detect_obstacles(image, mask) + # 【新增】稳定化障碍物,避免重复叠加 + if hasattr(self, 'prev_gray') and self.prev_gray is not None: + curr_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + final_obstacles = self._stabilize_obstacle_list( + final_obstacles, + self.last_detected_obstacles, + self.prev_gray, + curr_gray, + image.shape[:2] + ) + self.last_detected_obstacles = final_obstacles + self.last_obstacle_detection_frame = self.frame_counter + else: + if self.frame_counter - self.last_obstacle_detection_frame < self.OBSTACLE_CACHE_DURATION_FRAMES: + final_obstacles = self.last_detected_obstacles + else: + final_obstacles = [] + + # 添加可视化 + for obs in final_obstacles: + self._add_obstacle_visualization(obs, frame_visualizations) + + # 筛选近距离障碍物(提高阈值,只有非常近才报警) + NEAR_DISTANCE_Y_THRESHOLD = 0.75 # 提高到0.75,障碍物底部必须在画面下方75%以下 + NEAR_DISTANCE_AREA_THRESHOLD = 0.12 # 提高到0.12,障碍物必须占画面12%以上 + + near_obstacles = [ + obs for obs in final_obstacles + if (obs.get('bottom_y_ratio', 0) > NEAR_DISTANCE_Y_THRESHOLD or + obs.get('area_ratio', 0) > NEAR_DISTANCE_AREA_THRESHOLD) + ] + + return near_obstacles + + def _plan_avoidance(self, obstacle_info, image_width): + """规划避障路径""" + obstacle_center_x = obstacle_info['center_x'] + image_center_x = image_width / 2 + + if obstacle_center_x < image_center_x: + turn_direction = 'right' + else: + turn_direction = 'left' + + plan = [ + {'type': 'sidestep_clear', 'direction': turn_direction}, + {'type': 'forward_pass'}, + {'type': 'sidestep_return', 'direction': 'left' if turn_direction == 'right' else 'right'} + ] + return plan + + def _generate_navigation_guidance(self, features, image_height, image_width, frame_visualizations): + """生成导航指引""" + poly_func = features['poly_func'] + is_curve = features['curvature_proxy'] > self.CURVATURE_PROXY_THRESHOLD + lookahead_ratio = 0.6 if 
is_curve else 0.4 + y_target = image_height * lookahead_ratio + x_target = poly_func(y_target) + + # 添加中心线可视化 + plot_y = np.arange(int(image_height * 0.3), image_height, 5).astype(int) + plot_x = poly_func(plot_y).astype(int) + centerline_points = np.vstack((plot_x, plot_y)).T.tolist() + frame_visualizations.append({ + "type": "polyline", + "points": centerline_points, + "color": "yellow", + "width": 2 + }) + + # 添加目标点 + frame_visualizations.append({ + "type": "circle", + "center": [int(x_target), int(y_target)], + "radius": 10, + "color": "red" + }) + + # 计算导航指令(优先级:转向/平移 > 直行) + center_offset_pixels = x_target - image_width / 2 + center_offset_ratio = abs(center_offset_pixels) / image_width + orientation_error_rad = features['tangent_angle_rad'] + + # 先检查是否需要转向(左转/右转) + if orientation_error_rad > self.NAV_ORIENTATION_THRESHOLD_RAD: + guidance_text = "左转" + elif orientation_error_rad < -self.NAV_ORIENTATION_THRESHOLD_RAD: + guidance_text = "右转" + # 再检查是否需要平移(左移/右移) + elif center_offset_ratio > self.NAV_CENTER_OFFSET_THRESHOLD_RATIO: + guidance_text = "右移" if center_offset_pixels > 0 else "左移" + # 最后才是直行 + else: + guidance_text = "保持直行" + + # 添加数据面板 + self._add_data_panel(frame_visualizations, { + "状态": "常规导航", + "引导": guidance_text, + "朝向": f"{np.degrees(orientation_error_rad):.1f}°", + "偏移": f"{center_offset_ratio * 100:.1f}%" + }, (25, image_height - 75)) + + return guidance_text + + def _handle_pixel_domain_onboarding(self, pixel_features, image_height, image_width, frame_visualizations): + """处理像素域的上盲道引导""" + image_center_x = image_width / 2 + orientation_error_rad = pixel_features['tangent_angle_rad'] + poly_func = pixel_features['poly_func'] + + y_bottom = image_height - 1 + x_target_bottom = poly_func(y_bottom) + center_offset_pixels = x_target_bottom - image_center_x + center_offset_ratio = abs(center_offset_pixels) / image_width + + if self.onboarding_step == ONBOARDING_STEP_ROTATION: + if abs(orientation_error_rad) < 
self.ONBOARDING_ORIENTATION_THRESHOLD_RAD: + guidance_text = "方向已对正!现在校准位置。" + self.onboarding_step = ONBOARDING_STEP_TRANSLATION + else: + guidance_text = "请向左转动。" if orientation_error_rad > 0.1 else "请向右转动。" + + self._add_data_panel(frame_visualizations, { + "状态": "上盲道 (方向)", + "引导": guidance_text, + "角度": f"{np.degrees(orientation_error_rad):.1f}°", + "偏移": "待校准" + }, (25, image_height - 75)) + self._add_navigation_info_visualization(pixel_features, image_height, image_width, frame_visualizations) + + return guidance_text + + elif self.onboarding_step == ONBOARDING_STEP_TRANSLATION: + if center_offset_ratio < self.ONBOARDING_CENTER_OFFSET_THRESHOLD_RATIO: + guidance_text = "校准完成!您已在盲道上,开始前行。" + self.current_state = STATE_NAVIGATING + else: + guidance_text = "请向右平移。" if center_offset_pixels > 0 else "请向左平移。" + + self._add_data_panel(frame_visualizations, { + "状态": "上盲道 (位置)", + "引导": guidance_text, + "角度": "已对准", + "偏移": f"{center_offset_ratio * 100:.1f}%" + }, (25, image_height - 75)) + + return guidance_text + + def _add_obstacle_visualization(self, obstacle, visualizations, pulse_effect=False): + """添加障碍物可视化(简化版:仅边框,近红远黄)""" + try: + # 计算障碍物危险等级 + bottom_y_ratio = obstacle.get('bottom_y_ratio', 0) + area_ratio = obstacle.get('area_ratio', 0) + + # 判断是否为近距离障碍物 + is_near = bottom_y_ratio > 0.7 or area_ratio > 0.1 # 近距离障碍物 + + # 添加 mask 边框可视化(如果有) + if 'mask' in obstacle and obstacle['mask'] is not None: + mask = obstacle['mask'] + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + if contours: + # 找到最大的轮廓 + max_contour = max(contours, key=cv2.contourArea) + points = max_contour.squeeze(1)[::5].tolist() + + # 根据距离选择边框颜色:近距离红色,远距离黄色 + if is_near: + outline_color = "rgba(255, 0, 0, 1.0)" # 红色 + thickness = 3 + else: + outline_color = "rgba(255, 255, 0, 0.8)" # 黄色 + thickness = 2 + + # 只添加边框,不添加填充和文字 + visualizations.append({ + "type": "outline", + "points": points, + "color": outline_color, + "thickness": thickness + }) + except 
Exception as e: + logger.error(f"[_add_obstacle_visualization] 添加障碍物可视化失败: {e}") + + def _add_navigation_info_visualization(self, features, image_height, image_width, frame_visualizations): + """添加导航计算信息的可视化""" + if not features: + return + + try: + # 获取计算结果 + poly_func = features.get('poly_func') + curvature_proxy = features.get('curvature_proxy', 0) + tangent_angle_rad = features.get('tangent_angle_rad', 0) + tangent_angle_deg = np.degrees(tangent_angle_rad) + + # 绘制切线方向 + if poly_func: + # 在画面底部计算切线 + y_bottom = image_height - 50 + x_bottom = poly_func(y_bottom) + + # 计算切线的终点 + tangent_length = 100 + dx = tangent_length * np.cos(tangent_angle_rad) + dy = tangent_length * np.sin(tangent_angle_rad) + + # 【新增】绘制基准虚线(垂直向上) + baseline_length = 80 + frame_visualizations.append({ + "type": "dashed_line", + "start": [int(x_bottom), int(y_bottom)], + "end": [int(x_bottom), int(y_bottom - baseline_length)], + "color": "rgba(255, 255, 255, 0.6)", # 白色虚线 + "thickness": 2 + }) + + # 添加切线可视化 + frame_visualizations.append({ + "type": "arrow", + "start": [int(x_bottom), int(y_bottom)], + "end": [int(x_bottom + dx), int(y_bottom - dy)], # 注意Y轴方向 + "color": "rgba(0, 255, 255, 0.8)", # 青色 + "thickness": 3, + "tip_length": 0.3 + }) + + # 【新增】绘制夹角弧线标识 + arc_radius = 40 + # 基准线角度是-90度(向上),切线角度是tangent_angle_deg + # OpenCV中角度是从右侧水平线逆时针测量 + start_angle = -90 # 基准线(垂直向上) + end_angle = -90 + tangent_angle_deg # 切线角度 + frame_visualizations.append({ + "type": "angle_arc", + "center": [int(x_bottom), int(y_bottom)], + "radius": arc_radius, + "start_angle": start_angle, + "end_angle": end_angle, + "color": "rgba(255, 200, 0, 0.8)", # 橙黄色 + "thickness": 2 + }) + + # 添加角度文字(文字大小减半) + frame_visualizations.append({ + "type": "text_with_bg", + "text": f"角度: {tangent_angle_deg:.1f}°", + "position": [int(x_bottom + 10), int(y_bottom - 30)], + "font_scale": 0.3, # 从0.6减半到0.3 + "color": "rgba(255, 255, 255, 1.0)", + "bg_color": "rgba(0, 0, 0, 0.7)" + }) + + # 添加曲率信息(文字大小减半) + if curvature_proxy > 
0.00001: + curve_text = "弯道" if curvature_proxy > 0.00005 else "缓弯" + frame_visualizations.append({ + "type": "text_with_bg", + "text": f"{curve_text}: {curvature_proxy:.2e}", + "position": [20, 100], + "font_scale": 0.25, # 从0.5减半到0.25 + "color": "rgba(255, 255, 0, 1.0)", + "bg_color": "rgba(0, 0, 0, 0.7)" + }) + + # 显示中心线数据点 + if 'centerline_data' in features: + centerline_data = features['centerline_data'] + # 在画面中部显示路径宽度 + mid_idx = len(centerline_data) // 2 + if mid_idx < len(centerline_data): + y, x, width = centerline_data[mid_idx] + # 绘制宽度指示线(改为双向箭头) + frame_visualizations.append({ + "type": "double_arrow", # 新增双向箭头类型 + "start": [int(x - width/2), int(y)], + "end": [int(x + width/2), int(y)], + "color": "rgba(0, 255, 0, 0.8)", + "thickness": 2, + "tip_length": 0.15 + }) + # 添加宽度文字(文字大小减半) + frame_visualizations.append({ + "type": "text_with_bg", + "text": f"宽度: {width:.0f}px", + "position": [int(x - 30), int(y - 10)], + "font_scale": 0.25, # 从0.5减半到0.25 + "color": "rgba(255, 255, 255, 1.0)", + "bg_color": "rgba(0, 0, 0, 0.7)" + }) + except Exception as e: + logger.error(f"添加导航信息可视化失败: {e}") + + def _add_data_panel(self, visualizations, data, position): + """添加数据面板""" + visualizations.append({ + "type": "data_panel", + "data": data, + "position": position + }) + + def _add_crosswalk_info_visualization(self, viz_data, image_height, image_width, visualizations): + """添加斑马线检测信息的精美可视化""" + try: + # 1. 
绘制斑马线中心点标识(大十字) + center_x = int(viz_data['center_x_ratio'] * image_width) + center_y = int(viz_data['center_y_ratio'] * image_height) + + cross_size = 20 if viz_data['in_arrival'] else 15 # 减小尺寸 + cross_color = "rgba(255, 100, 0, 1.0)" if viz_data['in_arrival'] else "rgba(0, 200, 255, 0.8)" + + # 水平线 + visualizations.append({ + "type": "line", + "start": [center_x - cross_size, center_y], + "end": [center_x + cross_size, center_y], + "color": cross_color, + "thickness": 2 # 减细 + }) + # 垂直线 + visualizations.append({ + "type": "line", + "start": [center_x, center_y - cross_size], + "end": [center_x, center_y + cross_size], + "color": cross_color, + "thickness": 2 # 减细 + }) + + # 2. 绘制指向斑马线的箭头(从画面中心指向斑马线中心) + screen_center_x = image_width // 2 + screen_center_y = image_height // 2 + + # 只在斑马线不在画面中心时绘制箭头 + distance = np.sqrt((center_x - screen_center_x)**2 + (center_y - screen_center_y)**2) + if distance > 80: # 提高到80像素才画箭头(减少干扰) + visualizations.append({ + "type": "arrow", + "start": [screen_center_x, screen_center_y], + "end": [center_x, center_y], + "color": "rgba(255, 150, 0, 0.6)", # 降低透明度 + "thickness": 2, # 减细 + "tip_length": 0.15 # 减小箭头 + }) + + # 3. 添加信息面板(右上角) + panel_x = image_width - 180 + panel_y = 20 + + # 准备面板数据 + panel_data = { + "斑马线": viz_data['stage'], + "面积": f"{viz_data['area_ratio']*100:.1f}%", + "方位": viz_data['position'], + } + + if viz_data['has_occlusion']: + panel_data["状态"] = "被遮挡" + elif viz_data['in_arrival']: + panel_data["状态"] = "可过马路" + + visualizations.append({ + "type": "data_panel", + "data": panel_data, + "position": (panel_x, panel_y) + }) + + # 4. 
添加面积进度条(视觉化面积大小) + bar_width = 150 + bar_height = 20 + bar_x = image_width - bar_width - 20 + bar_y = panel_y + 90 + + # 背景框 + visualizations.append({ + "type": "rectangle", + "top_left": (bar_x, bar_y), + "bottom_right": (bar_x + bar_width, bar_y + bar_height), + "color": "rgba(50, 50, 50, 0.7)", + "filled": True + }) + + # 进度填充(0-100%,但最多显示到arrival阈值0.25对应100%) + progress = min(viz_data['area_ratio'] / 0.25, 1.0) + fill_width = int(bar_width * progress) + + # 根据阶段选择颜色 + if viz_data['in_arrival']: + fill_color = "rgba(0, 255, 100, 0.8)" # 绿色(可过马路) + elif viz_data['area_ratio'] >= 0.18: + fill_color = "rgba(255, 200, 0, 0.8)" # 黄色(接近) + elif viz_data['area_ratio'] >= 0.08: + fill_color = "rgba(0, 200, 255, 0.8)" # 青色(靠近) + else: + fill_color = "rgba(100, 150, 255, 0.8)" # 蓝色(发现) + + visualizations.append({ + "type": "rectangle", + "top_left": (bar_x + 2, bar_y + 2), + "bottom_right": (bar_x + fill_width - 2, bar_y + bar_height - 2), + "color": fill_color, + "filled": True + }) + + # 进度条标签(使用中文文本,字体减小) + visualizations.append({ + "type": "text_with_bg", + "text": f"接近度: {int(progress * 100)}%", + "position": [bar_x, bar_y - 18], + "font_scale": 0.25, # 减小字体 + "color": "rgba(255, 255, 255, 1.0)", + "bg_color": "rgba(0, 0, 0, 0.7)" + }) + + except Exception as e: + logger.error(f"添加斑马线可视化失败: {e}") + + def _add_traffic_light_visualization(self, color, visualizations, image_height, image_width): + """添加红绿灯状态可视化""" + # 在右上角绘制红绿灯指示器 + x = image_width - 100 + y = 50 + + # 背景框 + visualizations.append({ + "type": "rectangle", + "top_left": (x - 40, y - 40), + "bottom_right": (x + 40, y + 100), + "color": "rgba(0, 0, 0, 0.5)", + "filled": True + }) + + # 三个圆形灯 + colors = { + "red": [(255, 0, 0), (50, 0, 0), (50, 0, 0)], + "yellow": [(50, 50, 0), (255, 255, 0), (50, 50, 0)], + "green": [(0, 50, 0), (0, 50, 0), (0, 255, 0)], + "unknown": [(50, 50, 50), (50, 50, 50), (50, 50, 50)] + } + + light_colors = colors.get(color, colors["unknown"]) + positions = [y - 20, y + 20, y + 60] 
+ + for i, (pos_y, light_color) in enumerate(zip(positions, light_colors)): + # 外圈 + visualizations.append({ + "type": "circle", + "center": [x, pos_y], + "radius": 18, + "color": f"rgba(100, 100, 100, 1.0)", + "thickness": 2 + }) + # 内圈(灯的颜色) + visualizations.append({ + "type": "circle", + "center": [x, pos_y], + "radius": 15, + "color": f"rgba({light_color[0]}, {light_color[1]}, {light_color[2]}, 1.0)", + "filled": True + }) + + # 标签 + visualizations.append({ + "type": "text_with_bg", + "text": f"信号灯: {color}", + "position": [x - 35, y + 90], + "font_scale": 0.5, + "color": "rgba(255, 255, 255, 1.0)", + "bg_color": "rgba(0, 0, 0, 0.7)" + }) + + def _to_cn_obstacle(self, name: str) -> str: + """转换障碍物名称为中文""" + try: + key = (name or '').strip().lower() + return _OBSTACLE_NAME_CN.get(key, '障碍物') + except: + return '障碍物' + + def _speech_for_obstacle(self, name: str) -> str: + k = (name or '').strip().lower() + if k == 'person': return "前方有人,注意避让。" + if k == 'car': return "前方有车,注意避让。" + if k == 'bicycle': return "前方有自行车,停一下。" + if k == 'motorcycle': return "前方有摩托车,停一下。" + if k == 'bus': return "前方有公交车,停一下。" + if k == 'truck': return "前方有卡车,停一下。" + if k == 'scooter': return "前方有电瓶车,停一下。" + if k == 'stroller': return "前方有婴儿车,停一下。" + if k == 'dog': return "前方有狗,停一下。" + if k == 'animal': return "前方有动物,停一下。" + return "前方有障碍物,注意避让。" + + def _draw_command_button(self, image, text): + """绘制底部中央的指令按钮(与斑马线模式统一)""" + try: + H, W = image.shape[:2] + full_text = f"当前指令:{text if text else '—'}" + + # 按钮参数 + font_px = 14 + pad_x, pad_y = 14, 8 + bottom_margin = 28 + + # 计算文字尺寸 + if PIL_AVAILABLE: + try: + from PIL import Image as PILImage, ImageDraw, ImageFont + # 尝试加载中文字体 + font = None + for font_path in [ + "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc", + "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", + ]: + if os.path.exists(font_path): + try: + font = ImageFont.truetype(font_path, font_px) + break + except: + continue + if font: + bbox = ImageDraw.Draw(PILImage.new('RGB', 
(1, 1))).textbbox((0, 0), full_text, font=font) + tw = max(1, bbox[2] - bbox[0]) + th = max(1, bbox[3] - bbox[1]) + else: + scale = font_px / 24.0 + (tw, th), _ = cv2.getTextSize(full_text, cv2.FONT_HERSHEY_SIMPLEX, scale, 1) + except: + scale = font_px / 24.0 + (tw, th), _ = cv2.getTextSize(full_text, cv2.FONT_HERSHEY_SIMPLEX, scale, 1) + else: + scale = font_px / 24.0 + (tw, th), _ = cv2.getTextSize(full_text, cv2.FONT_HERSHEY_SIMPLEX, scale, 1) + + # 计算按钮位置(底部居中) + bw = tw + pad_x * 2 + bh = th + pad_y * 2 + radius = max(10, bh // 2) + + cx = W // 2 + left = max(8, cx - bw // 2) + top = H - bottom_margin - bh + right = min(W - 8, left + bw) + bottom = top + bh + + # 绘制半透明圆角背景 + overlay = image.copy() + bg_color = (26, 32, 41) # 深色背景 + border_color = (60, 76, 102) # 边框 + + # 圆角矩形(中间+两个圆) + cv2.rectangle(overlay, (left + radius, top), (right - radius, bottom), bg_color, -1) + cv2.circle(overlay, (left + radius, (top + bottom) // 2), radius, bg_color, -1) + cv2.circle(overlay, (right - radius, (top + bottom) // 2), radius, bg_color, -1) + + # 混合半透明 + cv2.addWeighted(overlay, 0.75, image, 0.25, 0, image) + + # 绘制边框 + cv2.rectangle(image, (left + radius, top), (right - radius, bottom), border_color, 1) + cv2.circle(image, (left + radius, (top + bottom) // 2), radius, border_color, 1) + cv2.circle(image, (right - radius, (top + bottom) // 2), radius, border_color, 1) + + # 绘制文字 + text_x = left + pad_x + text_y = top + pad_y + th + + if PIL_AVAILABLE and 'font' in locals() and font: + # 使用PIL绘制中文 + pil_img = PILImage.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + draw = ImageDraw.Draw(pil_img) + draw.text((text_x, top + pad_y), full_text, font=font, fill=(255, 255, 255)) + image = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR) + else: + # 使用OpenCV绘制 + cv2.putText(image, full_text, (text_x, text_y), + cv2.FONT_HERSHEY_SIMPLEX, scale, (255, 255, 255), 1) + + return image + except Exception as e: + logger.error(f"绘制指令按钮失败: {e}") + return image + + def 
_parse_color(self, color_str): + """解析颜色字符串,返回BGR格式""" + try: + if color_str.startswith('rgba('): + values = color_str[5:-1].split(',') + r, g, b = int(values[0]), int(values[1]), int(values[2]) + return (b, g, r) # OpenCV 使用 BGR 格式 + elif color_str == 'yellow': + return (0, 255, 255) + elif color_str == 'red': + return (0, 0, 255) + else: + return (0, 0, 255) # 默认红色 + except: + return (0, 0, 255) + + def _draw_data_panel_no_bg(self, image, data, position=(15, 15)): + """绘制数据面板(无黑底版本)""" + if not PIL_AVAILABLE: + return image + + try: + pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + draw = ImageDraw.Draw(pil_img, "RGBA") + + env_scale = float(os.getenv("AIGLASS_PANEL_SCALE", "0.7")) + base_font_size = max(10, int(round(14 * env_scale))) + + # 尝试多种字体,确保中文显示 + font = None + font_paths = [ + "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc", + "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", + ] + + for font_path in font_paths: + try: + if os.path.exists(font_path): + font = ImageFont.truetype(font_path, base_font_size) + break + except: + continue + + if font is None: + font = ImageFont.load_default() + + # 绘制文本,使用描边效果 + y_offset = position[1] + for key, value in data.items(): + text = f"{key}: {value}" + + # 绘制黑色描边(8个方向) + for dx in [-1, 0, 1]: + for dy in [-1, 0, 1]: + if dx != 0 or dy != 0: + draw.text((position[0] + dx, y_offset + dy), text, + font=font, fill=(0, 0, 0, 255)) + + # 绘制白色文字 + draw.text((position[0], y_offset), text, font=font, fill=(255, 255, 255, 255)) + y_offset += base_font_size + 5 + + return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR) + + except Exception as e: + logger.warning(f"绘制数据面板失败: {e}") + return image + + + def _draw_visualizations(self, image, viz_elements): + """增强的可视化绘制方法""" + if not viz_elements: + return image + + # 获取当前时间用于动画效果 + current_time = time.time() + + # 分离不同类型的元素 + panel_elements = [v for v in viz_elements if v.get("type") == "data_panel"] + standard_elements = [v for v in viz_elements if 
v.get("type") != "data_panel"] + + # 第一遍:绘制填充(Day 20 优化:用轮廓替代半透明填充,大幅提升性能) + for element in standard_elements: + elem_type = element.get("type") + + if elem_type in ['blind_path_mask', 'obstacle_mask', 'crosswalk_mask']: + points = np.array(element.get("points", []), dtype=np.int32) + if points.size > 0: + color = self._parse_color(element.get("color", "rgba(255, 255, 255, 0.5)")) + + # Day 20 性能优化:只绘制轮廓,不做半透明填充 + # 原因:逐像素混合 (~200-300ms) 改为轮廓绘制 (~5-10ms) + + # 根据类型选择轮廓样式 + if elem_type == 'blind_path_mask': + thickness = 3 # 盲道用粗线 + elif elem_type == 'obstacle_mask': + thickness = 2 # 障碍物用中等线 + else: + thickness = 2 # 斑马线用中等线 + + # 直接用 points 绘制轮廓(快速) + cv2.polylines(image, [points], isClosed=True, color=color, thickness=thickness) + + # 第二遍:绘制轮廓和其他元素 + for element in standard_elements: + elem_type = element.get("type") + + # 【新增】绘制直线 + if elem_type == 'line': + start = tuple(element.get("start", (0, 0))) + end = tuple(element.get("end", (100, 100))) + color = self._parse_color(element.get("color", "rgba(255, 255, 255, 1.0)")) + thickness = element.get("thickness", 2) + cv2.line(image, start, end, color, thickness) + + # 绘制轮廓/描边 + elif elem_type == 'outline': + points = np.array(element.get("points", []), dtype=np.int32) + if points.size > 0: + color = self._parse_color(element.get("color", "rgba(255, 255, 255, 1.0)")) + thickness = element.get("thickness", 3) + cv2.polylines(image, [points], isClosed=True, color=color, thickness=thickness) + + # 绘制折线 + elif elem_type == 'polyline': + points = np.array(element.get("points", []), dtype=np.int32) + if points.size > 0: + color = self._parse_color(element.get("color", "rgba(255, 255, 0, 1.0)")) + thickness = element.get("width", 2) + cv2.polylines(image, [points], isClosed=False, color=color, thickness=thickness) + + # 绘制圆形 + elif elem_type == 'circle': + center = tuple(element.get("center", (0, 0))) + radius = element.get("radius", 10) + color = self._parse_color(element.get("color", "rgba(255, 0, 0, 1.0)")) + 
thickness = element.get("thickness", -1 if element.get("filled", True) else 2) + cv2.circle(image, center, radius, color, thickness) + + # 绘制矩形 + elif elem_type == 'rectangle': + top_left = tuple(element.get("top_left", (0, 0))) + bottom_right = tuple(element.get("bottom_right", (100, 100))) + color = self._parse_color(element.get("color", "rgba(0, 0, 0, 0.5)")) + thickness = -1 if element.get("filled", True) else 2 + cv2.rectangle(image, top_left, bottom_right, color, thickness) + + # 绘制箭头 + elif elem_type == 'arrow': + start = tuple(element.get("start", (0, 0))) + end = tuple(element.get("end", (100, 100))) + color = self._parse_color(element.get("color", "rgba(0, 255, 255, 1.0)")) + thickness = element.get("thickness", 2) + tip_length = element.get("tip_length", 0.3) + cv2.arrowedLine(image, start, end, color, thickness, tipLength=tip_length) + + # 【新增】绘制双向箭头 + elif elem_type == 'double_arrow': + start = tuple(element.get("start", (0, 0))) + end = tuple(element.get("end", (100, 100))) + color = self._parse_color(element.get("color", "rgba(0, 255, 0, 0.8)")) + thickness = element.get("thickness", 2) + tip_length = element.get("tip_length", 0.15) + # 绘制中间的线 + cv2.line(image, start, end, color, thickness) + # 绘制两端的箭头 + # 计算箭头方向向量 + dx = end[0] - start[0] + dy = end[1] - start[1] + length = np.sqrt(dx*dx + dy*dy) + if length > 0: + # 单位方向向量 + ux, uy = dx/length, dy/length + # 箭头长度 + arrow_len = length * tip_length + # 左端箭头 + tip1_x = int(start[0] + arrow_len * ux) + tip1_y = int(start[1] + arrow_len * uy) + # 绘制左端箭头(指向左) + angle = np.arctan2(dy, dx) + arrow_angle = 30 * np.pi / 180 # 箭头角度 + p1 = (int(start[0] + arrow_len * np.cos(angle - arrow_angle)), + int(start[1] + arrow_len * np.sin(angle - arrow_angle))) + p2 = (int(start[0] + arrow_len * np.cos(angle + arrow_angle)), + int(start[1] + arrow_len * np.sin(angle + arrow_angle))) + cv2.line(image, start, p1, color, thickness) + cv2.line(image, start, p2, color, thickness) + # 右端箭头(指向右) + p3 = (int(end[0] - 
arrow_len * np.cos(angle - arrow_angle)), + int(end[1] - arrow_len * np.sin(angle - arrow_angle))) + p4 = (int(end[0] - arrow_len * np.cos(angle + arrow_angle)), + int(end[1] - arrow_len * np.sin(angle + arrow_angle))) + cv2.line(image, end, p3, color, thickness) + cv2.line(image, end, p4, color, thickness) + + # 【新增】绘制虚线 + elif elem_type == 'dashed_line': + start = np.array(element.get("start", (0, 0))) + end = np.array(element.get("end", (100, 100))) + color = self._parse_color(element.get("color", "rgba(255, 255, 255, 0.6)")) + thickness = element.get("thickness", 2) + dash_length = 10 + gap_length = 5 + # 计算总长度和方向 + total_vec = end - start + total_len = np.linalg.norm(total_vec) + if total_len > 0: + unit_vec = total_vec / total_len + # 绘制虚线段 + current_len = 0 + while current_len < total_len: + seg_start = start + unit_vec * current_len + seg_end = start + unit_vec * min(current_len + dash_length, total_len) + cv2.line(image, tuple(seg_start.astype(int)), tuple(seg_end.astype(int)), color, thickness) + current_len += dash_length + gap_length + + # 【新增】绘制角度弧线 + elif elem_type == 'angle_arc': + center = tuple(element.get("center", (100, 100))) + radius = element.get("radius", 40) + start_angle = element.get("start_angle", -90) + end_angle = element.get("end_angle", 0) + color = self._parse_color(element.get("color", "rgba(255, 200, 0, 0.8)")) + thickness = element.get("thickness", 2) + # OpenCV的ellipse函数:startAngle和endAngle是从右侧水平线开始顺时针测量 + # 需要转换:我们的角度是从右侧水平线逆时针(数学标准) + # OpenCV需要的是从右侧水平线顺时针 + cv2_start = -end_angle # 转换为OpenCV格式 + cv2_end = -start_angle + # 确保角度范围正确 + if cv2_start > cv2_end: + cv2_start, cv2_end = cv2_end, cv2_start + cv2.ellipse(image, center, (radius, radius), 0, cv2_start, cv2_end, color, thickness) + + # 【修改】绘制带背景的文本(使用中文支持) + elif elem_type == 'text_with_bg': + text = element.get("text", "") + pos = element.get("position", [10, 30]) + font_scale = element.get("font_scale", 0.6) + color = self._parse_color(element.get("color", "rgba(255, 
255, 255, 1.0)")) + + # 使用新的中文文本绘制函数 + image = self._draw_chinese_text(image, text, tuple(pos), + font_scale=font_scale, + color=color, + stroke_color=(0, 0, 0), + stroke_width=1) + + # 绘制警告图标 + elif elem_type == 'warning_icon': + pos = element.get("position", (100, 100)) + level = element.get("level", "info") + text = element.get("text", "") + flash = element.get("flash", False) + + # 根据级别选择颜色 + if level == "danger": + icon_color = (0, 0, 255) # 红色 + text_color = (255, 255, 255) + elif level == "warning": + icon_color = (0, 165, 255) # 橙色 + text_color = (255, 255, 255) + else: + icon_color = (0, 255, 255) # 黄色 + text_color = (0, 0, 0) + + # 闪烁效果 + if flash: + alpha = 0.5 + 0.5 * np.sin(current_time * 4 * np.pi) + icon_color = tuple(int(c * alpha) for c in icon_color) + + # 绘制三角形警告图标 + triangle = np.array([ + [pos[0], pos[1] - 20], + [pos[0] - 15, pos[1]], + [pos[0] + 15, pos[1]] + ], np.int32) + cv2.fillPoly(image, [triangle], icon_color) + cv2.polylines(image, [triangle], True, (255, 255, 255), 2) + + # 绘制感叹号 + cv2.putText(image, "!", (pos[0] - 5, pos[1] - 5), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + + # 绘制文本标签(使用中文支持) + if text: + font_scale = 0.5 + # 使用新的中文文本绘制函数 + text_pos = (pos[0] - 20, pos[1] + 20) # 简化位置计算 + image = self._draw_chinese_text(image, text, text_pos, + font_scale=font_scale, + color=text_color, + stroke_color=(0, 0, 0), + stroke_width=1) + + # 普通文本 + elif elem_type == 'text': + text = element.get("text", "") + pos = tuple(element.get("pos", (10, 30))) + # 使用中文文本绘制函数 + image = self._draw_chinese_text(image, text, pos, + font_scale=0.7, + color=(255, 255, 255), + stroke_color=(0, 0, 0), + stroke_width=1) + + # 【修改】绘制数据面板(使用无黑底版本) + if PIL_AVAILABLE: + for panel in panel_elements: + image = self._draw_data_panel_no_bg(image, panel["data"], panel["position"]) + else: + # 如果没有PIL,也使用描边效果 + for panel in panel_elements: + y_offset = panel["position"][1] + for key, value in panel["data"].items(): + text = f"{key}: {value}" + # 绘制文字描边 + 
for dx in [-1, 0, 1]: + for dy in [-1, 0, 1]: + if dx != 0 or dy != 0: + cv2.putText(image, text, (panel["position"][0] + dx, y_offset + dy), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 3) + # 绘制白色文字 + cv2.putText(image, text, (panel["position"][0], y_offset), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1) + y_offset += 25 + + return image + + + + def _draw_chinese_text(self, image, text, position, font_scale=0.6, color=(255, 255, 255), + stroke_color=(0, 0, 0), stroke_width=1): + """绘制中文文本,使用微软雅黑字体,白字黑边""" + if not PIL_AVAILABLE: + # 如果没有PIL,回退到cv2.putText(会显示问号) + cv2.putText(image, text, position, cv2.FONT_HERSHEY_SIMPLEX, + font_scale, color, 2) + return image + + try: + # 转换为PIL图像 + pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + draw = ImageDraw.Draw(pil_img) + + # 计算字体大小(基于font_scale) + base_size = 24 # 基准字体大小 + font_size = int(base_size * font_scale / 0.6) + + # 尝试加载微软雅黑字体 + font = None + font_paths = [ + "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc", + "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", + ] + + for font_path in font_paths: + if os.path.exists(font_path): + try: + font = ImageFont.truetype(font_path, font_size) + break + except: + continue + + if font is None: + font = ImageFont.load_default() + + # 将OpenCV的BGR颜色转换为RGB + rgb_color = (color[2], color[1], color[0]) + rgb_stroke = (stroke_color[2], stroke_color[1], stroke_color[0]) + + # 绘制文本(带描边效果) + x, y = position + # 绘制描边 + draw.text((x, y), text, font=font, fill=rgb_stroke, + stroke_width=stroke_width, stroke_fill=rgb_stroke) + # 绘制主文本 + draw.text((x, y), text, font=font, fill=rgb_color) + + # 转换回OpenCV格式 + return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR) + + except Exception as e: + logger.warning(f"绘制中文文本失败: {e}") + # 回退到cv2.putText + cv2.putText(image, text, position, cv2.FONT_HERSHEY_SIMPLEX, + font_scale, color, 2) + return image + + def _draw_data_panel(self, image, data, position=(15, 15)): + """绘制数据面板(需要Pillow)""" + if not PIL_AVAILABLE: + return 
image + + try: + pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + draw = ImageDraw.Draw(pil_img, "RGBA") + + env_scale = float(os.getenv("AIGLASS_PANEL_SCALE", "0.65")) + base_font_size = max(8, int(round(16 * env_scale))) + padding = max(4, int(round(8 * env_scale))) + + # 尝试加载微软雅黑字体 + font = None + font_paths = [ + "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc", + "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", + ] + + for font_path in font_paths: + if os.path.exists(font_path): + try: + font = ImageFont.truetype(font_path, base_font_size) + break + except: + continue + + if font is None: + font = ImageFont.load_default() + + text_lines = [f"{key}: {value}" for key, value in data.items()] + text_to_draw = "\n".join(text_lines) + + bbox = draw.textbbox(position, text_to_draw, font=font) + text_w, text_h = bbox[2] - bbox[0], bbox[3] - bbox[1] + + bg_rect = [ + (position[0] - padding, position[1] - padding), + (position[0] + text_w + padding, position[1] + text_h + padding) + ] + draw.rectangle(bg_rect, fill=(0, 0, 0, 128)) + draw.text(position, text_to_draw, font=font, fill=(255, 255, 255, 255)) + + return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR) + + except Exception: + return image + + def reset(self): + """重置导航器状态""" + self.current_state = STATE_ONBOARDING + self.onboarding_step = ONBOARDING_STEP_ROTATION + self.maneuver_step = MANEUVER_STEP_1_ISSUE_COMMAND + self.maneuver_target_info = None + self.turn_detection_tracker = { + 'direction': None, + 'consecutive_hits': 0, + 'last_seen_frame': 0, + 'corner_info': None + } + self.turn_cooldown_frames = 0 + self.avoidance_plan = None + self.avoidance_step_index = 0 + self.lock_on_data = None + + # 重置光流和平滑相关 + self.flow_points = {} + self.flow_grace = {} + self.centerline_history = [] + self.blind_miss_ttl = 0 + self.cross_miss_ttl = 0 + + # 重置语音相关 + self.pending_obstacle_voice = None + self.last_obstacle_speech = "" + self.last_obstacle_speech_time = 0 + + # 重置多项式系数历史 + 
self.poly_coeffs_history = [] + self.crosswalk_tracker = { + 'stage': 'not_detected', + 'consecutive_frames': 0, + 'last_area_ratio': 0.0, + 'last_bottom_y_ratio': 0.0, + 'last_center_x_ratio': 0.5, + 'position_announced': False, + 'alignment_status': 'not_aligned', + 'last_seen_frame': 0, + 'last_angle': 0.0 + } + self.frame_counter = 0 + self.prev_gray = None + self.prev_blind_path_mask = None + self.prev_crosswalk_mask = None + self.prev_obstacle_cache = [] + self.last_guidance_message = "" + self.last_detected_obstacles = [] + self.last_obstacle_detection_frame = 0 + self.last_obstacle_speech = "" + self.last_obstacle_speech_time = 0 + self.last_any_speech_time = 0 + self.crosswalk_ready_announced = False + self.crosswalk_ready_time = 0 + self.traffic_light_history.clear() + self.last_traffic_light_state = "unknown" + self.green_light_announced = False + + def _stabilize_obstacle_list(self, obstacles, prev_obstacles, prev_gray, curr_gray, + image_shape, threshold=0.5): + """稳定障碍物检测结果,避免重复叠加""" + if not obstacles or prev_gray is None or curr_gray is None: + return obstacles + + H, W = image_shape + stabilized = [] + used_prev = set() # 记录已使用的历史障碍物 + + # 对每个当前检测到的障碍物 + for curr_obs in obstacles: + if 'mask' not in curr_obs or curr_obs['mask'] is None: + stabilized.append(curr_obs) + continue + + curr_mask = curr_obs['mask'] + best_match = None + best_iou = 0 + best_idx = -1 + + # 寻找最佳匹配的历史障碍物 + if prev_obstacles: + for idx, prev_obs in enumerate(prev_obstacles): + if idx in used_prev or 'mask' not in prev_obs: + continue + + # 使用光流预测历史障碍物的新位置 + flow_mask = self._predict_mask_with_flow(prev_obs['mask'], prev_gray, curr_gray) + if flow_mask is None: + flow_mask = prev_obs['mask'] + + # 计算IoU + inter = np.logical_and(curr_mask > 0, flow_mask > 0).sum() + union = np.logical_or(curr_mask > 0, flow_mask > 0).sum() + iou = float(inter) / float(union) if union > 0 else 0.0 + + if iou > best_iou and iou > threshold: + best_iou = iou + best_match = flow_mask + best_idx = 
idx + + # 如果找到匹配,融合结果 + if best_match is not None and best_idx >= 0: + used_prev.add(best_idx) + # 融合当前检测和光流预测,提高稳定性 + fused_mask = ((0.8 * curr_mask + 0.2 * best_match) > 128).astype(np.uint8) * 255 + curr_obs['mask'] = fused_mask + # 更新派生属性 + self._update_obstacle_properties(curr_obs, H, W) + + stabilized.append(curr_obs) + + return stabilized + + def _speech_for_obstacle(self, name: str) -> str: + k = (name or '').strip().lower() + if k == 'person': return "前方有人,注意避让。" + if k == 'car': return "前方有车,注意避让。" + if k == 'bicycle': return "前方有自行车,停一下。" + if k == 'motorcycle': return "前方有摩托车,停一下。" + if k == 'bus': return "前方有公交车,停一下。" + if k == 'truck': return "前方有卡车,停一下。" + if k == 'scooter': return "前方有电瓶车,停一下。" + if k == 'stroller': return "前方有婴儿车,停一下。" + if k == 'dog': return "前方有狗,停一下。" + if k == 'animal': return "前方有动物,停一下。" + return "前方有障碍物,注意避让。" + + def _update_obstacle_properties(self, obs, H, W): + """更新障碍物的派生属性""" + if 'mask' not in obs or obs['mask'] is None: + return + + mask = obs['mask'] + y_coords, x_coords = np.where(mask > 0) + + if len(y_coords) > 0: + obs['area'] = len(y_coords) + obs['center_x'] = float(np.mean(x_coords)) + obs['center_y'] = float(np.mean(y_coords)) + obs['y_position_ratio'] = obs['center_y'] / H + obs['area_ratio'] = obs['area'] / (H * W) + obs['bottom_y_ratio'] = np.max(y_coords) / H + + # 更新边界框 + x1, y1 = int(np.min(x_coords)), int(np.min(y_coords)) + x2, y2 = int(np.max(x_coords)), int(np.max(y_coords)) + obs['box_coords'] = (x1, y1, x2, y2) \ No newline at end of file diff --git a/workflow_crossstreet.py b/workflow_crossstreet.py new file mode 100644 index 0000000..c9fb3c6 --- /dev/null +++ b/workflow_crossstreet.py @@ -0,0 +1,1832 @@ +# -*- coding: utf-8 -*- +""" +过马路工作流(简化版 - 仅斑马线检测,但保留导航功能) +- 直连版本,无 Celery/Redis +- 仅检测斑马线,无交通灯检测 +- 保留斑马线导航功能(角度、偏移计算) +- 保留可视化(引导线、目标点等) +- 每帧都进行分割;若该帧分割失败,则用上一帧从掩码打点的光流特征点追踪,重建掩码保持位置,直到下一次分割检出 +""" +import torch +import os +import time +import logging +import numpy as np +import cv2 +from 
dataclasses import dataclass +from typing import Optional, List, Dict, Any +# 【移除】from audio_player import play_voice_text - 不在工作流内部播放音频 + +# 可选:用于更精致的数据面板(与 blindpath 一致) +try: + from PIL import Image, ImageDraw, ImageFont + PIL_AVAILABLE = True +except ImportError: + PIL_AVAILABLE = False + Image, ImageDraw, ImageFont = None, None, None + +# 可选:自动启用障碍物检测(与 blindpath 一致) +try: + from obstacle_detector_client import ObstacleDetectorClient +except Exception: + ObstacleDetectorClient = None + +# 红绿灯检测模块 +try: + import trafficlight_detection + TRAFFIC_LIGHT_AVAILABLE = True +except Exception: + TRAFFIC_LIGHT_AVAILABLE = False + trafficlight_detection = None + +# Day 20: TensorRT 模型加载工具 +try: + from model_utils import get_best_model_path +except ImportError: + def get_best_model_path(path): return path + +logger = logging.getLogger(__name__) + +# ========== 状态常量 ========== +STATE_SEEKING = "SEEKING_CROSSWALK" # 寻找并对准远处的斑马线 +STATE_WAIT_LIGHT = "WAIT_TRAFFIC_LIGHT" # 等待红绿灯判定 +STATE_CROSSING = "CROSSING" # 正在过马路 + +# ========== 配置参数 ========== +CROSSWALK_MIN_CONF = float(os.getenv('CROSSWALK_MIN_CONF', '0.3')) +CROSSWALK_MIN_AREA = int(os.getenv('CROSSWALK_MIN_AREA', '5000')) +BLIND_MIN_CONF = float(os.getenv('BLIND_MIN_CONF', '0.34')) # 盲道最低置信度(更高,防误判) +ANGLE_THRESH_DEG = float(os.getenv('CROSSWALK_ANGLE_THRESH_DEG', '5.0')) # 默认阈值略放宽 +OFFSET_THRESH = float(os.getenv('CROSSWALK_OFFSET_THRESH', '0.08')) # 默认阈值略放宽 + +# 远距离对准阈值(更宽松,避免过于敏感) +SEEKING_ANGLE_THRESH_DEG = 15.0 # 远距离角度阈值(更宽松) +SEEKING_OFFSET_THRESH = 0.20 # 远距离偏移阈值(更宽松) + +# 远距离对准阈值(判定"很近"的条件,更严格) +CROSSWALK_NEAR_AREA_RATIO = 0.30 # 斑马线占画面30%认为"很近"(提高) +CROSSWALK_NEAR_BOTTOM_RATIO = 0.80 # 斑马线底部超过画面80%认为"很近"(提高) +CROSSWALK_NEAR_MIN_HEIGHT_RATIO = 0.35 # 斑马线高度占画面35%以上(新增条件) + +# 红绿灯判定参数 +GREEN_LIGHT_STABLE_FRAMES = 5 # 绿灯稳定帧数 + +# 类别ID绑定(与训练集对应) +CW_ID = int(os.getenv("AIGLASS_SEG_CW_ID", "0")) # 斑马线 +BP_ID = int(os.getenv("AIGLASS_SEG_BP_ID", "1")) # 盲道 + +# 斑马线与盲道的同义名集合 +_CW = {'zebra_crossing', 'zebra 
crossing', 'zebra', 'crosswalk', 'road_crossing', 'road crossing'} +_BP = {'blind_path', 'tactile_paving', 'tactile paving', 'blind path'} + +# 盲道"真伪判定"阈值 +BP_VALID_IOU_THR = 0.40 # 与斑马线 IoU 超过此值,判为"混淆",不当盲道 + +# 追踪/打点参数 +INNER_OFFSET_PX_LOCK = 5 +EDGE_DILATE_PX = 2 +LK_PARAMS = dict( + winSize=(21, 21), + maxLevel=3, + criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 12, 0.03) +) +FEATURE_PARAMS = dict( + maxCorners=600, + qualityLevel=0.001, + minDistance=5, + blockSize=7 +) + +# 时序平滑与保活 +MASK_EMA_ALPHA = 0.6 # EMA 平滑权重 +TRACK_MIN_POINTS = 30 # 追踪最少特征点阈值 +TRACK_RESEED_EVERY = 12 # 每隔 N 帧在成功分割时重播种一次特征点 + +# 可视化颜色(BGR) +VIS_COLORS = { + "crosswalk": (0, 165, 255), # 橙色 + "centerline": (255, 255, 0), # 青色 - 引导中心线 + "target_point": (255, 0, 255), # 粉色 - 引导目标点 + "hint": (0, 255, 255), # 黄色 + "stripes": (0, 128, 255), # 橙蓝 - 条纹线段 + "heading": (0, 0, 255), # 红色 - 方向箭头 +} + +@dataclass +class CrossStreetResult: + """过马路导航结果""" + annotated_image: Optional[np.ndarray] = None + guidance_text: str = "" + visualizations: List[Dict[str, Any]] = None + should_switch_to_blindpath: bool = False + + def __post_init__(self): + if self.visualizations is None: + self.visualizations = [] + +# ========== 辅助函数 ========== +def _score_of(d) -> float: + """兼容不同检测结构,取出置信度;取不到就给 0.0(保守)""" + for k in ("conf", "confidence", "score", "prob"): + v = getattr(d, k, None) + if v is not None: + try: + return float(v) + except Exception: + break + return 0.0 + +def _norm_name(s: str) -> str: + """标准化名称""" + return str(s).lower().replace('_', ' ').strip() + +def _in_set(name: str, pool: set) -> bool: + """检查名称是否在集合中""" + return _norm_name(name) in {_norm_name(x) for x in pool} + +def _mask_iou(a: np.ndarray, b: np.ndarray) -> float: + """计算两个mask的IoU""" + if a is None or b is None: + return 0.0 + ai = a > 0 + bi = b > 0 + inter = np.logical_and(ai, bi).sum() + union = np.logical_or(ai, bi).sum() + return float(inter) / float(union + 1e-6) + +def _looks_like_blind_path(bp_mask: np.ndarray, 
cw_mask: np.ndarray, H: int, W: int) -> bool: + """几何+互斥检查,过滤'横条纹/路牙'伪盲道""" + if bp_mask is None: + return False + ys, xs = np.where(bp_mask > 0) + if xs.size < 80: # 太小的片段直接丢 + return False + + # 计算主轴角度 + pts = np.stack([xs.astype(np.float32), ys.astype(np.float32)], axis=1) + mean = pts.mean(axis=0) + cov = np.cov((pts - mean).T) + eigvals, eigvecs = np.linalg.eig(cov) + v = eigvecs[:, np.argmax(eigvals)] + angle_deg = np.degrees(np.arctan2(v[1], v[0])) + if angle_deg > 90: angle_deg -= 180 + if angle_deg < -90: angle_deg += 180 + + h = (ys.max() - ys.min() + 1) + w = (xs.max() - xs.min() + 1) + aspect = h / float(w + 1e-6) # 期望盲道"更竖一些" + iou_cw = _mask_iou(bp_mask, cw_mask) + + # 1) 横向条纹过滤(放宽到 20°,给远端/轻微倾斜更多空间) + if abs(angle_deg) <= 20.0: + return False + # 2) 形状过滤(放宽到 0.52) + if aspect < 0.52: + return False + # 3) 与斑马线高度重叠 + if iou_cw >= BP_VALID_IOU_THR: + return False + # 4) 底边窄条(疑似路牙)过滤 + bottom = bp_mask[int(0.88 * H):, :] + if bottom.sum() > 0: + bottom_share = bottom.sum() / float((bp_mask > 0).sum() + 1e-6) + if bottom_share > 0.50 and (w / float(W)) < 0.35: + return False + return True + +def _cls_of(d): + """提取检测对象的类别ID""" + for k in ("cls", "class_id", "category_id"): + v = getattr(d, k, None) + if v is not None: + try: + return int(v) + except Exception: + pass + return None + +class CrossStreetNavigator: + """简化版过马路导航器 - 仅斑马线检测但保留导航(每帧分割 + 失败用光流保活)""" + + def __init__(self, seg_model=None, coco_model=None, obs_model=None, device_id: str = "esp32"): + self.seg_model = seg_model + self.device_id = device_id + self.frame_counter = 0 + self.last_guidance = "" + self.crosswalk_detected = False + self.last_guide_time = 0 + self.guide_interval = 3.0 # 语音引导间隔(秒) + + # —— 状态机 —— + self.state = STATE_SEEKING # 当前状态 + self.green_light_counter = 0 # 绿灯稳定帧计数 + self.last_traffic_light = None # 上一帧检测到的红绿灯 + self.last_seeking_guidance = "" # 上一次SEEKING状态的引导文本(用于节流) + self.last_waiting_light_time = 0 # 上次播报"正在等待绿灯"的时间 + self.crossing_end_announced = False # 
是否已播报"过马路结束"(CROSSING状态用) + self.last_crosswalk_seen_time = 0 # 上次检测到斑马线的时间 + self.last_blindpath_announce_time = 0 # 上次播报盲道提示的时间(用于节流重复播报) + + # —— 时序/追踪状态 —— + self.prev_mask = None # 上一帧稳定后的二值掩码 + self.prev_mask_float = None # 掩码 EMA 浮点缓冲 + self.prev_mask_ts = 0.0 # 最近一次掩码更新时间 + self.old_gray = None # 上一帧灰度图(供 LK) + self.p0 = None # 上一帧特征点(N,1,2) + self.last_seed_frame = 0 # 上次播种特征点的帧号 + + # —— 避障(与 blindpath 一致) —— + self.obstacle_detector = obs_model + self.prev_gray = None + self.last_detected_obstacles = [] + self.last_obstacle_detection_frame = 0 + self.OBSTACLE_DETECTION_INTERVAL = int(os.getenv("AIGLASS_OBS_INTERVAL", "15")) + self.OBSTACLE_CACHE_DURATION_FRAMES = int(os.getenv("AIGLASS_OBS_CACHE_FRAMES", "0")) + + # 【新增】斑马线检测间隔配置 + self.CROSSWALK_DETECTION_INTERVAL = int(os.getenv("AIGLASS_CROSSWALK_INTERVAL", "4")) # 每4帧检测一次 + self.last_crosswalk_detection_frame = 0 + self.last_detected_crosswalk_mask = None + self.last_detected_blindpath_mask = None + + # 自动启用障碍物检测(若未传入 obs_model) + if self.obstacle_detector is None and os.getenv("AIGLASS_OBS_AUTO", "1") != "0": + try: + if ObstacleDetectorClient is not None: + model_path = os.getenv("AIGLASS_OBS_MODEL", "model/yoloe-11l-seg.pt") + # Day 20: 优先使用 TensorRT 引擎 + model_path = get_best_model_path(model_path) + self.obstacle_detector = ObstacleDetectorClient(model_path) + logger.info("[CROSS_STREET] 障碍物检测器已自动加载") + else: + logger.warning("[CROSS_STREET] 未找到 ObstacleDetectorClient,跳过自动加载") + except Exception as e: + logger.warning(f"[CROSS_STREET] 自动加载障碍物检测器失败: {e}") + + # 如果模型有 predict 方法但没有 detect 方法,进行包装 + if self.seg_model and hasattr(self.seg_model, 'predict') and not hasattr(self.seg_model, 'detect'): + logger.info("[CROSS_STREET] 包装 YOLO 模型") + self.seg_model = YOLOModelWrapper(self.seg_model) + + # 【新增】打印检测间隔配置 + logger.info(f"[CROSS_STREET] 斑马线检测间隔: 每{self.CROSSWALK_DETECTION_INTERVAL}帧") + + # 确保模型在 GPU 上 + # Day 20: TensorRT 引擎不需要 .to() + if self.seg_model and torch.cuda.is_available(): + try: + # 
检查是否是 TensorRT 引擎 + model_path = getattr(self.seg_model, 'ckpt_path', '') or '' + if not model_path.endswith('.engine'): + if hasattr(self.seg_model, 'model') and hasattr(self.seg_model.model, 'to'): + self.seg_model.model.to('cuda') + elif hasattr(self.seg_model, 'to'): + self.seg_model.to('cuda') + logger.info("[CROSS_STREET] 模型已移至 GPU") + else: + logger.info("[CROSS_STREET] TensorRT 引擎已加载,跳过 .to()") + except Exception as e: + logger.warning(f"[CROSS_STREET] 无法将模型移至 GPU: {e}") + + def reset(self): + """重置状态""" + self.frame_counter = 0 + self.last_guidance = "" + self.crosswalk_detected = False + self.last_guide_time = 0 + # 状态机 + self.state = STATE_SEEKING + self.green_light_counter = 0 + self.last_traffic_light = None + self.last_seeking_guidance = "" + self.last_waiting_light_time = 0 + self.crossing_end_announced = False + self.last_crosswalk_seen_time = 0 + self.last_blindpath_announce_time = 0 + # 追踪 + self.prev_mask = None + self.prev_mask_float = None + self.prev_mask_ts = 0.0 + self.old_gray = None + self.p0 = None + self.last_seed_frame = 0 + # 避障缓存 + self.prev_gray = None + self.last_detected_obstacles = [] + self.last_obstacle_detection_frame = 0 + # 重置红绿灯检测状态 + if TRAFFIC_LIGHT_AVAILABLE and trafficlight_detection: + trafficlight_detection.reset_detection_state() + logger.info("[CROSS_STREET] 导航器已重置") + + # —— 打点/追踪辅助 —— + @staticmethod + def _inner_offset_edge(mask_bin: np.ndarray, offset_px=5, edge_dilate_px=2) -> np.ndarray: + """对二值掩码做内收后提边缘,便于在目标内部打光流特征点""" + if offset_px > 0: + k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2*offset_px+1, 2*offset_px+1)) + eroded = cv2.erode(mask_bin.astype(np.uint8), k, iterations=1) + else: + eroded = mask_bin.astype(np.uint8) + edges = cv2.Canny(eroded*255, 50, 150) + if edge_dilate_px > 0: + k2 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2*edge_dilate_px+1, 2*edge_dilate_px+1)) + edges = cv2.dilate(edges, k2, iterations=1) + return edges # uint8 0/255 + + @staticmethod + def 
_hull_mask_from_points(points: np.ndarray, shape_hw: tuple) -> Optional[np.ndarray]: + """从一组点的凸包生成二值掩码""" + if points is None or len(points) < 3: + return None + H, W = shape_hw + pts = points.reshape(-1, 2).astype(np.float32) + hull = cv2.convexHull(pts.reshape(-1,1,2)) + poly = hull.reshape(-1, 2).astype(np.int32) + mask = np.zeros((H, W), dtype=np.uint8) + cv2.fillPoly(mask, [poly], 1) + return mask + + def _seed_points_from_mask(self, gray: np.ndarray, mask_bin: np.ndarray) -> Optional[np.ndarray]: + """基于掩码的内收边界,播种 LK 光流特征点""" + edge_mask = self._inner_offset_edge(mask_bin, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX) + try: + pts = cv2.goodFeaturesToTrack(gray, mask=edge_mask, **FEATURE_PARAMS) + return pts + except Exception as e: + logger.warning(f"[CROSS_STREET] goodFeaturesToTrack 失败: {e}") + return None + + @staticmethod + def _ensure_binary_mask(mask: np.ndarray, shape_hw: tuple) -> np.ndarray: + """阈值化并调整尺寸到图像大小,返回二值 0/1 uint8""" + H, W = shape_hw + if mask.dtype != np.uint8: + mask = (mask > 0.5).astype(np.uint8) + if mask.shape[:2] != (H, W): + mask = cv2.resize(mask, (W, H), interpolation=cv2.INTER_NEAREST) + return (mask > 0).astype(np.uint8) + + def _postprocess_mask(self, mask_bin: np.ndarray) -> np.ndarray: + """形态学净化 + 移除小碎片,缓解毛边与噪点""" + try: + m = (mask_bin > 0).astype(np.uint8) + H, W = m.shape[:2] + # 轻度开闭操作,去毛刺并填补细小空洞 + k_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) + k_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7)) + m = cv2.morphologyEx(m, cv2.MORPH_OPEN, k_open, iterations=1) + m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, k_close, iterations=1) + # 移除过小连通域 + num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(m, connectivity=8) + if num_labels > 1: + areas = stats[1:, cv2.CC_STAT_AREA] + keep_area = max(int(0.003 * H * W), 1500) # 约 0.3% 画面或 1500 px + keep_labels = np.where(areas >= keep_area)[0] + 1 + m2 = np.zeros_like(m) + for lbl in keep_labels: + m2[labels == lbl] = 1 + if 
m2.sum() > 0: + m = m2 + return (m > 0).astype(np.uint8) + except Exception: + return (mask_bin > 0).astype(np.uint8) + + @staticmethod + def _largest_contour(mask_bin: np.ndarray): + cts, _ = cv2.findContours((mask_bin>0).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + if not cts: + return None + return max(cts, key=cv2.contourArea) + + + def _mask_center(self, mask: np.ndarray): + """用图像矩计算掩码质心;失败返回 None""" + M = cv2.moments((mask > 0).astype(np.uint8)) + if abs(M["m00"]) < 1e-6: + return None + cx = int(M["m10"] / M["m00"]) + cy = int(M["m01"] / M["m00"]) + return (cx, cy) + + def _is_crosswalk_near(self, mask: np.ndarray, h: int, w: int) -> bool: + """判断斑马线是否"很近"(到用户跟前)- 更严格的判定条件""" + if mask is None: + return False + area = int(mask.sum()) + area_ratio = float(area) / float(h * w) + + # 获取底部位置和高度 + ys = np.where(mask > 0)[0] + if ys.size == 0: + return False + top_y = int(ys.min()) + bottom_y = int(ys.max()) + mask_height = bottom_y - top_y + 1 + height_ratio = float(mask_height) / float(h) + bottom_ratio = float(bottom_y) / float(h) + + # 需要同时满足多个条件(AND逻辑,更严格): + # 1. 面积足够大 + # 2. 底部位置足够低 + # 3. 高度占比足够大(防止只是因为抬头导致的误判) + is_near = (area_ratio >= CROSSWALK_NEAR_AREA_RATIO and + bottom_ratio >= CROSSWALK_NEAR_BOTTOM_RATIO and + height_ratio >= CROSSWALK_NEAR_MIN_HEIGHT_RATIO) + return is_near + + def _is_crosswalk_almost_done(self, mask: np.ndarray, h: int, w: int) -> bool: + """判断斑马线是否"快消失"(斑马线在画面底部且面积很小)- 更严格的判定""" + if mask is None: + return False + area = int(mask.sum()) + area_ratio = float(area) / float(h * w) + + ys = np.where(mask > 0)[0] + if ys.size == 0: + return False + + # 计算斑马线的顶部和底部位置 + top_y = int(ys.min()) + bottom_y = int(ys.max()) + + top_ratio = float(top_y) / float(h) + bottom_ratio = float(bottom_y) / float(h) + + # 更严格的判断条件(避免过早触发): + # 1. 顶部已经过了画面70%(>0.7),说明斑马线主要在画面最下方 + # 2. 底部接近画面底部(>0.85) + # 3. 
面积很小(<0.08),说明快消失了 + is_almost_done = (top_ratio > 0.7 and bottom_ratio > 0.85 and area_ratio < 0.08) + return is_almost_done + + def _compute_远_distance_alignment(self, mask: np.ndarray, h: int, w: int) -> tuple: + """计算远距离对准的角度和偏移(基于mask几何,不依赖条纹)""" + ys, xs = np.where(mask > 0) + if xs.size < 50: + return 0.0, 0.0 + + # 使用PCA计算主方向 + pts = np.stack([xs.astype(np.float32), ys.astype(np.float32)], axis=1) + mean = pts.mean(axis=0) + cov = np.cov((pts - mean).T) + eigvals, eigvecs = np.linalg.eig(cov) + v = eigvecs[:, np.argmax(eigvals)] + + # 计算角度(相对水平) + angle = np.degrees(np.arctan2(v[1], v[0])) + if angle > 90: angle -= 180 + if angle < -90: angle += 180 + + # 计算水平偏移(质心相对画面中心) + cx = float(mean[0]) + offset = (cx - (w / 2.0)) / max(1.0, w / 2.0) + + return float(angle), float(offset) + + def _draw_line_vertical_angle(self, image, center, angle_deg, length_ratio=0.7, color=(255, 255, 0), thickness=3): + """ + 以“竖直方向”为0°基准,angle_deg>0 表示左偏,<0 表示右偏。 + 在 center 处画一条通过点的直线。 + """ + H, W = image.shape[:2] + half_len = int(0.5 * length_ratio * min(H, W)) + rad = np.radians(angle_deg) + # 竖直基准: 向上的单位向量(0, -1) + # 旋转 angle 后的方向向量 = (sin, -cos) + vx = np.sin(rad); + vy = -np.cos(rad) + x0, y0 = center + p1 = (int(x0 - vx * half_len), int(y0 - vy * half_len)) + p2 = (int(x0 + vx * half_len), int(y0 + vy * half_len)) + cv2.line(image, p1, p2, color, thickness) + + def _draw_dashed_line_vertical_angle(self, image, center, angle_deg, length_ratio=0.7, + dash=12, gap=8, color=(255, 255, 255), thickness=2): + """同样以竖直为0°,画 through center 的虚线。""" + H, W = image.shape[:2] + half_len = int(0.5 * length_ratio * min(H, W)) + rad = np.radians(angle_deg) + vx = np.sin(rad); + vy = -np.cos(rad) + x0, y0 = center + x1, y1 = int(x0 - vx * half_len), int(y0 - vy * half_len) + x2, y2 = int(x0 + vx * half_len), int(y0 + vy * half_len) + + # 沿整条线分段画虚线 + total_len = int(np.hypot(x2 - x1, y2 - y1)) + if total_len <= 0: return + dx = (x2 - x1) / total_len + dy = (y2 - y1) / total_len + s = 0 + 
while s < total_len: + e = min(s + dash, total_len) + xa, ya = int(x1 + dx * s), int(y1 + dy * s) + xb, yb = int(x1 + dx * e), int(y1 + dy * e) + cv2.line(image, (xa, ya), (xb, yb), color, thickness) + s += (dash + gap) + + def _offset_from_centerline(self, center_pt, angle_vertical_deg, width, height, y_ratio=0.75) -> float: + """ + 基于“青色法线中央直线”计算左右偏移: + - angle_vertical_deg: 以“竖直方向为0°”的角(与 _draw_line_vertical_angle 相同坐标系) + - center_pt: 掩码质心 (cx, cy) + - y_ratio: 预瞄行高度(相对图像高度的比例),默认0.75(底部偏下更稳定) + 返回:归一化偏移(右为正,左为负),与原 offset 含义一致。 + """ + if center_pt is None: + return 0.0 + x0, y0 = center_pt + rad = np.radians(angle_vertical_deg) + # 与 _draw_line_vertical_angle 完全一致的方向向量定义 + vx = np.sin(rad) + vy = -np.cos(rad) + + # 取预瞄行的 y + y_target = float(int(height * y_ratio)) + + # 若法线几乎水平(极少出现),避免除0 + if abs(vy) < 1e-6: + x_at = float(x0) + else: + t = (y_target - float(y0)) / vy + x_at = float(x0) + t * vx + + x_at = float(np.clip(x_at, 0, width - 1)) + # 与旧 offset 定义一致:相对画面中心的归一化水平偏移(右正左负) + return float((x_at - (width / 2.0)) / max(1.0, width / 2.0)) + + def _compute_angle_and_offset(self, mask: np.ndarray) -> tuple: + """计算斑马线的角度和偏移(PCA 回退用)""" + H, W = mask.shape[:2] + ys, xs = np.where(mask > 0) + if xs.size < 50: + return 0.0, 0.0 + + # 使用PCA计算主方向 + pts = np.stack([xs.astype(np.float32), ys.astype(np.float32)], axis=1) + mean = pts.mean(axis=0) + cov = np.cov((pts - mean).T) + eigvals, eigvecs = np.linalg.eig(cov) + v = eigvecs[:, np.argmax(eigvals)] + + # 计算角度 + angle = np.degrees(np.arctan2(v[1], v[0])) + if angle > 90: angle -= 180 + if angle < -90: angle += 180 + + # 计算水平偏移 + cx = float(mean[0]) + offset = (cx - (W / 2.0)) / max(1.0, W / 2.0) + + return float(angle), float(offset) + + def _estimate_angle_by_stripes(self, mask: np.ndarray, gray: np.ndarray) -> Optional[Dict[str, Any]]: + """ + 基于掩码内条纹(霍夫线)估计角度和可视化(放宽参数 + 鲁棒聚类): + 返回 dict: { + 'angle_deg': float, # 相对竖直方向偏角([-45,45]),正=左偏,负=右偏 + 'lines': List[(x1,y1,x2,y2)], # 选中的条纹线段(图像坐标) + 'confidence': 
float, # [0,1] 加权圆均值合力强度 + 'count': int # 线段数量 + } + """ + try: + H, W = mask.shape[:2] + roi_top = int(0.45 * H) # 关注下半部分,稳定性更好 + m_roi = (mask[roi_top:H, :] > 0).astype(np.uint8) + g_roi = gray[roi_top:H, :] + + # 放宽边缘阈值 + g_blur = cv2.GaussianBlur(g_roi, (5, 5), 0) + edges = cv2.Canny(g_blur, 50, 150) + edges = cv2.bitwise_and(edges, edges, mask=m_roi * 255) + + # 放宽霍夫参数 + lines = cv2.HoughLinesP( + edges, + rho=1, + theta=np.pi / 180, + threshold=max(30, int(0.03 * W)), + minLineLength=int(0.15 * W), + maxLineGap=20 + ) + if lines is None: + return None + + angles, weights = [], [] + all_lines = [] + for x1, y1, x2, y2 in lines.reshape(-1, 4): + dx, dy = x2 - x1, y2 - y1 + length = float(np.hypot(dx, dy)) + if length < 8: + continue + ang = float(np.degrees(np.arctan2(dy, dx))) # 相对 x 轴 + if ang > 90: ang -= 180 + if ang < -90: ang += 180 + # 放宽角度接受范围 + if abs(ang) > 65: + continue + # 底部越近权重越大 + ymid = (y1 + y2) * 0.5 + roi_top + w = length * (0.5 + 0.5 * (ymid / max(1.0, H))) + angles.append(ang) + weights.append(w) + all_lines.append((int(x1), int(y1 + roi_top), int(x2), int(y2 + roi_top))) + + if len(angles) < 5: + return None + + # 角度鲁棒聚类:加权中位数 + MAD 剔除离群 + angs = np.array(angles, dtype=np.float32) + wts = np.array(weights, dtype=np.float32) + + # 加权中位数 + sort_idx = np.argsort(angs) + angs_sorted = angs[sort_idx] + wts_sorted = wts[sort_idx] + cum = np.cumsum(wts_sorted) + med_idx = np.searchsorted(cum, cum[-1] * 0.5) + med = float(angs_sorted[min(max(med_idx, 0), len(angs_sorted) - 1)]) + + # MAD(围绕中位数的绝对偏差中位数),阈值更宽 + dev = np.abs(angs - med) + mad = float(np.median(dev) + 1e-6) + deg_thr = max(12.0, 2.8 * mad) # 适度放宽 + keep = dev <= deg_thr + + if keep.sum() >= 3: + angs_keep = angs[keep] + wts_keep = wts[keep] + lines_keep = [all_lines[i] for i, k in enumerate(keep) if k] + else: + angs_keep = angs + wts_keep = wts + lines_keep = all_lines + + # 加权圆均值 + ang_rad = np.radians(angs_keep) + C = float(np.sum(wts_keep * np.cos(ang_rad))) + S = 
float(np.sum(wts_keep * np.sin(ang_rad))) + norm = float(np.sum(wts_keep) + 1e-6) + if abs(C) < 1e-6 and abs(S) < 1e-6: + return None + mean = float(np.degrees(np.arctan2(S, C))) + confidence = float(np.hypot(C, S) / norm) + + return { + "angle_deg": mean, + "lines": lines_keep, + "confidence": confidence, + "count": len(lines_keep), + } + except Exception: + return None + + def _get_crosswalk_guidance_features(self, mask: np.ndarray, image_shape: tuple) -> dict: + """计算斑马线引导特征(鲁棒中心线 + 目标点 + 角度/偏移)""" + try: + height, width = image_shape[:2] + min_run_px = max(12, int(width * 0.02)) + centerline_rows = [] + + # 自底向上扫描,按最大连续区段取左右边界的中点,忽略零散噪点 + for y in range(height - 1, int(height * 0.4), -5): + row = mask[y, :] + xs = np.where(row > 0)[0] + if xs.size <= min_run_px: + continue + splits = np.where(np.diff(xs) > 1)[0] + 1 + segments = np.split(xs, splits) if xs.size else [] + if not segments: + continue + seg = max(segments, key=lambda s: (s[-1] - s[0] + 1)) + if seg.size == 0 or (seg[-1] - seg[0] + 1) < min_run_px: + continue + center_x = 0.5 * (seg[0] + seg[-1]) + centerline_rows.append([y, center_x]) + + if len(centerline_rows) < 10: + return None + + data = np.array(centerline_rows, dtype=np.float32) + y_coords, x_coords = data[:, 0], data[:, 1] + + # 初始加权(底部更重要) + w_base = y_coords / float(height) + coeffs = np.polyfit(y_coords, x_coords, 2, w=w_base) + poly = np.poly1d(coeffs) + + # 一次鲁棒再加权(抑制弯折/异常点) + res = x_coords - poly(y_coords) + mad = np.median(np.abs(res - np.median(res))) + 1e-6 + c = 2.5 * mad + w_robust = 1.0 / (1.0 + (res / c) ** 2) + w_total = w_base * w_robust + coeffs = np.polyfit(y_coords, x_coords, 2, w=w_total) + poly = np.poly1d(coeffs) + + # 目标点与绘制点 + lookahead_y = int(height * 0.6) + target_x = float(poly(lookahead_y)) + plot_y = np.arange(int(height * 0.4), height, 5).astype(int) + plot_x = poly(plot_y).astype(int) + centerline_points = np.vstack((plot_x, plot_y)).T.tolist() + + # 角度(基于 x(y) 的导数)与水平偏移 + dpoly = np.polyder(poly) + dx_dy = 
float(dpoly(lookahead_y)) + angle_deg = float(np.degrees(np.arctan(dx_dy))) + offset = float((target_x - (width / 2.0)) / max(1.0, width / 2.0)) + + # 截断目标点范围 + tx = int(np.clip(target_x, 0, width - 1)) + return { + "target_point": (tx, lookahead_y), + "centerline_points": centerline_points, + "angle_deg": angle_deg, + "offset": offset, + } + except Exception: + return None + + # —— 障碍物:光流辅助方法(与 blindpath 一致) —— + def _get_edge_mask(self, mask, offset=10): + """获取掩码的内边缘区域,用于特征点检测""" + if mask is None: + return None + kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (offset*2, offset*2)) + inner = cv2.erode(mask, kernel, iterations=1) + edge = cv2.subtract(mask, inner) + kernel_small = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) + edge = cv2.dilate(edge, kernel_small, iterations=1) + return edge + + def _predict_mask_with_flow(self, prev_mask, prev_gray, curr_gray): + """使用 Lucas-Kanade 光流预测掩码位置(与 blindpath 一致)""" + try: + edge_mask = self._get_edge_mask(prev_mask, offset=10) + p0 = cv2.goodFeaturesToTrack(prev_gray, mask=edge_mask, **FEATURE_PARAMS) + if p0 is None or len(p0) < 8: + return None + p1, st, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, p0, None, **LK_PARAMS) + if p1 is None or st is None: + return None + good_new = p1[st == 1] + good_old = p0[st == 1] + if len(good_new) < 5: + return None + M, inliers = cv2.estimateAffinePartial2D(good_old, good_new, method=cv2.RANSAC, ransacReprojThreshold=5.0) + if M is None: + return None + H, W = curr_gray.shape[:2] + flow_mask = cv2.warpAffine(prev_mask, M, (W, H), + flags=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_CONSTANT, + borderValue=0) + return flow_mask + except Exception: + return None + + # —— 障碍物:检测与可视化(与 blindpath 一致) —— + def _detect_obstacles(self, image, path_mask=None): + """检测障碍物,调用 ObstacleDetectorClient.detect(与 blindpath 同步)""" + logger.info(f"[_detect_obstacles] 开始执行,Frame={self.frame_counter}, obstacle_detector={'已加载' if self.obstacle_detector else '未加载'}") + if 
self.obstacle_detector is None: + logger.warning("[_detect_obstacles] 障碍物检测器未加载!") + return [] + + try: + logger.info(f"[_detect_obstacles] 调用ObstacleDetectorClient.detect()... image.shape={image.shape}") + detected_obstacles = self.obstacle_detector.detect(image, path_mask=path_mask) + logger.info(f"[_detect_obstacles] 返回 {len(detected_obstacles)} 个物体") + + # 补充派生字段 + H, W = image.shape[:2] + for i, obj in enumerate(detected_obstacles): + if 'mask' in obj and obj['mask'] is not None: + y_coords, x_coords = np.where(obj['mask'] > 0) + if len(y_coords) > 0 and len(x_coords) > 0: + x1, y1 = int(np.min(x_coords)), int(np.min(y_coords)) + x2, y2 = int(np.max(x_coords)), int(np.max(y_coords)) + obj['box_coords'] = (x1, y1, x2, y2) + if 'y_position_ratio' not in obj: + obj['y_position_ratio'] = obj.get('center_y', 0) / H + if 'label' not in obj: + obj['label'] = obj.get('name', 'unknown') + if 'center' not in obj: + obj['center'] = (obj.get('center_x', 0), obj.get('center_y', 0)) + if 'confidence' not in obj: + obj['confidence'] = 0.5 + return detected_obstacles + except Exception as e: + logger.error(f"[_detect_obstacles] 障碍物检测失败: {e}", exc_info=True) + return [] + + def _stabilize_obstacle_list(self, obstacles, prev_obstacles, prev_gray, curr_gray, image_shape, threshold=0.5): + """稳定障碍物检测结果,避免重复叠加(与 blindpath 一致)""" + if not obstacles or prev_gray is None or curr_gray is None: + return obstacles + + H, W = image_shape + stabilized = [] + used_prev = set() + for curr_obs in obstacles: + if 'mask' not in curr_obs or curr_obs['mask'] is None: + stabilized.append(curr_obs) + continue + curr_mask = curr_obs['mask'] + best_match = None + best_iou = 0 + best_idx = -1 + + if prev_obstacles: + for idx, prev_obs in enumerate(prev_obstacles): + if idx in used_prev or 'mask' not in prev_obs: + continue + flow_mask = self._predict_mask_with_flow(prev_obs['mask'], prev_gray, curr_gray) + if flow_mask is None: + flow_mask = prev_obs['mask'] + inter = np.logical_and(curr_mask > 0, 
flow_mask > 0).sum() + union = np.logical_or(curr_mask > 0, flow_mask > 0).sum() + iou = float(inter) / float(union) if union > 0 else 0.0 + if iou > best_iou and iou > threshold: + best_iou = iou + best_match = flow_mask + best_idx = idx + + if best_match is not None and best_idx >= 0: + used_prev.add(best_idx) + fused_mask = ((0.8 * curr_mask + 0.2 * best_match) > 128).astype(np.uint8) * 255 + curr_obs['mask'] = fused_mask + self._update_obstacle_properties(curr_obs, H, W) + stabilized.append(curr_obs) + return stabilized + + def _update_obstacle_properties(self, obs, H, W): + """更新障碍物的派生属性""" + if 'mask' not in obs or obs['mask'] is None: + return + mask = obs['mask'] + y_coords, x_coords = np.where(mask > 0) + if len(y_coords) > 0: + obs['area'] = int(len(y_coords)) + obs['center_x'] = float(np.mean(x_coords)) + obs['center_y'] = float(np.mean(y_coords)) + obs['y_position_ratio'] = obs['center_y'] / H + obs['area_ratio'] = obs['area'] / float(H * W) + obs['bottom_y_ratio'] = np.max(y_coords) / float(H) + x1, y1 = int(np.min(x_coords)), int(np.min(y_coords)) + x2, y2 = int(np.max(x_coords)), int(np.max(y_coords)) + obs['box_coords'] = (x1, y1, x2, y2) + + # —— 可视化通用方法(与 blindpath 一致) —— + def _parse_color(self, color_str): + """解析颜色字符串,返回BGR格式""" + try: + if isinstance(color_str, tuple) and len(color_str) == 3: + return color_str + if color_str.startswith('rgba('): + values = color_str[5:-1].split(',') + r, g, b = int(values[0]), int(values[1]), int(values[2]) + return (b, g, r) # OpenCV: BGR + elif color_str == 'yellow': + return (0, 255, 255) + elif color_str == 'red': + return (0, 0, 255) + else: + return (0, 0, 255) + except: + return (0, 0, 255) + + def _add_obstacle_visualization(self, obstacle, visualizations, pulse_effect=False): + """添加障碍物可视化(简化版:仅边框,近红远黄)""" + try: + bottom_y_ratio = obstacle.get('bottom_y_ratio', 0) + area_ratio = obstacle.get('area_ratio', 0) + is_near = bottom_y_ratio > 0.7 or area_ratio > 0.1 + + if 'mask' in obstacle and 
obstacle['mask'] is not None: + mask = obstacle['mask'] + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + if contours: + max_contour = max(contours, key=cv2.contourArea) + points = max_contour.squeeze(1)[::5].tolist() + + # 根据距离选择边框颜色:近距离红色,远距离黄色 + if is_near: + outline_color = "rgba(255, 0, 0, 1.0)" # 红色 + thickness = 3 + else: + outline_color = "rgba(255, 255, 0, 0.8)" # 黄色 + thickness = 2 + + # 只添加边框,不添加填充和文字 + visualizations.append({ + "type": "outline", + "points": points, + "color": outline_color, + "thickness": thickness + }) + except Exception as e: + logger.error(f"[_add_obstacle_visualization] 添加障碍物可视化失败: {e}") + + def _draw_command_button(self, image, text): + """绘制底部中央的指令按钮(类似yolomedia风格)""" + try: + H, W = image.shape[:2] + full_text = f"当前指令:{text if text else '—'}" + + # 按钮参数 + font_px = 14 + pad_x, pad_y = 14, 8 + bottom_margin = 28 + + # 计算文字尺寸 + if PIL_AVAILABLE: + try: + from PIL import Image as PILImage, ImageDraw, ImageFont + # 尝试加载中文字体 + font = None + for font_path in [ + "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc", + "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", + ]: + if os.path.exists(font_path): + try: + font = ImageFont.truetype(font_path, font_px) + break + except: + continue + if font: + bbox = ImageDraw.Draw(PILImage.new('RGB', (1, 1))).textbbox((0, 0), full_text, font=font) + tw = max(1, bbox[2] - bbox[0]) + th = max(1, bbox[3] - bbox[1]) + else: + scale = font_px / 24.0 + (tw, th), _ = cv2.getTextSize(full_text, cv2.FONT_HERSHEY_SIMPLEX, scale, 1) + except: + scale = font_px / 24.0 + (tw, th), _ = cv2.getTextSize(full_text, cv2.FONT_HERSHEY_SIMPLEX, scale, 1) + else: + scale = font_px / 24.0 + (tw, th), _ = cv2.getTextSize(full_text, cv2.FONT_HERSHEY_SIMPLEX, scale, 1) + + # 计算按钮位置(底部居中) + bw = tw + pad_x * 2 + bh = th + pad_y * 2 + radius = max(10, bh // 2) + + cx = W // 2 + left = max(8, cx - bw // 2) + top = H - bottom_margin - bh + right = min(W - 8, left + bw) + bottom = top + bh + + # 
绘制半透明圆角背景 + overlay = image.copy() + bg_color = (26, 32, 41) # 深色背景 + border_color = (60, 76, 102) # 边框 + + # 圆角矩形(中间+两个圆) + cv2.rectangle(overlay, (left + radius, top), (right - radius, bottom), bg_color, -1) + cv2.circle(overlay, (left + radius, (top + bottom) // 2), radius, bg_color, -1) + cv2.circle(overlay, (right - radius, (top + bottom) // 2), radius, bg_color, -1) + + # 混合半透明 + cv2.addWeighted(overlay, 0.75, image, 0.25, 0, image) + + # 绘制边框 + cv2.rectangle(image, (left + radius, top), (right - radius, bottom), border_color, 1) + cv2.circle(image, (left + radius, (top + bottom) // 2), radius, border_color, 1) + cv2.circle(image, (right - radius, (top + bottom) // 2), radius, border_color, 1) + + # 绘制文字 + text_x = left + pad_x + text_y = top + pad_y + th + + if PIL_AVAILABLE and font: + # 使用PIL绘制中文 + pil_img = PILImage.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + draw = ImageDraw.Draw(pil_img) + draw.text((text_x, top + pad_y), full_text, font=font, fill=(255, 255, 255)) + image = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR) + else: + # 使用OpenCV绘制 + cv2.putText(image, full_text, (text_x, text_y), + cv2.FONT_HERSHEY_SIMPLEX, scale, (255, 255, 255), 1) + + return image + except Exception as e: + logger.error(f"绘制指令按钮失败: {e}") + return image + + def _draw_data_panel_no_bg(self, image, data, position=(15, 15)): + """绘制数据面板(无黑底,描边文字),与 blindpath 一致""" + if not PIL_AVAILABLE: + return image + try: + pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) + draw = ImageDraw.Draw(pil_img, "RGBA") + env_scale = float(os.getenv("AIGLASS_PANEL_SCALE", "0.7")) + base_font_size = max(10, int(round(14 * env_scale))) + font = None + font_paths = [ + "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc", + "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", + ] + for font_path in font_paths: + try: + if os.path.exists(font_path): + font = ImageFont.truetype(font_path, base_font_size) + break + except: + continue + if font is None: + font = 
ImageFont.load_default() + + y_offset = position[1] + for key, value in data.items(): + text = f"{key}: {value}" + for dx in [-1, 0, 1]: + for dy in [-1, 0, 1]: + if dx != 0 or dy != 0: + draw.text((position[0] + dx, y_offset + dy), text, + font=font, fill=(0, 0, 0, 255)) + draw.text((position[0], y_offset), text, font=font, fill=(255, 255, 255, 255)) + y_offset += base_font_size + 5 + return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR) + except Exception as e: + logger.warning(f"绘制数据面板失败: {e}") + return image + + def _draw_visualizations(self, image, viz_elements): + """增强的可视化绘制方法(与 blindpath 一致)""" + if not viz_elements: + return image + current_time = time.time() + panel_elements = [v for v in viz_elements if v.get("type") == "data_panel"] + standard_elements = [v for v in viz_elements if v.get("type") != "data_panel"] + + # 第一遍:半透明填充 + for element in standard_elements: + elem_type = element.get("type") + if elem_type in ['blind_path_mask', 'obstacle_mask', 'crosswalk_mask']: + points = np.array(element.get("points", []), dtype=np.int32) + if points.size > 0: + color = self._parse_color(element.get("color", "rgba(255, 255, 255, 0.5)")) + if element.get("effect") == "pulse": + pulse_speed = element.get("pulse_speed", 1.0) + alpha = 0.3 + 0.3 * np.sin(current_time * pulse_speed * 2 * np.pi) + else: + alpha = 0.4 + x, y, w, h = cv2.boundingRect(points) + x = max(0, x); y = max(0, y) + w = min(w, image.shape[1] - x) + h = min(h, image.shape[0] - y) + if w > 0 and h > 0: + binary_mask = np.zeros((h, w), dtype=np.uint8) + local_points = points - np.array([x, y]) + cv2.fillPoly(binary_mask, [local_points], 255) + local_region = image[y:y+h, x:x+w].copy() + color_overlay = np.zeros((h, w, 3), dtype=np.uint8) + color_overlay[:] = color + for c in range(3): + local_region[:, :, c] = np.where( + binary_mask > 0, + (1 - alpha) * local_region[:, :, c] + alpha * color_overlay[:, :, c], + local_region[:, :, c] + ) + image[y:y+h, x:x+w] = local_region + + # 第二遍:轮廓和元素 + 
for element in standard_elements: + elem_type = element.get("type") + if elem_type == 'outline': + points = np.array(element.get("points", []), dtype=np.int32) + if points.size > 0: + color = self._parse_color(element.get("color", "rgba(255, 255, 255, 1.0)")) + thickness = element.get("thickness", 3) + cv2.polylines(image, [points], isClosed=True, color=color, thickness=thickness) + elif elem_type == 'polyline': + points = np.array(element.get("points", []), dtype=np.int32) + if points.size > 0: + color = self._parse_color(element.get("color", "rgba(255, 255, 0, 1.0)")) + thickness = element.get("width", 2) + cv2.polylines(image, [points], isClosed=False, color=color, thickness=thickness) + elif elem_type == 'circle': + center = tuple(element.get("center", (0, 0))) + radius = element.get("radius", 10) + color = self._parse_color(element.get("color", "rgba(255, 0, 0, 1.0)")) + thickness = -1 if element.get("filled", True) else 2 + cv2.circle(image, center, radius, color, thickness) + elif elem_type == 'arrow': + start = tuple(element.get("start", (0, 0))) + end = tuple(element.get("end", (100, 100))) + color = self._parse_color(element.get("color", "rgba(0, 255, 255, 1.0)")) + thickness = element.get("thickness", 2) + tip_length = element.get("tip_length", 0.3) + cv2.arrowedLine(image, start, end, color, thickness, tipLength=tip_length) + elif elem_type == 'text_with_bg': + text = element.get("text", "") + pos = element.get("position", [10, 30]) + font_scale = element.get("font_scale", 0.6) + color = self._parse_color(element.get("color", "rgba(255, 255, 255, 1.0)")) + for dx in [-1, 0, 1]: + for dy in [-1, 0, 1]: + if dx != 0 or dy != 0: + cv2.putText(image, text, (pos[0] + dx, pos[1] + dy), + cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), 3) + cv2.putText(image, text, tuple(pos), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, 2) + elif elem_type == 'warning_icon': + pos = element.get("position", (100, 100)) + level = element.get("level", "info") + text = 
element.get("text", "") + flash = element.get("flash", False) + if level == "danger": + icon_color = (0, 0, 255) + text_color = (255, 255, 255) + elif level == "warning": + icon_color = (0, 165, 255) + text_color = (255, 255, 255) + else: + icon_color = (0, 255, 255) + text_color = (0, 0, 0) + if flash: + alpha = 0.5 + 0.5 * np.sin(current_time * 4 * np.pi) + icon_color = tuple(int(c * alpha) for c in icon_color) + triangle = np.array([ + [pos[0], pos[1] - 20], + [pos[0] - 15, pos[1]], + [pos[0] + 15, pos[1]] + ], np.int32) + cv2.fillPoly(image, [triangle], icon_color) + cv2.polylines(image, [triangle], True, (255, 255, 255), 2) + cv2.putText(image, "!", (pos[0] - 5, pos[1] - 5), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + if text: + font_scale = 0.5 + (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, 1) + text_pos = (pos[0] - tw // 2, pos[1] + 20) + for dx in [-1, 0, 1]: + for dy in [-1, 0, 1]: + if dx != 0 or dy != 0: + cv2.putText(image, text, (text_pos[0] + dx, text_pos[1] + dy), + cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 0, 0), 2) + cv2.putText(image, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, font_scale, text_color, 1) + elif elem_type == 'text': + text = element.get("text", "") + pos = tuple(element.get("pos", (10, 30))) + cv2.putText(image, text, pos, cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2) + + # 数据面板 + if PIL_AVAILABLE: + for panel in panel_elements: + image = self._draw_data_panel_no_bg(image, panel["data"], panel["position"]) + else: + for panel in panel_elements: + y_offset = panel["position"][1] + for key, value in panel["data"].items(): + text = f"{key}: {value}" + for dx in [-1, 0, 1]: + for dy in [-1, 0, 1]: + if dx != 0 or dy != 0: + cv2.putText(image, text, (panel["position"][0] + dx, y_offset + dy), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 3) + cv2.putText(image, text, (panel["position"][0], y_offset), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1) + y_offset += 25 + return image + + def 
_speech_for_obstacle(self, name: str) -> str: + """生成障碍物语音提示""" + k = (name or '').strip().lower() + if k == 'person': return "前方有人,注意避让。" + if k == 'car': return "前方有车,注意避让。" + if k == 'bicycle': return "前方有自行车,停一下。" + if k == 'motorcycle': return "前方有摩托车,停一下。" + if k == 'bus': return "前方有公交车,停一下。" + if k == 'truck': return "前方有卡车,停一下。" + if k == 'scooter': return "前方有电瓶车,停一下。" + if k == 'stroller': return "前方有婴儿车,停一下。" + if k == 'dog': return "前方有狗,停一下。" + if k == 'animal': return "前方有动物,停一下。" + return "前方有障碍物,注意避让。" + + def process_frame(self, bgr_image: np.ndarray) -> CrossStreetResult: + """处理单帧图像(每帧分割;若失败,用光流追踪上一帧掩码保持可视化与导航)""" + self.frame_counter += 1 + current_time = time.time() + + try: + annotated = bgr_image.copy() + h, w = bgr_image.shape[:2] + frame_visualizations = [] + + # 当前灰度图供 LK 与避障稳定使用 + gray = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) + + # ========== 1) 间隔执行分割(每4帧检测一次) ========== + crosswalk_mask = None + blindpath_mask = None + det_area = 0 + + # 【新增】检测间隔逻辑 + if self.seg_model and self.frame_counter % self.CROSSWALK_DETECTION_INTERVAL == 0: + # 执行新的检测 + # 使用较低的基础阈值获取所有候选 + base_thr = min(CROSSWALK_MIN_CONF, BLIND_MIN_CONF) + detections = self.seg_model.detect(bgr_image, confidence_threshold=base_thr) or [] + + # 按类别ID和名称分拣 + raw_cw, raw_bp = [], [] + for det in detections: + if not hasattr(det, 'mask') or det.mask is None: + continue + + cid = _cls_of(det) + name = str(getattr(det, "name", "")).lower() + + # 斑马线:ID匹配或名称匹配 + if (cid == CW_ID) or _in_set(name, _CW): + raw_cw.append(det) + # 盲道:ID匹配或名称匹配 + elif (cid == BP_ID) or _in_set(name, _BP): + raw_bp.append(det) + + # 二次阈值过滤 + cw_list = [d for d in raw_cw if _score_of(d) >= CROSSWALK_MIN_CONF] + bp_list = [d for d in raw_bp if _score_of(d) >= BLIND_MIN_CONF] + + # 合并斑马线mask + if cw_list: + cw_masks = [] + for det in cw_list: + mask = det.mask + if mask.shape != (h, w): + mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST) + mask_bin = (mask > 0.5).astype(np.uint8) + 
cw_masks.append(mask_bin) + if cw_masks: + crosswalk_mask = np.maximum.reduce(cw_masks) + det_area = int(crosswalk_mask.sum()) + if det_area < CROSSWALK_MIN_AREA: + crosswalk_mask = None + det_area = 0 + + # 合并盲道mask + if bp_list: + bp_masks = [] + for det in bp_list: + mask = det.mask + if mask.shape != (h, w): + mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST) + mask_bin = (mask > 0.5).astype(np.uint8) + bp_masks.append(mask_bin) + if bp_masks: + blindpath_mask = np.maximum.reduce(bp_masks) + + # 去交叠:从斑马线mask中移除盲道区域 + if crosswalk_mask is not None and blindpath_mask is not None: + crosswalk_mask = crosswalk_mask.copy() + crosswalk_mask[blindpath_mask > 0] = 0 + + # 盲道真伪判定 + if blindpath_mask is not None: + if not _looks_like_blind_path(blindpath_mask, crosswalk_mask, h, w): + blindpath_mask = None + + # 【新增】保存检测结果到缓存 + self.last_detected_crosswalk_mask = crosswalk_mask + self.last_detected_blindpath_mask = blindpath_mask + self.last_crosswalk_detection_frame = self.frame_counter + + else: + # 【新增】使用缓存的检测结果 + crosswalk_mask = self.last_detected_crosswalk_mask + blindpath_mask = self.last_detected_blindpath_mask + + # ========== 2) 分割失败 → 用上一帧特征点光流追踪重建 ========== + used_tracking = False + if crosswalk_mask is None: + if self.old_gray is not None and self.p0 is not None and len(self.p0) >= TRACK_MIN_POINTS: + try: + p1, st, err = cv2.calcOpticalFlowPyrLK(self.old_gray, gray, self.p0, None, **LK_PARAMS) + if p1 is not None and st is not None: + good_new = p1[st == 1] + if good_new is not None and len(good_new) >= TRACK_MIN_POINTS: + tracked_mask = self._hull_mask_from_points(good_new, (h, w)) + if tracked_mask is not None and int(tracked_mask.sum()) >= (0.3 * (self.prev_mask.sum() if self.prev_mask is not None else 1)): + crosswalk_mask = tracked_mask + used_tracking = True + self.p0 = good_new.reshape(-1, 1, 2) + else: + self.p0 = None + self.old_gray = None + except Exception as e: + logger.warning(f"[CROSS_STREET] LK 光流失败: {e}") + self.p0 = None + 
self.old_gray = None + + # ========== 3) EMA 平滑(减少抖动) + 形态学净化 ========== + if crosswalk_mask is not None: + m = crosswalk_mask.astype(np.float32) + if self.prev_mask_float is not None and self.prev_mask_float.shape == m.shape: + self.prev_mask_float = MASK_EMA_ALPHA * m + (1.0 - MASK_EMA_ALPHA) * self.prev_mask_float + else: + self.prev_mask_float = m + crosswalk_mask = (self.prev_mask_float > 0.5).astype(np.uint8) + crosswalk_mask = self._postprocess_mask(crosswalk_mask) + self.prev_mask = crosswalk_mask + self.prev_mask_ts = current_time + + # ========== 4) 若分割成功(或追踪成功)→ 播种/更新特征点 ========== + if crosswalk_mask is not None: + need_seed = (self.p0 is None or len(self.p0) < TRACK_MIN_POINTS or + (self.frame_counter - self.last_seed_frame) >= TRACK_RESEED_EVERY) + if need_seed: + pts = self._seed_points_from_mask(gray, crosswalk_mask) + if pts is not None and len(pts) >= TRACK_MIN_POINTS: + self.p0 = pts + self.old_gray = gray.copy() + self.last_seed_frame = self.frame_counter + else: + self.old_gray = gray.copy() + else: + self.crosswalk_detected = False + self.p0 = None + self.old_gray = None + + # ========== 4.5) 障碍物检测与可视化(与 blindpath 一致) ========== + # 使用 crosswalk_mask 作为 path_mask,若无则全局检测 + detected_obstacles = [] + if self.obstacle_detector is not None: + if self.frame_counter % self.OBSTACLE_DETECTION_INTERVAL == 0: + detected_obstacles = self._detect_obstacles(bgr_image, path_mask=crosswalk_mask) + # 稳定化 + if self.prev_gray is not None: + detected_obstacles = self._stabilize_obstacle_list( + detected_obstacles, + self.last_detected_obstacles, + self.prev_gray, + gray, + bgr_image.shape[:2] + ) + self.last_detected_obstacles = detected_obstacles + self.last_obstacle_detection_frame = self.frame_counter + else: + if self.frame_counter - self.last_obstacle_detection_frame < self.OBSTACLE_CACHE_DURATION_FRAMES: + detected_obstacles = self.last_detected_obstacles + else: + detected_obstacles = [] + # 可视化所有障碍物 + for obs in detected_obstacles: + 
self._add_obstacle_visualization(obs, frame_visualizations) + + # ========== 5) 状态机 + 可视化与导航指令 ========== + guidance_text = "" + + # 先绘制盲道(绿色mask,无黑底) + if blindpath_mask is not None: + # 只在掩码区域混合绿色,避免黑底 + mask_area = (blindpath_mask > 0).astype(bool) + green_color = np.array([0, 255, 0], dtype=np.float32) # BGR + # 在掩码区域内混合颜色 + for c in range(3): + annotated[:, :, c] = np.where( + mask_area, + (annotated[:, :, c] * 0.7 + green_color[c] * 0.3).astype(np.uint8), + annotated[:, :, c] + ) + # 绘制盲道边框 + bp_ct = self._largest_contour(blindpath_mask) + if bp_ct is not None: + cv2.drawContours(annotated, [bp_ct], -1, (0, 255, 0), 2) + + # 绘制斑马线(橙色mask,无描边,与盲道模式颜色一致) + if crosswalk_mask is not None: + self.crosswalk_detected = True + # 使用与盲道模式相同的橙色:BGR(0, 165, 255),只在掩码区域混合 + mask_area = (crosswalk_mask > 0).astype(bool) + orange_color = np.array([0, 165, 255], dtype=np.float32) # BGR + # 在掩码区域内混合颜色 + for c in range(3): + annotated[:, :, c] = np.where( + mask_area, + (annotated[:, :, c] * 0.7 + orange_color[c] * 0.3).astype(np.uint8), + annotated[:, :, c] + ) + + # ===== 状态机逻辑 ===== + if self.state == STATE_SEEKING: + # 阶段1:寻找并对准远处的斑马线 + if crosswalk_mask is not None: + is_near = self._is_crosswalk_near(crosswalk_mask, h, w) + + if is_near: + # 斑马线已到跟前,切换到红绿灯判定 + self.state = STATE_WAIT_LIGHT + guidance_text = "斑马线已在跟前,进入红绿灯判定模式" + self.last_seeking_guidance = "" # 重置节流状态 + else: + # 远距离对准引导(使用更宽松的阈值) + angle, offset = self._compute_远_distance_alignment(crosswalk_mask, h, w) + + # 优先角度,其次方位(使用SEEKING专用的宽松阈值) + if abs(angle) >= SEEKING_ANGLE_THRESH_DEG: + direction = "左转一点" if angle > 0 else "右转一点" + elif abs(offset) >= SEEKING_OFFSET_THRESH: + direction = "向右平移" if offset > 0 else "向左平移" + else: + direction = "保持直行" + + # 【移除左上角文字,改为右上角数据面板】 + # 添加右上角数据面板 + frame_visualizations.append({ + "type": "data_panel", + "data": { + "状态": "对准斑马线", + "角度": f"{angle:.1f}°", + "偏移": f"{offset:.2f}" + }, + "position": (w - 180, 20) + }) + + # 节流:只有当引导文本改变或超过时间间隔时才播报 + if current_time - 
self.last_guide_time > self.guide_interval: + if direction != self.last_seeking_guidance: + guidance_text = direction + self.last_seeking_guidance = direction + elif current_time - self.last_guide_time > self.guide_interval * 2: + # 超过2倍间隔,重复播报 + guidance_text = direction + else: + # 【移除左上角文字,改为右上角数据面板】 + frame_visualizations.append({ + "type": "data_panel", + "data": { + "状态": "寻找斑马线" + }, + "position": (w - 180, 20) + }) + self.last_seeking_guidance = "" # 没有斑马线时重置 + + elif self.state == STATE_WAIT_LIGHT: + # 阶段2:红绿灯判定 + # 【移除左上角文字,稍后添加右上角数据面板】 + + if TRAFFIC_LIGHT_AVAILABLE and trafficlight_detection: + try: + # 传入annotated(已包含斑马线和盲道),红绿灯检测在此基础上添加检测框 + result = trafficlight_detection.process_single_frame(annotated) + + # 可视化红绿灯检测结果(绘制检测框) + if result and 'vis_image' in result: + vis_img = result['vis_image'] + if vis_img is not None: + # 将红绿灯检测的可视化结果(带斑马线、盲道和检测框)更新到annotated + annotated = vis_img + + if result and 'stable_light' in result: + stable_light = result['stable_light'] + + if stable_light == 'go': + self.green_light_counter += 1 + # 【移除左上角文字,改为右上角数据面板】 + frame_visualizations.append({ + "type": "data_panel", + "data": { + "状态": "红绿灯判定", + "检测": f"绿灯 {self.green_light_counter}/{GREEN_LIGHT_STABLE_FRAMES}" + }, + "position": (w - 180, 20) + }) + + if self.green_light_counter >= GREEN_LIGHT_STABLE_FRAMES: + self.state = STATE_CROSSING + guidance_text = "绿灯稳定,开始通行。" + self.green_light_counter = 0 + self.crossing_end_announced = False # 重置过马路结束标志 + self.last_crosswalk_seen_time = current_time # 初始化斑马线检测时间 + self.last_blindpath_announce_time = 0 # 重置盲道播报时间 + else: + # 检测到绿灯但还不稳定,节流播报 + if current_time - self.last_waiting_light_time > 3.0: + guidance_text = "正在等待绿灯…" + self.last_waiting_light_time = current_time + else: + self.green_light_counter = 0 + if stable_light in ['stop', 'countdown_stop']: + # 【移除左上角文字,改为右上角数据面板】 + frame_visualizations.append({ + "type": "data_panel", + "data": { + "状态": "红绿灯判定", + "检测": "红灯,请等待" + }, + "position": (w - 180, 20) + }) 
+ # 红灯状态播报(节流) + if current_time - self.last_waiting_light_time > 3.0: + guidance_text = "正在等待绿灯…" + self.last_waiting_light_time = current_time + else: + # 其他状态(黄灯或未检测到),节流播报 + if current_time - self.last_waiting_light_time > 3.0: + guidance_text = "正在等待绿灯…" + self.last_waiting_light_time = current_time + else: + # 没有检测到稳定的红绿灯,节流播报 + if current_time - self.last_waiting_light_time > 3.0: + guidance_text = "正在等待绿灯…" + self.last_waiting_light_time = current_time + except Exception as e: + logger.warning(f"[CROSS_STREET] 红绿灯检测失败: {e}") + if current_time - self.last_waiting_light_time > 3.0: + guidance_text = "正在等待绿灯…" + self.last_waiting_light_time = current_time + else: + # 无红绿灯模块,直接切换 + # 【移除左上角文字,改为右上角数据面板】 + frame_visualizations.append({ + "type": "data_panel", + "data": { + "状态": "红绿灯判定", + "检测": "模块未加载" + }, + "position": (w - 180, 20) + }) + if current_time - self.last_guide_time > 2.0: + self.state = STATE_CROSSING + guidance_text = "开始通行" + self.crossing_end_announced = False # 重置过马路结束标志 + self.last_crosswalk_seen_time = current_time # 初始化斑马线检测时间 + self.last_blindpath_announce_time = 0 # 重置盲道播报时间 + + elif self.state == STATE_CROSSING: + # 阶段3:过马路引导(原有逻辑) + + # 【新增】实时红绿灯检测(在CROSSING状态中) + traffic_light_warning = None # 用于存储红绿灯警告信息 + if TRAFFIC_LIGHT_AVAILABLE and trafficlight_detection: + try: + # 传入annotated(已包含斑马线和盲道),红绿灯检测在此基础上添加检测框 + result = trafficlight_detection.process_single_frame(annotated) + + # 将红绿灯检测的可视化结果(带斑马线、盲道和检测框)更新到annotated + if result and 'vis_image' in result: + vis_img = result['vis_image'] + if vis_img is not None: + # 将红绿灯检测框叠加到annotated上(保留斑马线和盲道) + annotated = vis_img + + # 检查稳定状态,如果是绿灯倒计时,播报警告 + if result and 'stable_light' in result: + stable_light = result['stable_light'] + if stable_light == 'countdown_go': + # 绿灯倒计时,播报警告(节流) + if current_time - self.last_guide_time > 2.0: + traffic_light_warning = "绿灯快没了" + except Exception as e: + logger.warning(f"[CROSS_STREET] CROSSING状态红绿灯检测失败: {e}") + + if crosswalk_mask is not None: + # 
更新斑马线检测时间 + self.last_crosswalk_seen_time = current_time + + # 检测到斑马线:如果之前误播报了结束,现在重置标志回到正常流程 + area = int(crosswalk_mask.sum()) + area_ratio = float(area) / float(h * w) + # 如果斑马线面积还比较大(>0.1),说明还在过马路中,重置结束标志 + if area_ratio > 0.1 and self.crossing_end_announced: + self.crossing_end_announced = False + self.blindpath_announced = False + logger.info("[CROSS_STREET] 检测到斑马线,重置结束标志,回到正常过马路流程") + + # 【移除左上角文字,改为右上角数据面板】 + panel_data = { + "状态": "正在过马路", + "面积": f"{area_ratio:.2f}" + } + if self.crossing_end_announced: + panel_data["提示"] = "已播报结束" + frame_visualizations.append({ + "type": "data_panel", + "data": panel_data, + "position": (w - 180, 20) + }) + + # 使用"斑马线横纹法线的中央直线"来推导偏移(offset 初值仍给 0,后面根据青色法线更新) + angle_deg, offset = 0.0, 0.0 + + # 角度:优先使用条纹霍夫线估计;失败回退 PCA + angle_source = "条纹" + stripes = self._estimate_angle_by_stripes(crosswalk_mask, gray) + if stripes and ("angle_deg" in stripes): + angle_deg = -float(stripes["angle_deg"]) + for (x1, y1, x2, y2) in stripes.get("lines", []): + cv2.line(annotated, (x1, y1), (x2, y2), VIS_COLORS["stripes"], 2) + # 可视化方向箭头(底部中心,表示偏角相对竖直) + cx, cy = int(w * 0.5), int(h * 0.85) + length = int(60) + rad = np.radians(angle_deg) + dx = int(length * np.sin(rad)) + dy = int(length * np.cos(rad)) + cv2.arrowedLine(annotated, (cx, cy), (cx + dx, cy - dy), VIS_COLORS["heading"], 3, tipLength=0.25) + else: + angle_source = "PCA" + angle_deg, _ = self._compute_angle_and_offset(crosswalk_mask) + + + # === 基于掩码质心 + 条纹法线,绘制"青色法线中央直线" & "白色虚线(与红箭头同向)" === + # === 过中心的两条参考线:青色=法线、白色虚线=与红箭头同向 === + center_pt = self._mask_center(crosswalk_mask) + if center_pt is not None and stripes and ("angle_deg" in stripes): + # 1) 青色法线:使用"条纹均值角"作为【法线相对竖直】角,保证与橙色条纹垂直 + angle_blue = float(stripes["angle_deg"]) # ← 关键:不要再取负,不要再加减 90° + self._draw_line_vertical_angle(annotated, center_pt, angle_blue, + length_ratio=0.7, + color=VIS_COLORS["centerline"], thickness=3) + + # 2) 白色虚线:过质心的"画面竖直(0°)"——代表用户假定行走朝向 + angle_white = 0.0 + 
self._draw_dashed_line_vertical_angle(annotated, center_pt, angle_white, + length_ratio=0.7, + dash=12, gap=8, color=(255, 255, 255), thickness=2) + + # 3) 角差显示(可选):青色 vs 白虚线 + diff = angle_blue - 0.0 # = angle_blue + diff = (diff + 180.0) % 360.0 - 180.0 # wrap 到 [-180,180] + cv2.putText(annotated, f"{abs(diff):.1f}°", + (min(center_pt[0] + 12, w - 110), max(center_pt[1] - 12, 30)), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2) + + # === 用青色法线中央直线 计算"左右偏移" === + try: + # 注意:_offset_from_centerline 的角度坐标系与 _draw_line_vertical_angle 一致(竖直为0°) + offset_new = self._offset_from_centerline(center_pt, angle_blue, w, h, y_ratio=0.75) + offset = float(offset_new) + except Exception: + # 兜底:若计算异常,保持原 offset(默认为0) + pass + + # 导航方向(基础) + if abs(angle_deg) >= ANGLE_THRESH_DEG: + direction = "左转一点" if angle_deg > 0 else "右转一点" + elif abs(offset) >= OFFSET_THRESH: + direction = "向右平移" if offset > 0 else "向左平移" + else: + direction = "保持直行" + + # 障碍物引导优先级(近距离优先覆盖方向提示) + obstacle_override = None + if detected_obstacles: + NEAR_Y = 0.7 + NEAR_AREA = 0.1 + near_list = [o for o in detected_obstacles if (o.get('bottom_y_ratio', 0) > NEAR_Y or o.get('area_ratio', 0) > NEAR_AREA)] + if near_list: + name = (near_list[0].get('name') or '障碍物') + obstacle_override = self._speech_for_obstacle(name) + + # 【移除左上角调试信息,改为右上角数据面板】 + # 更新右上角数据面板(合并到已有的面板数据中) + src_text = "分割" if not used_tracking else "追踪" + # 数据面板在前面已经添加了,这里只记录调试数据 + # 稍后会统一添加完整的数据面板 + + # 语音输出(节流) + if current_time - self.last_guide_time > self.guide_interval: + # 检查是否快走完斑马线 + is_almost_done = self._is_crosswalk_almost_done(crosswalk_mask, h, w) + + # 调试信息:显示判定条件 + if self.frame_counter % 30 == 0: + ys = np.where(crosswalk_mask > 0)[0] + if ys.size > 0: + top_y, bottom_y = int(ys.min()), int(ys.max()) + logger.info(f"[CROSS_STREET] area_ratio={area_ratio:.3f}, top_ratio={top_y/h:.3f}, bottom_ratio={bottom_y/h:.3f}, almost_done={is_almost_done}") + + # 优先级1:红绿灯警告(绿灯倒计时) + if traffic_light_warning: + guidance_text = 
traffic_light_warning + self.last_guide_time = current_time + # 优先级2:过马路结束提示(斑马线快消失) + elif is_almost_done and not self.crossing_end_announced: + guidance_text = "过马路结束,准备上人行道。" + self.crossing_end_announced = True + self.last_guide_time = current_time + # 优先级3:盲道提示(过马路结束后检测到盲道,可重复播报但节流4秒) + elif self.crossing_end_announced and blindpath_mask is not None: + if current_time - self.last_blindpath_announce_time > 4.0: + guidance_text = "远处有盲道,继续前行。" + self.last_blindpath_announce_time = current_time + self.last_guide_time = current_time + # 优先级4:障碍物 + elif obstacle_override: + guidance_text = obstacle_override + self.last_guide_time = current_time + # 优先级5:方向引导 + else: + guidance_text = direction + self.last_guide_time = current_time + else: + # CROSSING 阶段但没有检测到斑马线 + no_crosswalk_duration = current_time - self.last_crosswalk_seen_time + # 【移除左上角文字,改为右上角数据面板】 + frame_visualizations.append({ + "type": "data_panel", + "data": { + "状态": "正在过马路", + "斑马线": f"未检测到 ({no_crosswalk_duration:.1f}s)" + }, + "position": (w - 180, 20) + }) + + # 连续超过10秒没有斑马线,才播报"过马路结束" + if no_crosswalk_duration > 10.0: + if not self.crossing_end_announced: + if current_time - self.last_guide_time > self.guide_interval: + # 优先级1:红绿灯警告 + if traffic_light_warning: + guidance_text = traffic_light_warning + self.last_guide_time = current_time + # 优先级2:过马路结束 + else: + guidance_text = "过马路结束,准备上人行道。" + self.crossing_end_announced = True + self.last_guide_time = current_time + # 播报结束后,检测到盲道则重复播报(节流4秒) + elif blindpath_mask is not None: + if current_time - self.last_blindpath_announce_time > 4.0: + guidance_text = "远处有盲道,继续前行。" + self.last_blindpath_announce_time = current_time + self.last_guide_time = current_time + + # 【移除帧信息】 + # 添加底部指令按钮(显示当前状态或引导内容) + if guidance_text: + current_instruction = guidance_text + elif self.state == STATE_SEEKING: + current_instruction = self.last_seeking_guidance if self.last_seeking_guidance else "寻找斑马线..." 
+ elif self.state == STATE_WAIT_LIGHT: + current_instruction = "等待绿灯..." + elif self.state == STATE_CROSSING: + current_instruction = "过马路中..." + else: + current_instruction = "等待中..." + annotated = self._draw_command_button(annotated, current_instruction) + + # 统一渲染障碍物等可视化图层(blindpath 风格) + if frame_visualizations: + annotated = self._draw_visualizations(annotated, frame_visualizations) + + # 【修改】不在工作流内部播放音频,由app_main统一处理 + # 直接返回guidance_text给上层调用者(app_main)来播放 + + # 更新 prev_gray(供障碍物稳定化使用) + self.prev_gray = gray + + return CrossStreetResult( + annotated_image=annotated, + guidance_text=guidance_text, + visualizations=frame_visualizations, + should_switch_to_blindpath=False + ) + + except Exception as e: + logger.error(f"[CROSS_STREET] 处理帧时出错: {e}", exc_info=True) + return CrossStreetResult( + annotated_image=bgr_image, + guidance_text="", + visualizations=[], + should_switch_to_blindpath=False + ) + +class YOLOModelWrapper: + """YOLO 模型包装器,将 predict 方法适配为 detect""" + + def __init__(self, yolo_model): + self.model = yolo_model + + def detect(self, image, confidence_threshold=0.25): + """使用 predict 方法并转换为 detect 格式""" + try: + results = self.model.predict(image, conf=confidence_threshold, verbose=False) + detections = [] + if results and len(results) > 0: + result = results[0] + if hasattr(result, 'masks') and result.masks is not None: + for i, mask in enumerate(result.masks.data): + if hasattr(result, 'boxes') and result.boxes is not None: + cls = int(result.boxes.cls[i].cpu().numpy()) + conf = float(result.boxes.conf[i].cpu().numpy()) + class Detection: + def __init__(self): + self.cls = cls + self.conf = conf + self.mask = mask.cpu().numpy() + detections.append(Detection()) + return detections + except Exception as e: + logger.error(f"[YOLO Wrapper] 检测错误: {e}") + return [] \ No newline at end of file diff --git a/yoloe_backend.py b/yoloe_backend.py new file mode 100644 index 0000000..223c10c --- /dev/null +++ b/yoloe_backend.py @@ -0,0 +1,97 @@ +# 
yoloe_backend.py +# -*- coding: utf-8 -*- +from typing import List, Dict, Any, Optional, Tuple, Union +import os +import cv2 +import numpy as np + +# Day 20: TensorRT 模型加载工具 +from model_utils import get_best_model_path + +# 兼容 YOLOE / YOLO +try: + from ultralytics import YOLOE as _MODEL +except Exception: + from ultralytics import YOLO as _MODEL + +# Day 20: 优先使用 TensorRT 引擎 +DEFAULT_MODEL_PATH = get_best_model_path(os.getenv("YOLOE_MODEL_PATH", "model/yoloe-11l-seg.pt")) +TRACKER_CFG = os.getenv("YOLO_TRACKER_YAML", "bytetrack.yaml") + +class YoloEBackend: + def __init__(self, model_path: Optional[str] = None, device: Optional[Union[str, int]] = None): + actual_path = model_path or DEFAULT_MODEL_PATH + self.model = _MODEL(actual_path) + # Day 20: TensorRT 引擎不需要 .to() + from model_utils import is_tensorrt_engine + if not is_tensorrt_engine(actual_path): + self.model.to("cuda") + self.device = device + + def set_text_classes(self, names: List[str]): + # YOLOE 文本提示:与你模板一致 + # Day 20: TensorRT 引擎不支持 get_text_pe + if hasattr(self.model, 'get_text_pe'): + self.model.set_classes(names, self.model.get_text_pe(names)) + else: + print(f"[YOLOE] TensorRT 模式:跳过 set_text_classes") + + def segment(self, + frame_bgr: np.ndarray, + conf: float = 0.20, + iou: float = 0.45, + imgsz: int = None, # Day 20: 改为 None,从环境变量读取 + persist: bool = True + ) -> Dict[str, Any]: + """ + 返回: + dict{ + 'masks': List[np.uint8(H,W)], # 0/1 mask + 'boxes': List[Tuple[x1,y1,x2,y2]], + 'cls_ids': List[int], + 'names': List[str], + 'ids': List[Optional[int]] + } + """ + # Day 20: 使用环境变量,保持与 TensorRT 导出尺寸一致 + if imgsz is None: + imgsz = int(os.getenv("AIGLASS_YOLO_IMGSZ", "480")) + + r = self.model.track( + frame_bgr, + conf=conf, iou=iou, imgsz=imgsz, + persist=persist, tracker=TRACKER_CFG, verbose=False + )[0] + + out = {"masks": [], "boxes": [], "cls_ids": [], "names": [], "ids": []} + masks_obj = getattr(r, "masks", None) + boxes_obj = getattr(r, "boxes", None) + + if masks_obj is None or 
getattr(masks_obj, "data", None) is None: + return out + + mask_arr = masks_obj.data.cpu().numpy() # [N, h, w], float(0..1) + H, W = frame_bgr.shape[:2] + id2name = r.names if hasattr(r, "names") else {} + N = mask_arr.shape[0] + + if boxes_obj is not None: + xyxy = boxes_obj.xyxy.cpu().numpy() + cls = boxes_obj.cls.cpu().tolist() + tids = boxes_obj.id.int().cpu().tolist() if boxes_obj.id is not None else [None]*N + else: + xyxy = [None]*N + cls = [0]*N + tids = [None]*N + + for i in range(N): + bin_mask = (mask_arr[i] > 0.5).astype(np.uint8) + if bin_mask.shape[:2] != (H, W): + bin_mask = cv2.resize(bin_mask, (W, H), interpolation=cv2.INTER_NEAREST) + out["masks"].append(bin_mask) + out["boxes"].append(tuple(xyxy[i]) if xyxy[i] is not None else None) + cid = int(cls[i]) if cls is not None else 0 + out["cls_ids"].append(cid) + out["names"].append(id2name.get(cid, str(cid))) + out["ids"].append(int(tids[i]) if tids[i] is not None else None) + return out diff --git a/yolomedia.py b/yolomedia.py new file mode 100644 index 0000000..a49854f --- /dev/null +++ b/yolomedia.py @@ -0,0 +1,1567 @@ +# -*- coding: utf-8 -*- +""" +YOLOv8 单类分割 + MediaPipe Hand Landmarker + 光流追踪(多边形) +更新点(本版重点): +- 左下角第二个进度条"距离(≈1)" 已完全替换为:ratio = 物体面积 / 手面积 的"接近 1 程度"可视化 + -> range_score = 1 - clamp(|ratio - 1| / RATIO_TOL, 0..1) + -> 画面同时显示 ratio 数值;ratio<1 提示"向前靠近",ratio>1 提示"后退",在 [1±RATIO_TOL] 内为"保持" +其他特性: +- Enter 锁定:在分割掩码"内收 5px"的内边界上取光流点 +- TRACK 期间:监控当前多边形外扩 40px 周边区域的分割,命中即重锁 +- 成功判定:放宽"握持(Grasp)"启发式(拿瓶子无需特别紧) +- 手骨架单色渲染;测距箭头(端点定位线 + 箭头 + 像素值) +- 中文绘制优先 Pillow + 系统中文字体(避免问号) +""" + +import os +import time +import threading +import math +import cv2 +import numpy as np +import mediapipe as mp +from mediapipe.framework.formats import landmark_pb2 +from ultralytics import YOLO +from ultralytics.utils.plotting import Colors +import bridge_io +import pygame # 用于播放本地音频文件 + +from audio_player import play_audio_threadsafe +PERF_DEBUG = False # 打印调试信息(False 关闭) +HAND_DOWNSCALE = 0.8 # 
HandLandmarker 的输入缩放 0.5=长宽各减半(≈1/4 像素量) +HAND_FPS_DIV = 1 # 人手每 2 帧跑一次(1=每帧;2=隔帧;3=每3帧) + + +# === 前端风格配色(BGR) + UI叠加管理(左下角按行堆叠) === +FRONTEND_COLORS = { + "text": (230, 237, 243), # --text: #e6edf3 + "muted": (159, 176, 195), # --muted: #9fb0c3 + "ok": (126, 231, 135), # --ok: #7ee787 + "err": (128, 128, 255), # --err: #ff8080 (BGR) + "accent": (251, 218, 97), # #61dafb 近似的强调色(BGR 取近似亮色) +} + +# 底部指令按钮文本 +CURRENT_COMMAND_TEXT = "—" + +_UI_LINE = 0 +_UI_H = 0 +_UI_TR_LINE = 0 # 右上角逐行叠放计数 +_UI_TOP_MARGIN = 12 +_UI_RIGHT_MARGIN = 12 +UNIFIED_FONT_PX = 12 # 统一字号 + + +def ui_reset_overlay(img_h: int): + """每帧调用一次,重置叠加行计数(改为右上角布局)。""" + global _UI_LINE, _UI_H, _UI_TR_LINE + _UI_LINE = 0 + _UI_TR_LINE = 0 + _UI_H = int(img_h) + + +def _ui_next_y_top(font_size: int) -> int: + """返回右上角下一行的y(顶部对齐),并推进行计数。""" + global _UI_TR_LINE + line_gap = max(4, int(font_size * 0.25)) + y_top = _UI_TOP_MARGIN + (_UI_TR_LINE * (font_size + line_gap)) + _UI_TR_LINE += 1 + return y_top + + +def set_current_command(text: str): + global CURRENT_COMMAND_TEXT + try: + CURRENT_COMMAND_TEXT = str(text) if text else "—" + except Exception: + CURRENT_COMMAND_TEXT = "—" + + +def draw_command_pill(img_bgr: np.ndarray, label: str): + """统一改为右上角白色文案。不再绘制底部圆角按钮。""" + text_prefix = "当前指令:" + full_text = f"{text_prefix}{label if label else '—'}" + # 直接用统一文本渲染 + draw_text_cn(img_bgr, full_text, (0, 0), font_size=UNIFIED_FONT_PX, color=(255,255,255), ui_hint=True) + +try: + from yoloe_backend import YoloEBackend + _YOLOE_READY = True +except Exception as e: + _YOLOE_READY = False + print(f"[DETECTOR] YOLOE backend not ready: {e}", flush=True) + +# ========= 路径参数(按需修改)========= +YOLO_MODEL_PATH = 'model/shoppingbest5.pt' +HAND_TASK_PATH = 'model/hand_landmarker.task' + +# ========= 摄像头 ========= +CAM_INDEX = 0 +INPUT_W, INPUT_H = 600, 480 + +# ========= 分割显示 ========= +STROKE_WIDTH = 5 # 增加描边宽度,让黄框和绿框更粗 +MASK_ALPHA = 0.45 +CONF_THRESHOLD = 0.20 + +# —— 单 prompt 识别(只显示一个类)—— +PROMPT_NAME = "AD_milk" 
+PROMPT_STRICT = True + +# ========= 对齐条参数 ========= +ALIGN_LOOSE_PCT = 0.12 # 归一化距离阈(相对画面对角线) + +# ========= 距离条参数(本版采用"ratio≈1"为目标)========= +RATIO_IDEAL = 1.0 # 理想值:物体面积/手面积 ≈ 1 +RATIO_TOL = 0.25 # 容许偏离:±25% 内认为距离合适 + +# ========= 语音播报 ========= +TTS_INTERVAL_SEC = 1.0 +ENABLE_TTS = True + +# ========= 光流(LK)与特征点 ========= +LK_PARAMS = dict(winSize=(21, 21), + maxLevel=3, + criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 12, 0.03)) +FEATURE_PARAMS = dict(maxCorners=600, + qualityLevel=0.001, + minDistance=5, + blockSize=7) + +# ========= 关键参数:内收与周边监控 ========= +INNER_OFFSET_PX_LOCK = 5 # Enter 锁定:掩码腐蚀像素,保证点在物体内部 +EDGE_DILATE_PX = 2 # 取内边界后小膨胀,利于提点 +PERI_MONITOR_PX = 40 # TRACK:监控多边形外扩 40px 的周边带 +PERI_CHECK_EVERY = 5 # 每隔 N 帧做一次周边分割检查,改为每帧 + +# ========= 轮廓精度参数 ========= +CONTOUR_EPSILON_FACTOR = 0.002 # Douglas-Peucker算法的精度因子,越小越精细 +TRACK_EPSILON_FACTOR = 0.003 # 追踪模式下的轮廓精度因子 + +# ========= YOLO实时矫正参数 ========= +YOLO_CORRECTION_IOU_THRESHOLD = 0.2 # IoU阈值,越低越积极矫正 +YOLO_CORRECTION_CONF_THRESHOLD = 0.15 # 置信度阈值,越低检测越敏感 + +# ========= 方向引导音频路径 ========= +AUDIO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "music") # 相对路径 +AUDIO_FILES = { + "向上": os.path.join(AUDIO_DIR, "向上.wav"), + "向下": os.path.join(AUDIO_DIR, "向下.wav"), + "向左": os.path.join(AUDIO_DIR, "向左.wav"), + "向右": os.path.join(AUDIO_DIR, "向右.wav"), + "向前": os.path.join(AUDIO_DIR, "向前.wav"), + "后退": os.path.join(AUDIO_DIR, "向后.wav"), + "OK": os.path.join(AUDIO_DIR, "已对中.wav"), +} +GUIDANCE_INTERVAL_SEC = 1.5 # 引导播报间隔 + +# 初始化pygame音频 +pygame.mixer.init() + +# ========= 窗口 ========= +WINDOW = "YOLO Seg + Flow Polygon (Peri-Relock) (Grab Guidance)" + +# ======== MediaPipe 别名 ======== +BaseOptions = mp.tasks.BaseOptions +VisionRunningMode = mp.tasks.vision.RunningMode +HandLandmarker = mp.tasks.vision.HandLandmarker +HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions +HAND_CONNECTIONS = mp.solutions.hands.HAND_CONNECTIONS + +# ======== HandLandmarker 回调缓存 ======== 
+_last_result = None # (result, timestamp_ms) + +def on_result(result: mp.tasks.vision.HandLandmarkerResult, + output_image: mp.Image, timestamp_ms: int): + global _last_result + _last_result = (result, timestamp_ms) + +def _to_proto(hand_lms) -> landmark_pb2.NormalizedLandmarkList: + proto = landmark_pb2.NormalizedLandmarkList() + proto.landmark.extend([ + landmark_pb2.NormalizedLandmark(x=p.x, y=p.y, z=p.z) for p in hand_lms + ]) + return proto + +# —— 手骨架单色渲染 —— # +def draw_hands_mono(img_bgr, hand_lms, color=(0, 255, 255), r=2, t=2): + mp_drawing = mp.solutions.drawing_utils + landmark_spec = mp_drawing.DrawingSpec(color=color, thickness=-1, circle_radius=r) + connection_spec = mp_drawing.DrawingSpec(color=color, thickness=t, circle_radius=r) + if hasattr(hand_lms, "landmark"): + proto = hand_lms + else: + proto = _to_proto(hand_lms) + mp_drawing.draw_landmarks( + img_bgr, + landmark_list=proto, + connections=HAND_CONNECTIONS, + landmark_drawing_spec=landmark_spec, + connection_drawing_spec=connection_spec, + ) + +def norm_name(s: str) -> str: + return "".join(str(s).lower().split()) + +# ======== TTS(pyttsx3)======== +class Speaker: + def __init__(self, enable=True): + self.enable = enable + self._engine = None + self._lock = threading.Lock() + if enable: + try: + import pyttsx3 + self._engine = pyttsx3.init() + self._engine.setProperty('rate', 190) + self._engine.setProperty('volume', 1.0) + except Exception: + self._engine = None + self.enable = False + + def say_async(self, text: str): + if not self.enable or not text: + return + def _run(): + try: + with self._lock: + self._engine.stop() + self._engine.say(text) + self._engine.iterate() + t0 = time.time() + while self._engine.isBusy() and (time.time() - t0) < 1.2: + self._engine.iterate() + time.sleep(0.01) + except Exception: + pass + threading.Thread(target=_run, daemon=True).start() + +# ======== 中文文本绘制(优先 Pillow)======== +_PIL_OK = False +_FONT_PATH = None +def _init_font(): + global _PIL_OK, 
_FONT_PATH + try: + from PIL import ImageFont # noqa + _PIL_OK = True + except Exception: + _PIL_OK = False + return + candidates = [ + # Linux 中文字体路径 (Ubuntu/Debian) + "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc", + "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", + "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", + "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc", + "/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf", + ] + for p in candidates: + if os.path.exists(p): + _FONT_PATH = p + return + _PIL_OK = False +_init_font() + +def draw_text_cn(img_bgr, text, xy, font_size=20, color=(255,255,255), stroke=None, ui_hint=True): + """ + 统一的文本绘制: + - 默认采用前端风格:小字体、左下角按行堆叠(ui_hint=True)。 + - 若 ui_hint=False 则按传入 xy 精确定位(用于贴近目标的小标注)。 + """ + # 统一样式:微软雅黑 + 固定字号 + 纯白 + color = (255, 255, 255) + font_size = int(UNIFIED_FONT_PX) + + H, W = img_bgr.shape[:2] + # 右上角堆叠布局:计算y顶边,并按文本宽度右对齐 + y_top = _ui_next_y_top(font_size) if ui_hint else _ui_next_y_top(font_size) + # 先估算文本尺寸 + tw = th = 0 + font_obj = None + + if _PIL_OK and _FONT_PATH: + try: + from PIL import Image, ImageDraw, ImageFont + font_obj = ImageFont.truetype(_FONT_PATH, font_size) + # 计算文本尺寸 + bbox = ImageDraw.Draw(Image.new('RGB', (1,1))).textbbox((0,0), text, font=font_obj) + tw = max(1, bbox[2] - bbox[0]) + th = max(1, bbox[3] - bbox[1]) + except Exception: + pass + if _PIL_OK and _FONT_PATH and font_obj is not None: + try: + from PIL import Image, ImageDraw + img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) + pil_img = Image.fromarray(img_rgb) + draw = ImageDraw.Draw(pil_img) + x = max(8, W - _UI_RIGHT_MARGIN - tw) + y = y_top + draw.text((x, y), text, fill=(255,255,255), font=font_obj) + img_bgr[:] = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR) + return + except Exception: + pass + # OpenCV 回退:估算尺寸并右对齐 + if tw <= 0 or th <= 0: + scale = font_size/24.0 + (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scale, 2) + x = max(8, W - _UI_RIGHT_MARGIN - int(tw)) + 
y_baseline = int(y_top + th) + cv2.putText(img_bgr, text, (x, y_baseline), cv2.FONT_HERSHEY_SIMPLEX, font_size/24.0, color, 2, cv2.LINE_AA) + +# ======== 工具函数 ======== +def clamp01(x): return max(0.0, min(1.0, x)) + +def draw_progress_bars(vis, align_score, range_score): + """第一条=对齐,第二条=距离(≈1),对应 ratio 与 1 的接近程度""" + H, W = vis.shape[:2] + bar_w = int(W * 0.28) + bar_h = 12 + gap = 8 + x0 = 12 + y0 = H - 2*bar_h - gap - 12 + # 背景 + cv2.rectangle(vis, (x0, y0), (x0 + bar_w, y0 + bar_h), (50, 50, 50), -1) + cv2.rectangle(vis, (x0, y0 + bar_h + gap), (x0 + bar_w, y0 + 2*bar_h + gap), (50, 50, 50), -1) + # 填充 + cv2.rectangle(vis, (x0, y0), (x0 + int(bar_w * clamp01(align_score)), y0 + bar_h), (0, 220, 0), -1) + cv2.rectangle(vis, (x0, y0 + bar_h + gap), (x0 + int(bar_w * clamp01(range_score)), y0 + 2*bar_h + gap), (0, 180, 255), -1) + draw_text_cn(vis, "对齐", (x0, y0 - 18), font_size=18, color=(180,180,180)) + draw_text_cn(vis, "距离(≈1)", (x0, y0 + bar_h + gap - 18), font_size=18, color=(180,180,180)) + +def polygon_center_and_area(poly): + if poly is None or len(poly) < 3: + return None, 0.0 + poly = np.array(poly, dtype=np.float32) + M = cv2.moments(poly) + if abs(M["m00"]) < 1e-6: + c = np.mean(poly, axis=0) + return (float(c[0]), float(c[1])), 0.0 + cx = float(M["m10"] / M["m00"]) + cy = float(M["m01"] / M["m00"]) + area = float(cv2.contourArea(poly.astype(np.int32))) + return (cx, cy), area + +def hand_bbox_and_area(lms, W, H): + xs = [int(p.x * W) for p in lms] + ys = [int(p.y * H) for p in lms] + if not xs or not ys: + return None, 0.0 + x0, y0, x1, y1 = min(xs), min(ys), max(xs), max(ys) + w = max(1, x1 - x0) + h = max(1, y1 - y0) + area = float(w * h) + return (x0, y0, w, h), area + +# ======== 手势:握持(Grasp) 识别(放宽版启发式)======== +THUMB_INDEX_CLOSE = 0.34 # 放宽 +FINGERTIP_NEAR = 0.44 # 放宽 +MIN_CURLED_COUNT = 1 # 放宽 + +def detect_grasp(hand_lms, W, H): + box, _ = hand_bbox_and_area(hand_lms, W, H) + if not box: + return False, 0.0 + x0, y0, w0, h0 = box + hand_diag = 
float(np.hypot(w0, h0)) + 1e-6 + palm_idx = [0, 5, 9, 13, 17] + px = np.mean([hand_lms[i].x * W for i in palm_idx]) + py = np.mean([hand_lms[i].y * H for i in palm_idx]) + palm = np.array([px, py], dtype=np.float32) + t4 = np.array([hand_lms[4].x * W, hand_lms[4].y * H], dtype=np.float32) + t8 = np.array([hand_lms[8].x * W, hand_lms[8].y * H], dtype=np.float32) + thumb_index_dist = float(np.linalg.norm(t4 - t8)) / hand_diag + tips = [12, 16, 20] + dists = [] + for i in tips: + ti = np.array([hand_lms[i].x * W, hand_lms[i].y * H], dtype=np.float32) + dists.append(float(np.linalg.norm(ti - palm)) / hand_diag) + curled_cnt = sum(1 for d in dists if d < FINGERTIP_NEAR) + cond1 = (thumb_index_dist < THUMB_INDEX_CLOSE) + cond2 = (curled_cnt >= MIN_CURLED_COUNT) + score = 0.5 * (1.0 - min(thumb_index_dist / THUMB_INDEX_CLOSE, 1.0)) + \ + 0.5 * min(curled_cnt / 3.0, 1.0) + return (cond1 and cond2), score + +# ======== 内收后的边界提点 ======== +def inner_offset_edge(mask_bin, offset_px=5, edge_dilate_px=2): + if offset_px > 0: + k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2*offset_px+1, 2*offset_px+1)) + eroded = cv2.erode(mask_bin.astype(np.uint8), k, iterations=1) + else: + eroded = mask_bin.astype(np.uint8) + edges = cv2.Canny(eroded*255, 50, 150) + if edge_dilate_px > 0: + k2 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2*edge_dilate_px+1, 2*edge_dilate_px+1)) + edges = cv2.dilate(edges, k2, iterations=1) + return edges # uint8 0/255 + +# ======== YOLO 分割:全帧或 ROI 内选择最佳 mask ======== +def find_best_mask(frame_bgr, yolo, W, H, target_cls_id, conf_thr=0.10, roi_rect=None): + results = yolo(frame_bgr, verbose=False) + best_mask = None + best_score = 0.0 + if results and results[0].masks is not None: + r0 = results[0] + for mask_t, conf_t, cls_t in zip(r0.masks.data, r0.boxes.conf, r0.boxes.cls): + cls_id = int(cls_t.item()) + conf_value = float(conf_t.item()) + if target_cls_id is not None and cls_id != target_cls_id: + continue + if conf_value < conf_thr: + continue + 
mask_np = mask_t.detach().cpu().numpy() + mask_rz = cv2.resize(mask_np, (W, H), interpolation=cv2.INTER_LINEAR) + mask_bin = (mask_rz > 0.5).astype(np.uint8) + + if roi_rect is not None: + x0, y0, x1, y1 = roi_rect + x0, y0 = max(0, x0), max(0, y0) + x1, y1 = min(W-1, x1), min(H-1, y1) + roi = np.zeros_like(mask_bin, dtype=np.uint8) + roi[y0:y1+1, x0:x1+1] = 1 + overlap = (mask_bin & roi).sum() + score = float(overlap) + else: + score = float(mask_bin.sum()) + + if score > best_score: + best_score = score + best_mask = mask_bin + return best_mask + +# ======== 工程化:测距箭头(端点定位线 + 箭头 + 像素值)======== +def draw_measure_arrow(img, p1, p2, txt=None): + p1 = (int(p1[0]), int(p1[1])) + p2 = (int(p2[0]), int(p2[1])) + # 端点定位线 + def end_cap(pt, size=8, color=(255,255,255), t=1): + x, y = pt + cv2.line(img, (x - size, y), (x + size, y), color, t, cv2.LINE_AA) + cv2.line(img, (x, y - size), (x, y + size), color, t, cv2.LINE_AA) + end_cap(p1, size=7, color=(255,255,255), t=1) + end_cap(p2, size=7, color=(255,255,255), t=1) + # 箭头 + cv2.arrowedLine(img, p1, p2, (255,255,255), 2, cv2.LINE_AA, tipLength=0.18) + # 文本 + if txt is None: + d = int(np.hypot(p2[0]-p1[0], p2[1]-p1[1])) + txt = f"{d}px" + mid = ((p1[0]+p2[0])//2, (p1[1]+p2[1])//2) + font = cv2.FONT_HERSHEY_SIMPLEX + fs, th = 0.6, 2 + (tw, th_text), _ = cv2.getTextSize(txt, font, fs, th) + pad = 4 + x0 = mid[0] - tw//2 - pad + y0 = mid[1] - th_text - 6 + x1 = mid[0] + tw//2 + pad + y1 = mid[1] + 6 + cv2.rectangle(img, (x0, y0), (x1, y1), (32,32,32), -1) + cv2.putText(img, txt, (x0+pad, y1-6), font, fs, (255,255,255), th, cv2.LINE_AA) + +# 添加绘制虚线的函数 +def draw_dashed_line(img, pt1, pt2, color=(255, 255, 255), thickness=2, dash_length=10, gap_length=5): + """绘制虚线""" + pt1 = np.array(pt1, dtype=np.float32) + pt2 = np.array(pt2, dtype=np.float32) + line_vec = pt2 - pt1 + line_len = np.linalg.norm(line_vec) + if line_len < 1: + return + + line_vec = line_vec / line_len # 单位向量 + + # 绘制虚线段 + current_pos = 0 + while current_pos < 
line_len: + start_pos = current_pos + end_pos = min(current_pos + dash_length, line_len) + + start_pt = pt1 + line_vec * start_pos + end_pt = pt1 + line_vec * end_pos + + cv2.line(img, tuple(start_pt.astype(int)), tuple(end_pt.astype(int)), color, thickness) + + current_pos += dash_length + gap_length + +# 添加绘制手部轮廓的函数 +def draw_hand_contour(img, hand_lms, W, H, color=(255, 255, 255), thickness=1): + """绘制手部landmarks的凸包轮廓""" + # 获取所有手部关键点 + points = [] + for lm in hand_lms: + x = int(lm.x * W) + y = int(lm.y * H) + points.append([x, y]) + + if len(points) > 3: + points = np.array(points, dtype=np.int32) + # 计算凸包 + hull = cv2.convexHull(points) + # 绘制凸包轮廓 + cv2.polylines(img, [hull], True, color, thickness) + +# 检测手和物体是否接触 +def check_hand_object_contact(hand_box, poly, overlap_threshold=0.15): + """ + 检测手的边界框和物体多边形是否有重叠 + 返回: (是否接触, 重叠比例) + """ + if hand_box is None or poly is None or len(poly) < 3: + return False, 0.0 + + # 获取手的边界框 + hx, hy, hw, hh = hand_box + hand_rect = np.array([ + [hx, hy], + [hx + hw, hy], + [hx + hw, hy + hh], + [hx, hy + hh] + ], dtype=np.int32) + + # 创建掩码来计算重叠 + H = int(max(hy + hh, np.max(poly[:, 1])) + 10) + W = int(max(hx + hw, np.max(poly[:, 0])) + 10) + + hand_mask = np.zeros((H, W), dtype=np.uint8) + cv2.fillPoly(hand_mask, [hand_rect], 1) + + obj_mask = np.zeros((H, W), dtype=np.uint8) + cv2.fillPoly(obj_mask, [poly.astype(np.int32)], 1) + + # 计算重叠 + intersection = np.logical_and(hand_mask, obj_mask).sum() + hand_area = hand_mask.sum() + + # 重叠比例(相对于手的面积) + overlap_ratio = intersection / max(1.0, hand_area) + + return overlap_ratio > overlap_threshold, overlap_ratio + +# 添加方向判断函数 +def get_guidance_direction(hand_center, object_center, hand_area, object_area, hand_box=None, poly=None): + """ + 根据手心和物体中心位置,以及面积比,返回引导方向 + 返回: (方向文字, 是否需要前后调整) + """ + if hand_center is None or object_center is None: + return None, None + + # 首先检查手和物体是否接触 + is_touching = False + overlap_ratio = 0.0 + if hand_box is not None and poly is not None: + 
is_touching, overlap_ratio = check_hand_object_contact(hand_box, poly, overlap_threshold=0.1) + + hx, hy = hand_center + ox, oy = object_center + + # 计算水平和垂直偏差 + dx = ox - hx # 正数表示物体在右边 + dy = oy - hy # 正数表示物体在下边 + + # 如果手和物体已经接触,直接返回"向前" + if is_touching: + return "向前", f"接触度: {overlap_ratio:.1%}" + + # 如果没有接触,引导上下左右 + # 判断主要方向 + h_threshold = 30 # 水平偏差阈值(像素) + v_threshold = 30 # 垂直偏差阈值(像素) + + h_dir = None + v_dir = None + + # 水平方向 + if abs(dx) > h_threshold: + h_dir = "向右" if dx > 0 else "向左" + + # 垂直方向 + if abs(dy) > v_threshold: + v_dir = "向下" if dy > 0 else "向上" + + # 选择偏移最大的方向 + if abs(dx) > abs(dy) and h_dir: + # 水平偏移更大 + return h_dir, v_dir + elif v_dir: + # 垂直偏移更大或相等 + return v_dir, h_dir + else: + # 已经在中心附近但还没接触,提示靠近 + distance = np.sqrt(dx**2 + dy**2) + if distance < 50: # 很近但还没接触 + return "向前", "请缓慢靠近" + else: + return "保持", None + +# 播放音频的函数 +def play_guidance_audio(direction): + """播放方向引导音频""" + # 直接调用新的音频播放函数 + play_audio_threadsafe(direction) + # 同步更新底部按钮的指令文本 + try: + if isinstance(direction, str) and direction.strip(): + set_current_command(direction.strip()) + except Exception: + pass + +# 添加居中判断函数 +def get_center_guidance(object_center, frame_center, threshold=30): + """ + 判断物体是否在画面中心,返回引导方向 + 返回: (方向文字, 是否已居中) + """ + if object_center is None: + return None, False + + ox, oy = object_center + cx, cy = frame_center + + dx = cx - ox # 正数表示需要向右移动 + dy = cy - oy # 正数表示需要向下移动 + + # 判断是否已经居中 + distance = np.sqrt(dx**2 + dy**2) + if distance < threshold: + return "已居中", True + + # 判断主要方向(对调左右和上下) + if abs(dx) > abs(dy): + return "向左" if dx > 0 else "向右", False # 对调了 + else: + return "向上" if dy > 0 else "向下", False # 对调了 + +def main(headless: bool = False, prompt_name: str = None, stop_event=None): + + # OpenCV 优化 + try: + import cv2 + cv2.setUseOptimized(True) + cv2.setNumThreads(2) # 视 CPU 核心数而定;树莓派类设备可设 1 + except Exception: + pass + + + + + # 如果传入了 prompt_name,使用它替换全局的 PROMPT_NAME + global PROMPT_NAME + if prompt_name: + PROMPT_NAME = prompt_name 
+ print(f"[YOLOMEDIA] Using dynamic prompt: {PROMPT_NAME}") + + speaker = Speaker(ENABLE_TTS) + last_tts_ts = 0.0 + MODE = "SEGMENT" # 模式:SEGMENT -> FLASH -> CENTER_GUIDE -> TRACK + colors = Colors() + + FRAME_IDX = 0 + last_mask = None # 上一帧"目标掩膜"(用于 IoU 降噪) + flow_mask = None # 光流外推得到的掩膜(你现有代码里会更新它) + flow_grace = 0 # YOLOE 丢检后,允许光流顶住的计数 + last_seen_ts = 0.0 # 最近一次 YOLOE 成功检测的时间戳 + locked_id = None # (可选)若你在 tracker 里记录了 id,可在下面选择相同 id + # 刷新/容错参数(可按需微调) + REDETECT_EVERY = 5 # 每 5 帧强制"信任 YOLOE 一次" + FLOW_GRACE_MAX = 8 # YOLOE 连续丢检时,光流最多顶 8 帧 + IOU_MIN_KEEP = 0.20 # 新/旧掩膜 IoU 太低时,用平滑合成,避免闪烁 + + + + print("[INIT] 加载 YOLO 模型...") + # NOTE: shoppingbest 不再用于找东西流程;如其他模式仍需,可保留 yolo = YOLO(...) 但不在本流程使用 + # yolo = YOLO(YOLO_MODEL_PATH) + + # —— 直接启用 YOLOE 文本提示后端(不再先查 shoppingbest)—— + use_yoloe = False + yoloe_backend = None + if _YOLOE_READY: + try: + yoloe_backend = YoloEBackend() # 可用 YOLOE_MODEL_PATH 环境变量指定模型 + yoloe_backend.set_text_classes([PROMPT_NAME]) # 文本类别 + use_yoloe = True + print(f"[DETECTOR] YOLOE text-prompt backend enabled for: {PROMPT_NAME}", flush=True) + except Exception as e: + print(f"[DETECTOR] YOLOE init failed: {e}", flush=True) + else: + print("[DETECTOR] YOLOE backend not ready (import failed)", flush=True) + + # 类名映射(YOLOE 模式下简化) + if use_yoloe: + # YOLOE 模式下,只有一个目标类 + id_to_name = {0: PROMPT_NAME} + name_to_id = {norm_name(PROMPT_NAME): 0} + target_cls_id = 0 + else: + # 如果将来需要支持传统 YOLO,可以在这里初始化 + id_to_name = {} + name_to_id = {} + target_cls_id = None + + # 目标类已在上面的 YOLOE 模式中设置 + + print(f"[CLASS] target id={target_cls_id}, name={id_to_name.get(target_cls_id, 'N/A')}") + print(f"[阈值] conf >= {CONF_THRESHOLD:.2f}") + + # Hand Landmarker + print("[INIT] 初始化 Hand Landmarker...") + base = BaseOptions(model_asset_path=HAND_TASK_PATH) + hand_options = HandLandmarkerOptions( + base_options=base, + running_mode=VisionRunningMode.LIVE_STREAM, + num_hands=1, + min_hand_detection_confidence=0.40, + min_hand_presence_confidence=0.50, + 
min_tracking_confidence=0.70, + result_callback=on_result + ) + landmarker = HandLandmarker.create_from_options(hand_options) + + W = None + H = None + print("[Bridge] 等待 ESP32 画面 ...") + + # [headless] 仅在非 headless 时创建窗口(原逻辑保留,外层加判断) + if not headless: + cv2.namedWindow(WINDOW, cv2.WINDOW_NORMAL) + + # 光流缓存 + old_gray = None + p0 = None + lock_edge_debug = None # 调试可视化:内边界 + track_frame_count = 0 # 控制周边监控频率 + last_poly_box = None # 当前多边形外接矩形 + + fps_hist = [] + + # 添加自动锁定相关变量 + auto_lock_start_time = None # 开始检测到物体的时间 + auto_lock_delay = 1.0 # 1秒后自动锁定 + last_detected_mask = None # 最后检测到的mask + + # 添加闪烁动画相关变量 + flash_start_time = None # 闪烁开始时间 + flash_duration = 1.0 # 闪烁持续时间(秒) + flash_frequency = 1 # 闪烁频率(Hz) - 只闪一次 + flash_mask = None # 用于闪烁的mask + flash_color = (0, 255, 255) # 闪烁颜色(黄色) + + # 添加引导相关变量 + last_guidance_time = 0 + last_guidance_direction = None + + # 添加居中引导相关变量 + center_guide_mask = None # 用于居中引导的mask + center_guide_start = None # 居中引导开始时间 + center_threshold = 30 # 居中判定阈值(像素) + last_center_guide_time = 0 # 上次居中引导语音时间 + center_reached = False # 是否已经到达中心 + + # 添加抓取跟踪相关变量 + grasp_tracking_frames = [] # 存储最近的手和物体位置 + grasp_tracking_duration = 1.0 # 需要持续1秒 + grasp_movement_threshold = 10 # 最小移动像素阈值(提高阈值) + grasp_detected = False # 是否已经检测到抓取 + grasp_start_time = None # 开始检测到协同移动的时间 + + # 背景参考点(用于检测相机移动) - 移到这里初始化 + background_points = None + old_background_gray = None + + try: + while True: + # 检查停止事件 + if stop_event and stop_event.is_set(): + print("[YOLOMEDIA] Stop event detected, exiting...") + break + + frame = bridge_io.wait_raw_bgr(timeout_sec=0.5) + if frame is None: + # 没取到帧就继续等(ESP32还没连上或暂时无新帧) + # [headless] 给出 1ms 让出调度,避免空转 + if headless: + cv2.waitKey(1) + continue + + # 每帧重置 UI 文字叠加到左下角 + H, W = frame.shape[:2] + ui_reset_overlay(H) + + vis = frame.copy() + t_now = time.time() + + # 抽帧 + 降采样(人手识别) + if FRAME_IDX % HAND_FPS_DIV == 0: + rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + if HAND_DOWNSCALE and HAND_DOWNSCALE != 1.0: + small = 
cv2.resize(rgb, None, fx=HAND_DOWNSCALE, fy=HAND_DOWNSCALE, interpolation=cv2.INTER_AREA) + mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=small) + else: + mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb) + landmarker.detect_async(mp_image, int(t_now * 1000)) + # 否则跳过,复用上一次 _last_result;Landmarker 会自己做 tracking + + + # 取手心、手框、握持(放宽版) + hand_center = None + hand_area = None + hand_box = None + grasp_now = False + grasp_score = 0.0 + if _last_result is not None: + res, _ = _last_result + if res.hand_landmarks and len(res.hand_landmarks) > 0: + l0 = res.hand_landmarks[0] + + # 绘制手部骨骼 + draw_hands_mono(vis, l0, color=(0, 255, 255), r=2, t=2) + + # 绘制手部轮廓(替代矩形框) + draw_hand_contour(vis, l0, W, H, color=(255, 255, 255), thickness=1) + + xs = [p.x * W for p in l0] + ys = [p.y * H for p in l0] + hand_center = (float(sum(xs)/len(xs)), float(sum(ys)/len(ys))) + hand_box, hand_area = hand_bbox_and_area(l0, W, H) + # 注释掉矩形框绘制 + # if hand_box: + # x0, y0, w0, h0 = hand_box + # cv2.rectangle(vis, (x0, y0), (x0+w0, y0+h0), (0,255,255), 1) + grasp_now, grasp_score = detect_grasp(l0, W, H) + draw_text_cn(vis, f"握持评分: {grasp_score:.2f}", (10, 70), font_size=18, color=(0, 180, 255)) + + + if MODE == "SEGMENT": + # —— 仅 YOLOE:每帧文本提示分割 + 取最大目标(删掉 shoppingbest 与重复 YOLOE 段)—— + FRAME_IDX += 1 + candidate_masks = [] + detected_object = False + + if use_yoloe and yoloe_backend is not None: + # 每帧都跑;persist=True 便于维持目标 ID + det = yoloe_backend.segment(frame, conf=0.20, iou=0.45, persist=True) + H, W = frame.shape[:2] + + # 选一个掩膜:优先与 locked_id 相同;否则面积最大 + chosen_idx = None + if det["masks"]: + if locked_id is not None and det["ids"] and (locked_id in det["ids"]): + chosen_idx = det["ids"].index(locked_id) + else: + areas = [int(m.sum()) for m in det["masks"]] + chosen_idx = int(np.argmax(areas)) + + if chosen_idx is not None: + m = det["masks"][chosen_idx] + if m.shape[:2] != (H, W): + m = cv2.resize(m, (W, H), interpolation=cv2.INTER_NEAREST) + + mask_bin = (m > 
0).astype(np.uint8) + candidate_masks.append({ + "mask": mask_bin, + "area": int(mask_bin.sum()), + "name": PROMPT_NAME, + "cls_id": 0, + "conf": 0.99, + }) + detected_object = True + + # 简单可视化(半透明叠层 + 轮廓),不影响你后面的逻辑 + colored = np.zeros_like(frame, dtype=np.uint8) + colored[mask_bin == 1] = (0, 255, 255) + vis = cv2.addWeighted(vis, 1.0, colored, MASK_ALPHA, 0) + contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + if contours: + # 选择最大轮廓并进行适度平滑 + largest_contour = max(contours, key=cv2.contourArea) + # 使用Douglas-Peucker算法适度简化,保持更多细节 + epsilon = CONTOUR_EPSILON_FACTOR * cv2.arcLength(largest_contour, True) # 更小的epsilon保留更多细节 + smoothed_contour = cv2.approxPolyDP(largest_contour, epsilon, True) + cv2.drawContours(vis, [smoothed_contour], -1, (0, 255, 255), STROKE_WIDTH) + + # 记录 id,减少目标跳变 + if det["ids"] and len(det["ids"]) > chosen_idx and det["ids"][chosen_idx] is not None: + locked_id = int(det["ids"][chosen_idx]) + + else: + # YOLOE 未就绪:提示并保持原画面(不阻塞前端) + draw_text_cn(vis, "YOLOE 未就绪,显示原始画面", (10, 100), font_size=22, color=(0, 215, 255)) + + # 选择面积最大的mask ←—— 这一行下面开始保留你的原代码 + + # 选择面积最大的mask + if candidate_masks: + # 按面积降序排序 + candidate_masks.sort(key=lambda x: x['area'], reverse=True) + largest_mask_info = candidate_masks[0] + last_detected_mask = largest_mask_info['mask'] + + # 可选:在最大的物体上添加特殊标记 + contours, _ = cv2.findContours(last_detected_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + if contours: + # 找到最大轮廓的中心 + M = cv2.moments(contours[0]) + if M["m00"] != 0: + cx = int(M["m10"] / M["m00"]) + cy = int(M["m01"] / M["m00"]) + # 在最大物体中心画一个圆圈标记 + cv2.circle(vis, (cx, cy), 8, (0, 255, 0), 2) + cv2.circle(vis, (cx, cy), 12, (0, 255, 0), 1) + # 目标标签:保持就地标注 + draw_text_cn(vis, "目标", (cx + 15, cy - 5), font_size=16, color=FRONTEND_COLORS["ok"], ui_hint=False) + + # 显示检测信息 + if len(candidate_masks) > 1: + draw_text_cn(vis, f"检测到{len(candidate_masks)}个物体,选择最大的(面积: {largest_mask_info['area']})", + (10, H - 30), font_size=16, 
color=(255, 255, 0)) + + # 自动锁定逻辑 + if detected_object and last_detected_mask is not None: + if auto_lock_start_time is None: + auto_lock_start_time = t_now + print(f"[AUTO] 检测到物体,选择最大的(面积: {np.sum(last_detected_mask)}),开始倒计时...") + #play_guidance_audio("检测到物体") # 添加这行 + + elapsed = t_now - auto_lock_start_time + remaining = auto_lock_delay - elapsed + + if remaining > 0: + # 显示倒计时(移动到左下角,前端风格) + draw_text_cn(vis, f"检测到物体,{remaining:.1f}秒后自动锁定", (10, 100), font_size=16, color=FRONTEND_COLORS["text"], stroke=(0,0,0)) + + # 绘制锁定框 - 使用虚线框表示正在准备锁定 + if last_detected_mask is not None: + contours, _ = cv2.findContours(last_detected_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + if contours: + # 找到最大轮廓 + largest_contour = max(contours, key=cv2.contourArea) + # 简化轮廓 + epsilon = CONTOUR_EPSILON_FACTOR * cv2.arcLength(largest_contour, True) + smoothed_contour = cv2.approxPolyDP(largest_contour, epsilon, True) + + # 根据倒计时进度改变颜色亮度 + progress = 1.0 - (remaining / auto_lock_delay) + color_intensity = int(100 + 155 * progress) # 从100到255 + lock_color = (0, color_intensity, color_intensity) # 黄色渐亮 + + # 绘制虚线轮廓 + pts = smoothed_contour.reshape(-1, 2) + for i in range(len(pts)): + pt1 = tuple(pts[i]) + pt2 = tuple(pts[(i + 1) % len(pts)]) + # 使用虚线效果(通过绘制短线段) + draw_dashed_line(vis, pt1, pt2, color=lock_color, thickness=3, + dash_length=15, gap_length=8) + else: + # 进入闪烁模式 + print("[AUTO] 进入闪烁动画模式") + MODE = "FLASH" + flash_start_time = t_now + flash_mask = last_detected_mask.copy() + auto_lock_start_time = None + play_guidance_audio("检测到物体") + else: + # 没有检测到物体,重置计时器 + if auto_lock_start_time is not None: + print("[AUTO] 物体丢失,重置倒计时") + auto_lock_start_time = None + last_detected_mask = None + draw_text_cn(vis, "分割中... 
等待检测到物体", (10, 100), font_size=16, color=FRONTEND_COLORS["muted"]) + + elif MODE == "FLASH": + # 闪烁动画模式 + if flash_start_time is not None and flash_mask is not None: + elapsed = t_now - flash_start_time + + if elapsed < flash_duration: + # 计算渐入渐出效果 + # 前0.3秒渐入,中间0.4秒保持,后0.3秒渐出 + if elapsed < 0.3: + # 渐入阶段 + alpha = elapsed / 0.3 * 0.8 # 0到0.8 + elif elapsed < 0.7: + # 保持阶段 + alpha = 0.8 + else: + # 渐出阶段 + alpha = (1.0 - elapsed) / 0.3 * 0.8 # 0.8到0 + + # 绘制闪烁的mask + colored = np.zeros_like(frame, dtype=np.uint8) + colored[flash_mask == 1] = flash_color + vis = cv2.addWeighted(vis, 1.0 - alpha, colored, alpha, 0) + + # 绘制轮廓(固定粗细,颜色渐变) + contours, _ = cv2.findContours(flash_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + if contours: + # 轮廓颜色也跟随alpha变化 + contour_color = tuple(int(c * (0.5 + alpha * 0.5)) for c in flash_color) + cv2.drawContours(vis, contours, -1, contour_color, STROKE_WIDTH + 1) + + # 显示提示文字(左下角) + draw_text_cn(vis, "正在锁定目标...", (10, 100), font_size=18, color=FRONTEND_COLORS["accent"]) + else: + # 闪烁结束,初始化光流追踪并进入居中引导模式 + print("[AUTO] 闪烁结束,初始化光流追踪") + edge_mask = inner_offset_edge(flash_mask, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX) + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + pts = cv2.goodFeaturesToTrack(gray, mask=edge_mask, **FEATURE_PARAMS) + + if pts is not None and len(pts) >= 8: + p0 = pts + old_gray = gray + MODE = "CENTER_GUIDE" + lock_edge_debug = edge_mask.copy() + track_frame_count = 0 + center_guide_start = t_now + center_reached = False + flash_start_time = None + flash_mask = None + last_detected_mask = None + print(f"[LOCK] 内边界特征点数={len(p0)} → CENTER_GUIDE") + else: + print("[LOCK] 内边界特征点不足,返回检测模式") + MODE = "SEGMENT" + flash_start_time = None + flash_mask = None + last_detected_mask = None + + elif MODE == "CENTER_GUIDE": + # 居中引导模式(使用光流追踪) + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + poly_center = None + poly_area = 0.0 + + if old_gray is not None and p0 is not None and len(p0) >= 5: + # 光流追踪 + 
p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, gray, p0, None, **LK_PARAMS) + if p1 is not None and st is not None: + good_new = p1[st == 1] + if len(good_new) >= 5: + p0 = good_new.reshape(-1, 1, 2) + hull = cv2.convexHull(good_new.reshape(-1,1,2)) + poly = hull.reshape(-1, 2) + + if len(poly) >= 3: + H, W = frame.shape[:2] + + # 把当前光流多边形 rasterize 成掩膜(便于与 YOLOE 掩膜做 IoU) + poly_mask = np.zeros((H, W), dtype=np.uint8) + cv2.fillPoly(poly_mask, [poly.astype(np.int32)], 1) + + # 降频:每3帧用 YOLOE 重新检测,其余帧依赖光流维持 + need_reseed = False + new_det_mask = None + + if use_yoloe and yoloe_backend is not None and (FRAME_IDX % 3 == 0): + # 添加调试信息 + if FRAME_IDX % 30 == 0: # 每30帧打印一次 + print(f"[YOLOE] 实时检测第 {FRAME_IDX} 帧") + det = yoloe_backend.segment(frame, conf=0.20, iou=0.45, persist=True) + if det["masks"]: + # 取面积最大的那个 + areas = [int(m.sum()) for m in det["masks"]] + j = int(np.argmax(areas)) + m = det["masks"][j] + if m.shape[:2] != (H, W): + m = cv2.resize(m, (W, H), interpolation=cv2.INTER_NEAREST) + new_det_mask = (m > 0).astype(np.uint8) + + # 和当前光流多边形的 IoU + inter = np.logical_and(new_det_mask, poly_mask).sum() + union = np.logical_or(new_det_mask, poly_mask).sum() + 1e-6 + iou = inter / union + + # IoU 太低,说明漂了:用 YOLOE 的掩膜重播种光流 + # 降低阈值,让 YOLOE 更容易更新光流 + if iou < 0.5: # 从 IOU_MIN_KEEP (0.20) 提高到 0.5 + need_reseed = True + # 用新掩膜的「内边界特征点」播种 + edge_mask = inner_offset_edge(new_det_mask, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX) + gray2 = gray # 本帧灰度图已在上面算过 + pts = cv2.goodFeaturesToTrack(gray2, mask=edge_mask, **FEATURE_PARAMS) + if pts is not None and len(pts) >= 8: + p0 = pts + old_gray = gray2 + # 更新 last_mask,便于下游逻辑一致 + last_mask = new_det_mask.copy() + last_seen_ts = time.time() + flow_grace = 0 + print("[RESEED] YOLOE 低 IoU 触发重播种(已更新光流特征点)") + + # 如果这帧没重播种,但 YOLOE 有结果且与 poly 很接近,可以做一次"平滑融合",抑制抖动 + if (not need_reseed) and (new_det_mask is not None): + inter = np.logical_and(new_det_mask, poly_mask).sum() + union = np.logical_or(new_det_mask, 
poly_mask).sum() + 1e-6 + iou = inter / union + # 降低融合阈值,让 YOLOE 结果更容易被采用 + if iou < 0.95: # 从 0.90 提高到 0.95 + # 增加 YOLOE 的权重,让实时检测更明显 + poly_mask = ((0.8 * new_det_mask + 0.2 * poly_mask) > 0.5).astype(np.uint8) + # 用更新后的 poly_mask 回写到可视化与引导的后续变量(如果你下游用的是 last_detected_mask/last_mask) + last_mask = poly_mask.copy() + # 更新多边形轮廓,让可视化实时更新 + contours, _ = cv2.findContours(poly_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + if contours: + # 找到最大轮廓 + largest_contour = max(contours, key=cv2.contourArea) + # 使用精细的轮廓处理,保留更多细节 + epsilon = TRACK_EPSILON_FACTOR * cv2.arcLength(largest_contour, True) + poly = cv2.approxPolyDP(largest_contour, epsilon, True).reshape(-1, 2) + # 注释掉凸包处理,保留原始轮廓细节 + # hull = cv2.convexHull(poly.reshape(-1,1,2)) + # poly = hull.reshape(-1, 2) + # 重新计算特征点 + edge_mask = inner_offset_edge(poly_mask, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX) + pts = cv2.goodFeaturesToTrack(gray, mask=edge_mask, **FEATURE_PARAMS) + if pts is not None and len(pts) >= 5: + p0 = pts + + # 绘制追踪的多边形 - 使用更粗的线条 + cv2.polylines(vis, [poly.astype(np.int32)], isClosed=True, color=(0,255,255), thickness=STROKE_WIDTH) + + # 计算多边形中心 + poly_center, poly_area = polygon_center_and_area(poly) + + if poly_center: + object_center = (int(poly_center[0]), int(poly_center[1])) + + # 画面中心 + frame_center = (W // 2, H // 2) + + # 绘制物品中心点 + cv2.circle(vis, object_center, 8, (0, 255, 0), -1) + cv2.circle(vis, object_center, 12, (0, 255, 0), 2) + + # 绘制画面中心十字 + cv2.line(vis, (frame_center[0] - 20, frame_center[1]), + (frame_center[0] + 20, frame_center[1]), (255, 255, 255), 2) + cv2.line(vis, (frame_center[0], frame_center[1] - 20), + (frame_center[0], frame_center[1] + 20), (255, 255, 255), 2) + + # 绘制引导虚线 + draw_dashed_line(vis, object_center, frame_center, + color=(255, 255, 0), thickness=2, + dash_length=10, gap_length=5) + + # 获取引导方向 + direction, is_centered = get_center_guidance(object_center, frame_center, center_threshold) + + if not center_reached: + if is_centered: 
+ # 到达中心,播放OK音效 + center_reached = True + last_center_guide_time = t_now + play_guidance_audio("OK") + try: + bridge_io.send_ui_final("✓ 物品已居中!") + except Exception: + pass + draw_text_cn(vis, "✓ 物品已居中!", (10, 60), font_size=18, color=FRONTEND_COLORS["ok"]) + else: + # 显示引导文字 + msg = f"请将物品移到画面中心: {direction}" + try: + # 节流:每次语音播报也推一次final + if t_now - last_center_guide_time > GUIDANCE_INTERVAL_SEC: + bridge_io.send_ui_final(msg) + except Exception: + pass + draw_text_cn(vis, msg, + (10, 40), font_size=18, color=FRONTEND_COLORS["text"]) + + # 显示距离信息 + dx = frame_center[0] - object_center[0] + dy = frame_center[1] - object_center[1] + distance = int(np.sqrt(dx**2 + dy**2)) + draw_text_cn(vis, f"距离: {distance}px", + (10, 60), font_size=16, color=FRONTEND_COLORS["muted"]) + + # 播放语音引导 + if t_now - last_center_guide_time > GUIDANCE_INTERVAL_SEC: + play_guidance_audio(direction) + last_center_guide_time = t_now + else: + # 已经居中,显示成功信息 + try: + bridge_io.send_ui_final("✓ 物品已成功移到中心!") + except Exception: + pass + draw_text_cn(vis, "✓ 物品已成功移到中心!", + (10, 60), font_size=18, color=FRONTEND_COLORS["ok"]) + + # 等待1秒后进入手部追踪模式 + if t_now - last_center_guide_time > 1.0: + print("[CENTER] 进入手部追踪模式") + try: + bridge_io.send_ui_final("进入手部追踪模式") + except Exception: + pass + MODE = "TRACK" + # 保持当前的光流追踪状态 + else: + # 多边形中心计算失败,显示警告 + draw_text_cn(vis, "正在追踪物体...", (10, 100), font_size=20, color=(255, 255, 0)) + else: + # 光流点数不足,尝试重新检测 + MODE = "SEGMENT" + old_gray = None + p0 = None + print("[CENTER] 光流追踪失败,返回检测模式") + + old_gray = gray + + else: # MODE == "TRACK" + # 手部追踪模式(原有逻辑保持不变) + align_score = 0.0 + range_score = 0.0 + ratio = None + + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + track_frame_count += 1 + + relock_done = False + poly_center = None + poly_area = 0.0 + + # 初始化camera_movement为默认值 + camera_movement = np.array([0.0, 0.0]) + + # 初始化或更新背景参考点(在物体多边形外部取点) + if background_points is None or track_frame_count % 30 == 0: + # 在画面四角取一些背景特征点 + mask_for_bg = np.ones((H, W), 
dtype=np.uint8) * 255 + if last_poly_box: + x, y, w, h = last_poly_box + # 扩大区域,排除物体和手 + expand = 100 + x1 = max(0, x - expand) + y1 = max(0, y - expand) + x2 = min(W, x + w + expand) + y2 = min(H, y + h + expand) + mask_for_bg[y1:y2, x1:x2] = 0 + + # 在背景区域提取特征点 + try: + bg_pts = cv2.goodFeaturesToTrack(gray, maxCorners=20, + qualityLevel=0.1, + minDistance=30, + mask=mask_for_bg) + if bg_pts is not None and len(bg_pts) >= 5: + background_points = bg_pts + old_background_gray = gray.copy() + except Exception as e: + #print(f"[TRACK] 背景特征点提取失败: {e}") + background_points = None + + # 计算背景移动(相机移动) + if old_background_gray is not None and background_points is not None and len(background_points) > 0: + try: + bg_p1, bg_st, _ = cv2.calcOpticalFlowPyrLK( + old_background_gray, gray, background_points, None, **LK_PARAMS + ) + if bg_p1 is not None and bg_st is not None: + good_bg_old = background_points[bg_st == 1] + good_bg_new = bg_p1[bg_st == 1] + if len(good_bg_new) >= 3 and len(good_bg_old) >= 3: + # 计算背景的平均移动 + bg_movement = np.mean(good_bg_new - good_bg_old, axis=0) + camera_movement = bg_movement.reshape(2) + background_points = good_bg_new.reshape(-1, 1, 2) + old_background_gray = gray.copy() + except Exception as e: + print(f"[TRACK] 背景光流计算失败: {e}") + camera_movement = np.array([0.0, 0.0]) + + if old_gray is not None and p0 is not None and len(p0) >= 5: + p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, gray, p0, None, **LK_PARAMS) + if p1 is not None and st is not None: + good_new = p1[st == 1] + if len(good_new) >= 5: + p0 = good_new.reshape(-1, 1, 2) + hull = cv2.convexHull(good_new.reshape(-1,1,2)) + poly = hull.reshape(-1, 2) + + if len(poly) >= 3: + # 统一的 YOLOE 实时检测和校正(每帧) + latest_det_mask = None + if use_yoloe and yoloe_backend is not None: + # 添加调试信息 + if track_frame_count % 30 == 0: # 每30帧打印一次 + print(f"[YOLOE] TRACK模式实时检测第 {track_frame_count} 帧") + + # YOLOE 实时检测(统一调用,避免重复) + det = yoloe_backend.segment(frame, conf=YOLO_CORRECTION_CONF_THRESHOLD, 
iou=0.45, persist=True) + if det["masks"]: + # 取面积最大的那个 + areas = [int(m.sum()) for m in det["masks"]] + j = int(np.argmax(areas)) + m = det["masks"][j] + if m.shape[:2] != (H, W): + m = cv2.resize(m, (W, H), interpolation=cv2.INTER_NEAREST) + latest_det_mask = (m > 0).astype(np.uint8) + + # 和当前光流多边形的 IoU + poly_mask = np.zeros((H, W), dtype=np.uint8) + cv2.fillPoly(poly_mask, [poly.astype(np.int32)], 1) + inter = np.logical_and(latest_det_mask, poly_mask).sum() + union = np.logical_or(latest_det_mask, poly_mask).sum() + 1e-6 + iou = inter / union + + # 降低IoU阈值,更积极地校正 + if iou > YOLO_CORRECTION_IOU_THRESHOLD: # 使用可配置阈值 + # 用 YOLOE 结果更新多边形 + contours, _ = cv2.findContours(latest_det_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + if contours: + largest_contour = max(contours, key=cv2.contourArea) + # 使用更精细的轮廓处理,减少过度简化 + epsilon = TRACK_EPSILON_FACTOR * cv2.arcLength(largest_contour, True) + poly = cv2.approxPolyDP(largest_contour, epsilon, True).reshape(-1, 2) + + # 更新光流特征点 + edge_mask = inner_offset_edge(latest_det_mask, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX) + pts = cv2.goodFeaturesToTrack(gray, mask=edge_mask, **FEATURE_PARAMS) + if pts is not None and len(pts) >= 5: + p0 = pts + #print(f"[TRACK] YOLOE 实时校正,IoU: {iou:.3f}") + + # 检查是否接触,决定轮廓颜色 + is_touching = False + overlap_ratio = 0.0 + if hand_box is not None and poly is not None: + is_touching, overlap_ratio = check_hand_object_contact(hand_box, poly, overlap_threshold=0.1) + + # 绘制多边形(可能已被 YOLOE 更新)- 使用更粗的线条 + if is_touching: + # 接触时用亮绿色,并添加发光效果 + poly_color = (0, 255, 127) + # 绘制一个更粗的外层轮廓作为发光效果 + cv2.polylines(vis, [poly.astype(np.int32)], isClosed=True, + color=(127, 255, 127), thickness=STROKE_WIDTH + 4) + # 添加半透明的填充效果 + overlay = vis.copy() + cv2.fillPoly(overlay, [poly.astype(np.int32)], (0, 255, 0)) + cv2.addWeighted(overlay, 0.15, vis, 0.85, 0, vis) + else: + # 未接触时用普通绿色 + poly_color = (0, 255, 0) + cv2.polylines(vis, [poly.astype(np.int32)], isClosed=True, color=poly_color, 
thickness=STROKE_WIDTH) + # 多边形质心与面积 + poly_center, poly_area = polygon_center_and_area(poly) + if poly_center: + pc = (int(poly_center[0]), int(poly_center[1])) + cv2.circle(vis, pc, 6, (0,255,0), -1) + + # 多边形外接矩形(用于周边监控) + x, y, w, h = cv2.boundingRect(poly.astype(np.int32)) + last_poly_box = (x, y, w, h) + + # ====== 对齐分数(第一条)====== + if hand_center and poly_center: + hc = np.array(hand_center, dtype=np.float32) + oc = np.array(poly_center, dtype=np.float32) + dist = float(np.linalg.norm(oc - hc)) + diag = float(np.linalg.norm([W, H])) + align_score = 1.0 - min(dist/(ALIGN_LOOSE_PCT*diag + 1e-6), 1.0) + + # 绘制虚线引导(替代原来的实线箭头) + draw_dashed_line(vis, (hc[0], hc[1]), (oc[0], oc[1]), + color=(255, 255, 0), thickness=2, + dash_length=15, gap_length=10) + + # 方向引导 + direction, secondary = get_guidance_direction( + hand_center, poly_center, hand_area, poly_area, + hand_box, poly + ) + + if direction and direction != "保持": + # 根据是否接触显示不同颜色 + if direction == "向前": + # 手已经接触物体,用绿色显示 + guide_color = (0, 255, 0) # 绿色 + draw_text_cn(vis, f"引导: {direction} - 伸手抓取", (W//2 - 80, 40), + font_size=24, color=guide_color, stroke=(0, 0, 0)) + else: + # 还未接触,用黄色显示 + guide_color = (0, 255, 255) # 黄色 + draw_text_cn(vis, f"引导: {direction}", (W//2 - 60, 40), + font_size=24, color=guide_color, stroke=(0, 0, 0)) + + # 显示次要信息(接触度或其他方向) + if secondary: + if isinstance(secondary, str): + # 接触度信息 + draw_text_cn(vis, secondary, (W//2 - 60, 70), + font_size=18, color=(0, 255, 0)) + else: + # 其他方向信息 + draw_text_cn(vis, f"(或 {secondary})", (W//2 - 60, 70), + font_size=18, color=(200, 200, 200)) + + # 播放语音引导 - 确保每个方向都会播放 + if t_now - last_guidance_time > GUIDANCE_INTERVAL_SEC: + # 检查方向是否改变,或者时间间隔足够 + if direction != last_guidance_direction or t_now - last_guidance_time > GUIDANCE_INTERVAL_SEC * 2: + play_guidance_audio(direction) + last_guidance_direction = direction + last_guidance_time = t_now + print(f"[GUIDE] 播放引导音频: {direction}") + else: + align_score = 0.0 + + # 显示接触状态 + is_touching, 
overlap_ratio = check_hand_object_contact(hand_box, poly, overlap_threshold=0.1) + if is_touching: + draw_text_cn(vis, f"状态: 已接触 ({overlap_ratio:.1%})", (10, 95), + font_size=16, color=(0, 255, 0)) + else: + # 计算手和物体的距离 + if hand_center and poly_center: + distance = np.sqrt((hand_center[0] - poly_center[0])**2 + + (hand_center[1] - poly_center[1])**2) + draw_text_cn(vis, f"距离: {distance:.0f}px", (10, 95), + font_size=16, color=FRONTEND_COLORS["muted"]) + + # 成功条件:握持(放宽) + if (_last_result and _last_result[0].hand_landmarks and len(_last_result[0].hand_landmarks) > 0): + l0 = _last_result[0].hand_landmarks[0] + grasp_now, grasp_score = detect_grasp(l0, W, H) + else: + grasp_now, grasp_score = False, 0.0 + + # guidance_msg 相关代码已经集成到上面的引导逻辑中 + + # ===== 周边监控 & 重新锁定(复用YOLO结果)===== + if (track_frame_count % PERI_CHECK_EVERY == 0) and (last_poly_box is not None) and (latest_det_mask is not None): + # 直接使用刚才的YOLO检测结果,避免重复调用 + px, py, pw, ph = last_poly_box + x0 = max(0, px - PERI_MONITOR_PX) + y0 = max(0, py - PERI_MONITOR_PX) + x1 = min(W - 1, px + pw + PERI_MONITOR_PX) + y1 = min(H - 1, py + ph + PERI_MONITOR_PX) + + # 检查周边区域是否有更好的检测结果 + peri_area = latest_det_mask[y0:y1, x0:x1].sum() + total_area = latest_det_mask.sum() + + # 如果周边区域有显著检测结果,重新锁定 + if peri_area > total_area * 0.1: # 周边有10%以上的检测面积 + edge_mask = inner_offset_edge(latest_det_mask, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX) + pts = cv2.goodFeaturesToTrack(gray, mask=edge_mask, **FEATURE_PARAMS) + if pts is not None and len(pts) >= 8: + p0 = pts + old_gray = gray + lock_edge_debug = edge_mask.copy() + #print(f"[PERI] 周边重锁定,特征点数={len(p0)}") + else: + MODE = "SEGMENT"; old_gray = None; p0 = None; lock_edge_debug = None + else: + MODE = "SEGMENT"; old_gray = None; p0 = None; lock_edge_debug = None + else: + MODE = "SEGMENT"; old_gray = None; p0 = None; lock_edge_debug = None + else: + MODE = "SEGMENT"; old_gray = None; p0 = None; lock_edge_debug = None + + + + if MODE == "SEGMENT": + 
draw_text_cn(vis, "追踪丢失 → 正在重新识别。按 Enter 重新锁定", (10, 100), font_size=22, color=(0,0,255)) + + old_gray = gray + + # FPS(移动到左下角样式) + if 'fps_hist' not in locals(): + fps_hist = [] + fps_hist.append(t_now) + if len(fps_hist) > 30: + fps_hist.pop(0) + fps = 0.0 if len(fps_hist) < 2 else (len(fps_hist)-1)/(fps_hist[-1]-fps_hist[0]) + draw_text_cn(vis, f"FPS: {fps:.1f}", (10, 40), font_size=16, color=FRONTEND_COLORS["ok"]) + + # 右下角显示"内边界/最近一次锁定"的调试图 + if lock_edge_debug is not None: + # 极小缩放并放在右下角 + small = cv2.resize(lock_edge_debug, (0,0), fx=0.22, fy=0.22, interpolation=cv2.INTER_NEAREST) + sh, sw = small.shape[:2] + small_bgr = cv2.cvtColor(small, cv2.COLOR_GRAY2BGR) + # 右下角位置,留 10-12px 边距 + x1 = max(8, W - sw - 12) + y1 = max(8, H - sh - 12) + y2 = y1 + sh + x2 = x1 + sw + vis[y1:y2, x1:x2] = small_bgr + # 标签置于图上方紧贴,使用更小字号 + #draw_text_cn(vis, "内边界", (x1, y1 - 8), font_size=12, color=FRONTEND_COLORS["muted"], ui_hint=False) + + # 底部中间的"当前指令"按钮(始终绘制,文案随音频同步) + draw_command_pill(vis, CURRENT_COMMAND_TEXT) + + # 展示(无论 headless 与否,都会推给前端) + bridge_io.send_vis_bgr(vis) + + # [headless] 只有非 headless 时才弹窗与键盘交互;headless 下用 waitKey(1) 让出调度 + if not headless: + cv2.imshow(WINDOW, vis) + key = cv2.waitKey(1) & 0xFF + if key in (27, ord('q')): + break + elif key == ord('r'): + MODE = "SEGMENT"; old_gray = None; p0 = None; lock_edge_debug = None + elif key == 13: # Enter:从 SEGMENT 锁定并开始 TRACK(内收 5px) + if MODE == "SEGMENT": + # 使用 YOLOE 进行手动锁定 + if use_yoloe and yoloe_backend is not None: + det = yoloe_backend.segment(frame, conf=CONF_THRESHOLD, iou=0.45, persist=True) + if det["masks"]: + # 取面积最大的那个 + areas = [int(m.sum()) for m in det["masks"]] + j = int(np.argmax(areas)) + m = det["masks"][j] + if m.shape[:2] != (H, W): + m = cv2.resize(m, (W, H), interpolation=cv2.INTER_NEAREST) + best_mask = (m > 0.5).astype(np.uint8) + else: + best_mask = None + else: + best_mask = None + if best_mask is not None: + edge_mask = inner_offset_edge(best_mask, offset_px=INNER_OFFSET_PX_LOCK, 
edge_dilate_px=EDGE_DILATE_PX) + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + pts = cv2.goodFeaturesToTrack(gray, mask=edge_mask, **FEATURE_PARAMS) + if pts is not None and len(pts) >= 8: + p0 = pts + old_gray = gray + MODE = "TRACK" + lock_edge_debug = edge_mask.copy() + track_frame_count = 0 + print(f"[LOCK] 内边界特征点数={len(p0)} → TRACK") + else: + print("[LOCK] 内边界特征点不足,请调整画面后重试。") + else: + print("[LOCK] 当前帧未找到有效分割,请重试。") + else: + # headless 下也调用一次 waitKey(1),让 OpenCV 的计时器/回调得到机会,且避免 CPU 忙等 + cv2.waitKey(1) + + # 在 headless 模式下检查停止事件 + if stop_event and stop_event.is_set(): + print("[YOLOMEDIA] Received stop signal in headless mode") + break + + finally: + try: + landmarker.close() + except Exception: + pass + #cap.release() + # [headless] 仅在非 headless 时销毁窗口 + if not headless: + cv2.destroyAllWindows() + + +if __name__ == "__main__": + main()