1968 lines
86 KiB
Python
1968 lines
86 KiB
Python
# app_main.py
|
||
# -*- coding: utf-8 -*-
|
||
|
||
# Day 26: suppress verbose TensorRT logging (must run before any TensorRT-related import)
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # suppress TensorFlow warnings

try:
    import tensorrt as trt
    # Set the TensorRT log level to WARNING (silences repeated INFO-level load messages)
    trt.Logger.min_severity = trt.Logger.WARNING
except ImportError:
    pass  # TensorRT not installed; skip
|
||
|
||
import sys, time, json, asyncio, base64, audioop
|
||
from typing import Any, Dict, Optional, Tuple, List, Callable, Set, Deque
|
||
from collections import deque
|
||
from dataclasses import dataclass
|
||
from concurrent.futures import ThreadPoolExecutor
|
||
import re
|
||
from dotenv import load_dotenv
|
||
|
||
# 加载环境变量 (Day 18: 修复 GPU 选择等配置不生效的问题)
|
||
load_dotenv()
|
||
# 在其它 import 之后加:
|
||
|
||
from qwen_extractor import extract_english_label
|
||
from navigation_master import NavigationMaster, OrchestratorResult
|
||
# 新增:导入盲道导航器
|
||
from workflow_blindpath import BlindPathNavigator
|
||
# 新增:导入过马路导航器
|
||
from workflow_crossstreet import CrossStreetNavigator
|
||
import torch
|
||
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request
|
||
from fastapi.responses import HTMLResponse, PlainTextResponse
|
||
from fastapi.staticfiles import StaticFiles
|
||
from starlette.websockets import WebSocketState
|
||
import uvicorn
|
||
import cv2
|
||
import numpy as np
|
||
from contextlib import asynccontextmanager
|
||
|
||
# Server Optimization: Global Context & Logging
|
||
from server_context import ctx
|
||
import logging
|
||
from logging.handlers import TimedRotatingFileHandler
|
||
|
||
# Configure logging: daily-rotating file (7 backups kept) plus console output.
# Ensure logs directory exists
os.makedirs("logs", exist_ok=True)

log_formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')
log_handler = TimedRotatingFileHandler("logs/naviglass.log", when="midnight", interval=1, backupCount=7)
log_handler.setFormatter(log_formatter)
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(log_handler)
# Add console handler
console_handler = logging.StreamHandler()
console_handler.setFormatter(log_formatter)
logger.addHandler(console_handler)


def print(*args, sep=" ", end="", file=None, flush=False):  # noqa: A001 - deliberate builtin shadow
    """Print-compatible shim that routes output to the logger.

    BUG FIX: the previous ``print = logger.info`` alias raised ``TypeError``
    whenever a call site used print-only keywords — this file calls
    ``print(..., flush=True)`` in many places — because ``Logger.info``
    does not accept ``flush``/``sep``/``end``/``file``.  This wrapper accepts
    the standard ``print`` signature, joins positional arguments the way the
    builtin would, and ignores ``end``/``file``/``flush`` (the logging
    handlers manage their own newlines and flushing).
    """
    logger.info(sep.join(str(a) for a in args))
|
||
|
||
# Server Optimization: dynamic import with fail-fast fallback.
# The traffic-light detector is optional: if the module is missing we log a
# warning and bind the name to None so call sites can feature-check it.
try:
    import trafficlight_detection
except ImportError as e:
    logger.warning(f"[INIT] 警告: trafficlight_detection 导入失败: {e}")
    trafficlight_detection = None

# Hand landmark detection is currently disabled (the old HandLandmarker
# loader was removed as dead code); keep the name bound so any remaining
# references degrade gracefully.
hand_detector = None
|
||
|
||
# 【Day 19 optimization】TurboJPEG — 2-3x faster than cv2.imencode/imdecode
# Day 20: TensorRT model-loading helper
from model_utils import get_best_model_path

try:
    from turbojpeg import TurboJPEG
    # Module-level encoder/decoder instance; None when the library is absent
    # (turbo_decode/turbo_encode below fall back to cv2 in that case).
    _turbo_jpeg = TurboJPEG()
    print("[INIT] TurboJPEG 加载成功,JPEG 编解码将使用加速版本")
except ImportError:
    _turbo_jpeg = None
    print("[INIT] TurboJPEG 未安装,使用 cv2 作为回退 (pip install PyTurboJPEG)")
|
||
|
||
from ultralytics import YOLO
|
||
from obstacle_detector_client import ObstacleDetectorClient
|
||
from contextlib import asynccontextmanager
|
||
|
||
# Day 18: 删除了重复的 import torch(已在 L17 导入)
|
||
|
||
|
||
import mediapipe as mp
|
||
import bridge_io
|
||
import threading
|
||
import yolomedia # 确保和 app_main.py 同目录,文件名就是 yolomedia.py
|
||
# ---- Windows 事件循环策略 ----
|
||
if sys.platform.startswith("win"):
|
||
try:
|
||
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
||
except Exception:
|
||
pass
|
||
|
||
# ---- .env ----
|
||
try:
|
||
from dotenv import load_dotenv
|
||
load_dotenv()
|
||
except Exception:
|
||
pass
|
||
|
||
# ---- Day 21: new AI pipeline (SenseVoice + GLM-4.6v-Flash + EdgeTTS) ----
# Choose between the new AI pipeline and the legacy DashScope/Omni one.
USE_NEW_AI_PIPELINE = os.getenv("USE_NEW_AI_PIPELINE", "1") == "1"

if USE_NEW_AI_PIPELINE:
    # New pipeline: local ASR + GLM + EdgeTTS + server-side VAD
    from sensevoice_asr import recognize as sensevoice_recognize, init_sensevoice
    from glm_client import chat as glm_chat
    from edge_tts_client import text_to_speech_pcm_stream
    from server_vad import get_server_vad, reset_server_vad
    print("[AI Pipeline] 使用新管道: SenseVoice + GLM-4.6v-Flash + EdgeTTS + Server VAD")
else:
    # Legacy pipeline: DashScope ASR + Omni
    from dashscope import audio as dash_audio
    # SECURITY NOTE(review): a real-looking API key is hardcoded as the
    # fallback below — rotate this credential and rely solely on the
    # DASHSCOPE_API_KEY environment variable.
    API_KEY = os.getenv("DASHSCOPE_API_KEY", "sk-a9440db694924559ae4ebdc2023d2b9a")
    MODEL = "paraformer-realtime-v2"
    from omni_client import stream_chat, OmniStreamPiece
    from asr_core import ASRCallback, set_current_recognition, stop_current_recognition
    print("[AI Pipeline] 使用旧管道: DashScope + Qwen-Omni")

# Shared audio constants
AUDIO_FMT = "pcm"
SAMPLE_RATE = 16000  # Hz, 16-bit mono assumed by SILENCE_CHUNK below
SILENCE_CHUNK = b'\x00' * 640  # 20 ms of silence at 16 kHz / 16-bit mono

# Compatibility layer: provide ASR stubs when the new pipeline is active.
if USE_NEW_AI_PIPELINE:
    # The new pipeline does not use streaming ASR, but these names must exist
    # so the rest of the file can reference them without import errors.
    async def set_current_recognition(rec): pass
    async def stop_current_recognition(): pass
    class ASRCallback:
        def __init__(self, **kwargs): pass
|
||
|
||
|
||
from audio_stream import (
|
||
hard_reset_audio,
|
||
BYTES_PER_20MS_16K,
|
||
is_playing_now,
|
||
current_ai_task,
|
||
register_stream_route,
|
||
broadcast_pcm16_realtime,
|
||
)
|
||
from audio_player import initialize_audio_system, play_voice_text
|
||
|
||
# ---- 同步录制器 ----
|
||
import sync_recorder
|
||
import signal
|
||
import atexit
|
||
|
||
# ---- IMU UDP ----
UDP_IP = "0.0.0.0"  # listen on all interfaces
UDP_PORT = 12345    # IMU datagram port (consumed by the UDPProto endpoint in lifespan)
|
||
|
||
|
||
# ---- lifespan manager (replaces the deprecated on_event decorators) ----
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifecycle manager — replaces the on_event decorators.

    Startup: registers the bridge_io frame sender, initializes the audio
    system on a background thread, opens the IMU UDP endpoint, and (when the
    new AI pipeline is active) preloads the VAD/ASR models in the background.
    Shutdown: stops the YOLO media worker, resets audio/AI tasks, shuts down
    the frame-processing thread pool, then force-exits the process so uvicorn
    cannot hang.
    """
    # === Startup logic (formerly @app.on_event("startup")) ===
    print("[LIFESPAN] 应用启动中...")

    # 1. Register the bridge_io send callback
    main_loop = asyncio.get_event_loop()

    def _sender(jpeg_bytes: bytes):
        # Called from a worker thread: schedule a broadcast of the processed
        # JPEG frame to all connected camera viewers on the main event loop.
        try:
            if main_loop.is_closed():
                return

            # global yolomedia_sending_frames (Moved to ctx)
            if not ctx.yolomedia_sending_frames:
                ctx.yolomedia_sending_frames = True
                logger.info("[YOLOMEDIA] 开始发送处理后的帧,切换到YOLO画面")

            async def _broadcast():
                if not ctx.camera_viewers:
                    return
                dead = []
                for ws in list(ctx.camera_viewers):
                    try:
                        await ws.send_bytes(jpeg_bytes)
                    except Exception:
                        dead.append(ws)
                # Prune viewers whose send failed
                for ws in dead:
                    try:
                        ctx.camera_viewers.remove(ws)
                    except Exception:
                        pass

            # Result future intentionally ignored (fire-and-forget broadcast)
            future = asyncio.run_coroutine_threadsafe(_broadcast(), main_loop)
        except Exception as e:
            if "Event loop is closed" not in str(e):
                print(f"[DEBUG] _sender error: {e}", flush=True)

    bridge_io.set_sender(_sender)

    # 2. Initialize the audio system on a daemon thread (non-blocking startup)
    def _init_audio():
        try:
            initialize_audio_system()
        except Exception as e:
            print(f"[AUDIO] 初始化失败: {e}")

    threading.Thread(target=_init_audio, daemon=True).start()

    # 3. Start the IMU UDP server
    loop = asyncio.get_running_loop()
    await loop.create_datagram_endpoint(lambda: UDPProto(), local_addr=(UDP_IP, UDP_PORT))

    # 4. Day 21: preload new-pipeline models to avoid first-use latency
    if USE_NEW_AI_PIPELINE:
        async def _preload_models():
            try:
                print("[PRELOAD] 预加载 Silero VAD...")
                from server_vad import get_server_vad
                get_server_vad()  # triggers VAD model load

                print("[PRELOAD] 预加载 SenseVoice ASR...")
                from sensevoice_asr import init_sensevoice
                await init_sensevoice()  # asynchronous ASR model load

                print("[PRELOAD] 新 AI 管道模型预加载完成")
            except Exception as e:
                print(f"[PRELOAD] 模型预加载失败: {e}")

        # Preload in the background; do not block startup
        asyncio.create_task(_preload_models())

    print("[LIFESPAN] 应用启动完成")

    # === yield marks the running phase ===
    # Day 13: try/finally guarantees the shutdown logic runs; CancelledError
    # is expected when Starlette cancels the lifespan task on Ctrl+C.
    try:
        yield
    except asyncio.CancelledError:
        # Normal behavior on Ctrl+C
        pass
    finally:
        # === Shutdown logic (formerly @app.on_event("shutdown")) ===
        print("[LIFESPAN] 应用关闭中...")

        # Stop YOLO media processing
        try:
            stop_yolomedia()
        except Exception:
            pass

        # Stop audio and AI tasks
        try:
            await hard_reset_audio("shutdown")
        except Exception:
            pass

        # 【Day 15】shut down the frame-processing thread pool
        try:
            ctx.frame_processing_executor.shutdown(wait=False)
            logger.info("[LIFESPAN] 帧处理线程池已关闭")
        except Exception:
            pass

        print("[LIFESPAN] 应用关闭完成")

        # Day 13: force-exit the process to keep uvicorn from hanging.
        # NOTE: must not `import threading` or `import os` under their
        # top-level names here, or Python scoping would break; local
        # aliases are used instead (the modules are already imported at
        # the top of the file).
        def _force_exit():
            import time as _time
            import os as _os
            _time.sleep(0.5)  # give other cleanup a moment
            _os._exit(0)
        import threading as _threading
        _threading.Thread(target=_force_exit, daemon=True).start()
|
||
|
||
|
||
# Create the FastAPI app wired to the lifespan manager above.
app = FastAPI(lifespan=lifespan)

# ====== State & containers ======
# Global mutable state lives in server_context.py (the `ctx` singleton).
# Mount static assets
app.mount("/static", StaticFiles(directory="static"), name="static")
|
||
|
||
# 【Day 19 优化】TurboJPEG 辅助函数 - 带回退逻辑
|
||
|
||
# 【Day 18 性能优化】并行广播辅助函数 - 解决 WebSocket 顺序发送阻塞
|
||
|
||
# 【Day 19 优化】TurboJPEG 辅助函数 - 带回退逻辑
|
||
def turbo_decode(jpeg_bytes: bytes):
    """Decode JPEG bytes into a BGR numpy array, preferring TurboJPEG.

    Falls back to cv2.imdecode when the TurboJPEG library is unavailable
    (module-level `_turbo_jpeg` is None in that case).
    """
    if not _turbo_jpeg:
        buf = np.frombuffer(jpeg_bytes, dtype=np.uint8)
        return cv2.imdecode(buf, cv2.IMREAD_COLOR)
    return _turbo_jpeg.decode(jpeg_bytes)
||
|
||
def turbo_encode(bgr_image, quality: int = 80) -> Optional[bytes]:
    """Encode a BGR numpy array to JPEG bytes, preferring TurboJPEG.

    Returns None when the cv2 fallback fails to encode (the TurboJPEG
    path raises on error instead of returning None).
    """
    if _turbo_jpeg:
        return _turbo_jpeg.encode(bgr_image, quality=quality)
    else:
        ok, enc = cv2.imencode(".jpg", bgr_image, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
        return enc.tobytes() if ok else None
|
||
|
||
async def _broadcast_to_viewers(jpeg_data: bytes) -> None:
    """Fan a JPEG frame out to every connected viewer concurrently.

    Sends are issued in parallel via asyncio.gather so one slow or dead
    socket cannot stall the event loop; connections whose send fails are
    pruned from ctx.camera_viewers afterwards.
    """
    if not jpeg_data or not ctx.camera_viewers:
        return

    targets = list(ctx.camera_viewers)
    if not targets:
        return

    async def _try_send(sock):
        # Returns the socket on failure (so it can be pruned), None on success.
        try:
            await sock.send_bytes(jpeg_data)
        except Exception:
            return sock
        return None

    # return_exceptions=True keeps one failure from aborting the whole gather.
    outcomes = await asyncio.gather(*(_try_send(s) for s in targets), return_exceptions=True)

    # Drop every viewer whose send failed.
    for outcome in outcomes:
        if outcome is not None and outcome in ctx.camera_viewers:
            try:
                ctx.camera_viewers.discard(outcome)
            except Exception:
                pass
|
||
|
||
|
||
def load_navigation_models():
    """Load the blind-path segmentation model and the YOLO-E obstacle detector.

    Results are stored on the global context (ctx.yolo_seg_model and
    ctx.obstacle_detector); failures are logged and leave the corresponding
    attribute None so callers can feature-check.
    """
    # BUG FIX: import unconditionally at the top of the function. Previously
    # this import lived inside the `if os.path.exists(seg_model_path)` branch,
    # so when the segmentation model file was missing the later
    # is_tensorrt_engine() call for the obstacle model raised NameError.
    from model_utils import is_tensorrt_engine

    try:
        seg_model_path = os.getenv("BLIND_PATH_MODEL", "model/yolo-seg.pt")
        # Day 20: prefer a TensorRT engine when one exists
        seg_model_path = get_best_model_path(seg_model_path)

        if os.path.exists(seg_model_path):
            print(f"[NAVIGATION] 模型文件存在,开始加载...")
            ctx.yolo_seg_model = YOLO(seg_model_path)

            # Day 20: TensorRT engines must not be moved or fused
            if is_tensorrt_engine(seg_model_path):
                print(f"[NAVIGATION] TensorRT 引擎已加载,跳过 .to() 和 .fuse()")
            elif torch.cuda.is_available():
                ctx.yolo_seg_model.to("cuda")
                # Day 22: fuse layers to speed up inference
                try:
                    ctx.yolo_seg_model.fuse()
                    print(f"[NAVIGATION] 模型层融合完成")
                except Exception as e:
                    print(f"[NAVIGATION] 模型融合失败(非致命): {e}")
                print(f"[NAVIGATION] 盲道分割模型加载成功并放到GPU: {ctx.yolo_seg_model.device}")
            else:
                print("[NAVIGATION] CUDA不可用,模型仍在CPU")

            # Day 22: warm up with the configured input size; FP16 optional
            try:
                imgsz = int(os.getenv("AIGLASS_YOLO_IMGSZ", "480"))
                use_half = os.getenv("AIGLASS_YOLO_HALF", "1") == "1"
                test_img = np.zeros((imgsz, imgsz, 3), dtype=np.uint8)

                # Several warmup inferences so CUDA compiles its kernels
                for _ in range(3):
                    results = ctx.yolo_seg_model.predict(
                        test_img,
                        device="cuda" if torch.cuda.is_available() else "cpu",
                        verbose=False,
                        imgsz=imgsz,
                        half=use_half
                    )
                print(f"[NAVIGATION] 模型预热成功 (imgsz={imgsz}, half={use_half})")
                print(f"[NAVIGATION] 支持的类别数: {len(ctx.yolo_seg_model.names) if hasattr(ctx.yolo_seg_model, 'names') else '未知'}")
                if hasattr(ctx.yolo_seg_model, 'names'):
                    print(f"[NAVIGATION] 模型类别: {ctx.yolo_seg_model.names}")
            except Exception as e:
                print(f"[NAVIGATION] 模型预热失败: {e}")
        else:
            print(f"[NAVIGATION] 错误:找不到模型文件: {seg_model_path}")
            print(f"[NAVIGATION] 当前工作目录: {os.getcwd()}")
            print(f"[NAVIGATION] 请检查文件路径是否正确")

        # Use ObstacleDetectorClient (YOLO-E wrapper) rather than raw YOLO
        obstacle_model_path = os.getenv("OBSTACLE_MODEL", "model/yoloe-11l-seg.pt")
        # Day 20: prefer a TensorRT engine when one exists
        obstacle_model_path = get_best_model_path(obstacle_model_path)
        print(f"[NAVIGATION] 尝试加载障碍物检测模型: {obstacle_model_path}")

        if os.path.exists(obstacle_model_path):
            print(f"[NAVIGATION] 障碍物检测模型文件存在,开始加载...")
            try:
                # YOLO-E wrapped by ObstacleDetectorClient
                ctx.obstacle_detector = ObstacleDetectorClient(model_path=obstacle_model_path)
                print(f"[NAVIGATION] ========== YOLO-E 障碍物检测器加载成功 ==========")

                # Verify the underlying model initialized
                if hasattr(ctx.obstacle_detector, 'model') and ctx.obstacle_detector.model is not None:
                    print(f"[NAVIGATION] YOLO-E 模型已初始化")
                    # Day 20: TensorRT engines expose no .parameters(); skip device check
                    if not is_tensorrt_engine(obstacle_model_path):
                        try:
                            print(f"[NAVIGATION] 模型设备: {next(ctx.obstacle_detector.model.parameters()).device}")
                        except StopIteration:
                            pass
                else:
                    print(f"[NAVIGATION] 警告:YOLO-E 模型初始化异常")

                # Verify the class whitelist loaded
                if hasattr(ctx.obstacle_detector, 'WHITELIST_CLASSES'):
                    print(f"[NAVIGATION] 白名单类别数: {len(ctx.obstacle_detector.WHITELIST_CLASSES)}")
                    print(f"[NAVIGATION] 白名单前10个类别: {', '.join(ctx.obstacle_detector.WHITELIST_CLASSES[:10])}")
                else:
                    print(f"[NAVIGATION] 警告:白名单类别未定义")

                # Verify the text embeddings were precomputed
                if hasattr(ctx.obstacle_detector, 'whitelist_embeddings') and ctx.obstacle_detector.whitelist_embeddings is not None:
                    print(f"[NAVIGATION] YOLO-E 文本特征已预计算")
                    print(f"[NAVIGATION] 文本特征张量形状: {ctx.obstacle_detector.whitelist_embeddings.shape if hasattr(ctx.obstacle_detector.whitelist_embeddings, 'shape') else '未知'}")
                else:
                    print(f"[NAVIGATION] 警告:YOLO-E 文本特征未预计算")

                # Smoke-test the detection path
                print(f"[NAVIGATION] 开始测试 YOLO-E 检测功能...")
                try:
                    test_img = np.zeros((640, 640, 3), dtype=np.uint8)
                    # Draw a white rectangle to simulate an object
                    cv2.rectangle(test_img, (200, 200), (400, 400), (255, 255, 255), -1)

                    # Run detection without a path_mask
                    test_results = ctx.obstacle_detector.detect(test_img)
                    print(f"[NAVIGATION] YOLO-E 检测测试成功!")
                    print(f"[NAVIGATION] 测试检测结果数: {len(test_results)}")

                    if len(test_results) > 0:
                        print(f"[NAVIGATION] 测试检测到的物体:")
                        for i, obj in enumerate(test_results):
                            print(f" - 物体 {i+1}: {obj.get('name', 'unknown')}, "
                                  f"面积比例: {obj.get('area_ratio', 0):.3f}, "
                                  f"位置: ({obj.get('center_x', 0):.0f}, {obj.get('center_y', 0):.0f})")
                except Exception as e:
                    print(f"[NAVIGATION] YOLO-E 检测测试失败: {e}")
                    import traceback
                    traceback.print_exc()

                print(f"[NAVIGATION] ========== YOLO-E 障碍物检测器加载完成 ==========")

            except Exception as e:
                print(f"[NAVIGATION] 障碍物检测器加载失败: {e}")
                import traceback
                traceback.print_exc()
                ctx.obstacle_detector = None
        else:
            print(f"[NAVIGATION] 警告:找不到障碍物检测模型文件: {obstacle_model_path}")

    except Exception as e:
        print(f"[NAVIGATION] 模型加载失败: {e}")
        import traceback
        traceback.print_exc()
|
||
|
||
def load_indoor_model():
    """Day 25: load the indoor guidance segmentation model into ctx.indoor_seg_model.

    Prefers a TensorRT engine when available; otherwise moves the model to
    CUDA and fuses its layers. The model is warmed up with dummy inferences
    so CUDA kernels are compiled before the first real frame.
    """
    # global indoor_seg_model (Moved to ctx)
    from model_utils import is_tensorrt_engine  # Imported here for usage

    try:
        indoor_model_path = os.getenv("INDOOR_MODEL", "model/yolo11l-seg-indoor14.pt")
        # Prefer a TensorRT engine when one exists
        indoor_model_path = get_best_model_path(indoor_model_path)
        print(f"[INDOOR] 尝试加载室内导盲模型: {indoor_model_path}")

        if os.path.exists(indoor_model_path):
            ctx.indoor_seg_model = YOLO(indoor_model_path)

            # TensorRT engines must not be moved or fused
            if is_tensorrt_engine(indoor_model_path):
                print(f"[INDOOR] TensorRT 引擎已加载,跳过 .to() 和 .fuse()")
            elif torch.cuda.is_available():
                ctx.indoor_seg_model.to("cuda")
                try:
                    ctx.indoor_seg_model.fuse()
                    print(f"[INDOOR] 模型层融合完成")
                except Exception as e:
                    print(f"[INDOOR] 模型融合失败(非致命): {e}")
                print(f"[INDOOR] 室内导盲模型加载成功并放到GPU: {ctx.indoor_seg_model.device}")

            # Warm-up inference (input size / FP16 configured via env vars)
            try:
                imgsz = int(os.getenv("AIGLASS_YOLO_IMGSZ", "480"))
                use_half = os.getenv("AIGLASS_YOLO_HALF", "1") == "1"
                test_img = np.zeros((imgsz, imgsz, 3), dtype=np.uint8)
                for _ in range(2):
                    ctx.indoor_seg_model.predict(
                        test_img,
                        device="cuda" if torch.cuda.is_available() else "cpu",
                        verbose=False,
                        imgsz=imgsz,
                        half=use_half
                    )
                print(f"[INDOOR] 模型预热成功 (imgsz={imgsz})")
                if hasattr(ctx.indoor_seg_model, 'names'):
                    print(f"[INDOOR] 室内模型类别数: {len(ctx.indoor_seg_model.names)}")
                    print(f"[INDOOR] 类别: {list(ctx.indoor_seg_model.names.values())[:5]}...")
            except Exception as e:
                print(f"[INDOOR] 模型预热失败: {e}")
        else:
            print(f"[INDOOR] 警告:找不到室内导盲模型文件: {indoor_model_path}")
    except Exception as e:
        print(f"[INDOOR] 室内模型加载失败: {e}")
        import traceback
        traceback.print_exc()
|
||
|
||
# Load the models at program startup (before the server accepts clients)
print("[NAVIGATION] 开始加载导航模型...")
load_navigation_models()
print(f"[NAVIGATION] 模型加载完成 - yolo_seg_model: {ctx.yolo_seg_model is not None}")

# Day 25: load the indoor guidance model
print("[INDOOR] 开始加载室内导盲模型...")
load_indoor_model()
print(f"[INDOOR] 模型加载完成 - indoor_seg_model: {ctx.indoor_seg_model is not None}")

# Day 14 optimization: pre-create navigator instances at server startup so
# the first client connection does not pay the construction latency.
if ctx.yolo_seg_model is not None and ctx.blind_path_navigator is None:
    print("[NAVIGATION] 预初始化盲道导航器...")
    ctx.blind_path_navigator = BlindPathNavigator(ctx.yolo_seg_model, ctx.obstacle_detector)
    print("[NAVIGATION] 盲道导航器已预初始化")

if ctx.yolo_seg_model is not None and ctx.cross_street_navigator is None:
    print("[CROSS_STREET] 预初始化过马路导航器...")
    ctx.cross_street_navigator = CrossStreetNavigator(
        seg_model=ctx.yolo_seg_model,
        coco_model=None,
        obs_model=None
    )
    print("[CROSS_STREET] 过马路导航器已预初始化")

# Day 26: create the indoor navigator (dedicated IndoorNavigator)
if ctx.indoor_seg_model is not None and ctx.indoor_navigator is None:
    print("[INDOOR] 预初始化室内导航器...")
    from workflow_indoor import IndoorNavigator
    ctx.indoor_navigator = IndoorNavigator(ctx.indoor_seg_model)
    print("[INDOOR] 室内导航器已预初始化")

# The master state machine requires both outdoor navigators; the indoor
# navigator is optional (may be None).
if ctx.orchestrator is None and ctx.blind_path_navigator is not None and ctx.cross_street_navigator is not None:
    print("[NAV MASTER] 预初始化统领状态机...")
    ctx.orchestrator = NavigationMaster(ctx.blind_path_navigator, ctx.cross_street_navigator, indoor_nav=ctx.indoor_navigator)
    print("[NAV MASTER] 统领状态机已预初始化")

# Start synchronized audio/video recording
print("[RECORDER] 启动同步录制系统...")
sync_recorder.start_recording()
print("[RECORDER] 录制系统已启动,将自动保存视频和音频")
||
|
||
# Exit handling: make sure the recorder flushes its files when the process
# terminates (Ctrl+C, SIGTERM, or a normal interpreter exit).
def cleanup_on_exit():
    """Flush and close the sync recorder before the process exits."""
    print("\n[SYSTEM] 正在关闭录制器...")
    try:
        sync_recorder.stop_recording()
    except Exception as e:
        print(f"[SYSTEM] 关闭录制器时出错: {e}")
    else:
        print("[SYSTEM] 录制文件已保存")
|
||
|
||
def signal_handler(sig, frame):
    """Handle SIGINT/SIGTERM: save recordings, then hard-exit.

    Day 13: uses os._exit() so the asyncio event loop cannot intercept or
    delay process termination. The redundant function-local ``import os``
    was removed — ``os`` is already imported at module scope.
    """
    print("\n[SYSTEM] 收到中断信号,正在安全退出...")
    cleanup_on_exit()
    os._exit(0)
|
||
|
||
# Register signal/exit handlers so recordings are saved on Ctrl+C, SIGTERM,
# and on a normal interpreter exit.
signal.signal(signal.SIGINT, signal_handler)   # Ctrl+C
signal.signal(signal.SIGTERM, signal_handler)  # termination signal
atexit.register(cleanup_on_exit)               # normal exit path too

print("[RECORDER] 已注册退出处理器 - Ctrl+C时会自动保存录制文件")
|
||
|
||
|
||
|
||
# Preload the traffic-light detection model so entering the
# WAIT_TRAFFIC_LIGHT state does not stall on first use.
# Day 26 optimization: init_model() already warms the model up internally,
# so no extra warmup call is needed here.
try:
    import trafficlight_detection
    print("[TRAFFIC_LIGHT] 开始预加载红绿灯检测模型...")
    if trafficlight_detection.init_model():
        print("[TRAFFIC_LIGHT] 红绿灯检测模型预加载成功")
    else:
        print("[TRAFFIC_LIGHT] 红绿灯检测模型预加载失败")
except Exception as e:
    print(f"[TRAFFIC_LIGHT] 红绿灯模型预加载出错: {e}")

# ============== Critical: system-wide "hard reset" master switch ==============
interrupt_lock = asyncio.Lock()  # serializes full-system interrupt handling

# ============== YOLO media worker-thread management ==============
yolomedia_thread: Optional[threading.Thread] = None  # background worker thread
yolomedia_stop_event = threading.Event()             # cooperative stop signal
# NOTE(review): the two flags below appear superseded by ctx.yolomedia_running /
# ctx.yolomedia_sending_frames (see start/stop_yolomedia); kept for compatibility.
yolomedia_running = False
yolomedia_sending_frames = False  # set once YOLO starts emitting processed frames

# ============== Traffic-light detection frame-skip state ==============
_traffic_light_task = None           # in-flight detection task, if any
_traffic_light_result_jpeg = None    # last rendered detection frame
_traffic_light_pending_frame = None  # frame queued while a task is running

# Map spoken item names to YOLO class labels
ITEM_TO_CLASS_MAP = {
    "红牛": "Red_Bull",
    "AD钙奶": "AD_milk",
    "ad钙奶": "AD_milk",
    "钙奶": "AD_milk",
}
|
||
|
||
async def ui_broadcast_raw(msg: str):
    """Send a raw text message to every connected UI client.

    Clients whose send fails are removed from ctx.ui_clients.
    """
    failed = []
    for client_id, sock in list(ctx.ui_clients.items()):
        try:
            await sock.send_text(msg)
        except Exception:
            failed.append(client_id)
    for client_id in failed:
        ctx.ui_clients.pop(client_id, None)
|
||
|
||
|
||
async def ui_broadcast_partial(text: str):
    """Publish an in-progress (partial) transcript to all UI clients."""
    message = "PARTIAL:" + text
    # Partial-transcript state lives on the ctx singleton.
    ctx.current_partial = text
    await ui_broadcast_raw(message)
|
||
|
||
async def ui_broadcast_final(text: str):
    """Publish a finalized transcript line, cap history at 50 entries, and log it."""
    ctx.current_partial = ""
    history = ctx.recent_finals
    history.append(text)
    if len(history) > 50:
        # Keep only the newest 50 finals.
        ctx.recent_finals = history[-50:]
    await ui_broadcast_raw("FINAL:" + text)
    print(f"[ASR/AI FINAL] {text}")
|
||
|
||
async def full_system_reset(reason: str = ""):
    """
    Return the system to its just-booted state:
    1) Stop playback + cancel the AI task + cut every /stream.wav (hard_reset_audio)
    2) Stop the live ASR recognition stream (critical)
    3) Clear UI state
    4) Clear recent camera frames (so stale frames are not stitched into the next round)
    5) Notify the ESP32 with RESET (optional)
    """
    # 1) Audio & AI
    await hard_reset_audio(reason or "full_system_reset")

    # 2) ASR
    await stop_current_recognition()

    # 3) UI
    # global current_partial, recent_finals (Moved to ctx)
    ctx.current_partial = ""
    ctx.recent_finals = []

    # 4) Camera frames
    try:
        ctx.last_frames.clear()
    except Exception:
        pass

    # 5) Notify the ESP32
    try:
        if ctx.esp32_audio_ws and (ctx.esp32_audio_ws.client_state == WebSocketState.CONNECTED):
            await ctx.esp32_audio_ws.send_text("RESET")
    except Exception:
        pass

    print("[SYSTEM] full reset done.", flush=True)
|
||
|
||
# ========= Start/stop YOLO media processing =========
def start_yolomedia_with_target(target_name: str):
    """Start the yolomedia worker thread searching for the given item.

    target_name is mapped to a YOLO class via ITEM_TO_CLASS_MAP (falling
    back to the raw name); any already-running worker is stopped first.
    """
    global yolomedia_thread, yolomedia_stop_event  # running/sending moved to ctx

    # If a worker is already running, stop it first
    if ctx.yolomedia_running:
        stop_yolomedia()

    # Resolve the spoken item name to a YOLO class label
    yolo_class = ITEM_TO_CLASS_MAP.get(target_name, target_name)
    print(f"[YOLOMEDIA] Starting with target: {target_name} -> YOLO class: {yolo_class}", flush=True)

    yolomedia_stop_event.clear()
    ctx.yolomedia_running = True
    ctx.yolomedia_sending_frames = False  # reset the frame-sending flag

    def _run():
        # Worker body: run yolomedia headless until stop_event is set.
        try:
            # Pass the target class name and the cooperative stop event
            yolomedia.main(headless=True, prompt_name=yolo_class, stop_event=yolomedia_stop_event)
        except Exception as e:
            print(f"[YOLOMEDIA] worker stopped: {e}", flush=True)
        finally:
            ctx.yolomedia_running = False
            ctx.yolomedia_sending_frames = False

    yolomedia_thread = threading.Thread(target=_run, daemon=True)
    yolomedia_thread.start()
    print(f"[YOLOMEDIA] background worker started for: {yolo_class}(正在初始化,暂时显示原始画面)", flush=True)
|
||
|
||
def stop_yolomedia():
    """Stop the yolomedia worker thread (no-op when it is not running)."""
    global yolomedia_thread, yolomedia_stop_event  # running/sending moved to ctx

    if not ctx.yolomedia_running:
        return

    print("[YOLOMEDIA] Stopping worker...", flush=True)
    yolomedia_stop_event.set()

    # Wait for the worker to finish (at most 5 seconds)
    worker = yolomedia_thread
    if worker and worker.is_alive():
        worker.join(timeout=5.0)

    ctx.yolomedia_running = False
    ctx.yolomedia_sending_frames = False

    # If the orchestrator is in item-search mode it is NOT auto-resumed here;
    # mode changes are driven by voice commands. Only the flags are cleared.
    print("[YOLOMEDIA] Worker stopped, 等待状态切换.", flush=True)
|
||
|
||
# ========= Custom start_ai_with_text that recognizes special commands =========
async def start_ai_with_text_custom(user_text: str):
    """Extended AI entry point: dispatch special voice commands, else chat.

    Recognized commands (checked in order, each returning early):
    start/stop street crossing, start/stop traffic-light detection,
    start blind-path / indoor navigation, stop navigation, generic
    navigation control words, "find item X" (starts the yolomedia worker),
    and "found it" (stops the worker and restores navigation). Anything
    else falls through to the normal AI conversation via start_ai_with_text.
    """
    # global navigation_active, ... (Moved to ctx)

    # While in a navigation or traffic-light mode, only specific trigger
    # words may enter the omni conversation; everything else is dropped.
    if ctx.orchestrator:
        current_state = ctx.orchestrator.get_state()
        # In a navigation or traffic-light mode (i.e. not CHAT/IDLE)
        if current_state not in ["CHAT", "IDLE"]:
            # Allowed conversation trigger words
            allowed_keywords = ["帮我看", "帮我看下", "帮我找", "找一下", "看看", "识别一下"]
            is_allowed_query = any(keyword in user_text for keyword in allowed_keywords)

            # Navigation control commands
            nav_control_keywords = ["开始过马路", "过马路结束", "开始导航", "盲道导航", "停止导航", "结束导航",
                                    "检测红绿灯", "看红绿灯", "停止检测", "停止红绿灯",
                                    "室内导航", "室内导盲"]  # indoor navigation included
            is_nav_control = any(keyword in user_text for keyword in nav_control_keywords)

            # Neither an allowed query nor a navigation command: discard.
            if not is_allowed_query and not is_nav_control:
                mode_name = "红绿灯检测" if current_state == "TRAFFIC_LIGHT_DETECTION" else "导航"
                print(f"[{mode_name}模式] 丢弃非对话语音: {user_text}")
                return  # drop without entering omni

    # Street-crossing commands — routed through the orchestrator
    if "开始过马路" in user_text or "帮我过马路" in user_text:
        # If an item search is in progress, stop it first
        if ctx.yolomedia_running:
            stop_yolomedia()
            print("[ITEM_SEARCH] 从找物品模式切换到过马路")

        if ctx.orchestrator:
            ctx.orchestrator.start_crossing()
            print(f"[CROSS_STREET] 过马路模式已启动,状态: {ctx.orchestrator.get_state()}")
            # Announce via TTS and broadcast to the UI
            play_voice_text("过马路模式已启动。")
            await ui_broadcast_final("[系统] 过马路模式已启动")
        else:
            print("[CROSS_STREET] 警告:导航统领器未初始化!")
            play_voice_text("启动过马路模式失败,请稍后重试。")
            await ui_broadcast_final("[系统] 导航系统未就绪")
        return

    if "过马路结束" in user_text or "结束过马路" in user_text:
        if ctx.orchestrator:
            ctx.orchestrator.stop_navigation()
            print(f"[CROSS_STREET] 导航已停止,状态: {ctx.orchestrator.get_state()}")
            # Announce via TTS and broadcast to the UI
            play_voice_text("已停止导航。")
            await ui_broadcast_final("[系统] 过马路模式已停止")
        else:
            await ui_broadcast_final("[系统] 导航系统未运行")
        return

    # Traffic-light detection commands — mutually exclusive with blind-path nav
    if "检测红绿灯" in user_text or "看红绿灯" in user_text:
        try:
            # import trafficlight_detection (Fixed: use top-level safe import)
            if not trafficlight_detection:
                logger.warning("[TRAFFIC] trafficlight_detection module missing")
                await ui_broadcast_final("[系统] 红绿灯功能未启用")
                return

            # Switch the orchestrator into traffic-light mode (pauses blind-path nav)
            if ctx.orchestrator:
                ctx.orchestrator.start_traffic_light_detection()
                logger.info(f"[TRAFFIC] 切换到红绿灯检测模式,状态: {ctx.orchestrator.get_state()}")

            # Run in the main thread rather than a separate thread to avoid
            # dropping frames.
            success = trafficlight_detection.init_model()  # model init only, no thread
            trafficlight_detection.reset_detection_state()  # reset state

            if success:
                await ui_broadcast_final("[系统] 红绿灯检测已启动")
            else:
                await ui_broadcast_final("[系统] 红绿灯模型加载失败")
        except Exception as e:
            print(f"[TRAFFIC] 启动红绿灯检测失败: {e}")
            await ui_broadcast_final(f"[系统] 启动失败: {e}")
        return

    if "停止检测" in user_text or "停止红绿灯" in user_text:
        try:
            # Restore conversation mode
            if ctx.orchestrator:
                ctx.orchestrator.stop_navigation()  # back to CHAT mode
                print(f"[TRAFFIC] 红绿灯检测停止,恢复到{ctx.orchestrator.get_state()}模式")

            # Clear the cached traffic-light result frame
            global _traffic_light_result_jpeg
            _traffic_light_result_jpeg = None

            await ui_broadcast_final("[系统] 红绿灯检测已停止")
        except Exception as e:
            print(f"[TRAFFIC] 停止红绿灯检测失败: {e}")
            await ui_broadcast_final(f"[系统] 停止失败: {e}")
        return

    # Blind-path navigation commands — routed through the orchestrator
    if "开始导航" in user_text or "盲道导航" in user_text or "帮我导航" in user_text:
        # If an item search is in progress, stop it first
        if ctx.yolomedia_running:
            stop_yolomedia()
            print("[ITEM_SEARCH] 从找物品模式切换到盲道导航")

        if ctx.orchestrator:
            ctx.orchestrator.start_blind_path_navigation()
            print(f"[NAVIGATION] 盲道导航已启动,状态: {ctx.orchestrator.get_state()}")
            await ui_broadcast_final("[系统] 盲道导航已启动")
        else:
            print("[NAVIGATION] 警告:导航统领器未初始化!")
            await ui_broadcast_final("[系统] 导航系统未就绪")
        return

    # Indoor navigation command
    if "室内导航" in user_text or "室内导盲" in user_text:
        # If an item search is in progress, stop it first
        if ctx.yolomedia_running:
            stop_yolomedia()
            print("[ITEM_SEARCH] 从找物品模式切换到室内导航")

        if ctx.orchestrator:
            ctx.orchestrator.start_indoor_navigation()
            print(f"[INDOOR] 室内导航已启动,状态: {ctx.orchestrator.get_state()}")
            await ui_broadcast_final("[系统] 室内导航已启动")
        else:
            print("[INDOOR] 警告:导航统领器未初始化!")
            await ui_broadcast_final("[系统] 导航系统未就绪")
        return

    # "Stop navigation" has priority: any text containing "停止导航" or
    # "结束导航" — whether or not it also mentions indoor — means stop.
    if "停止导航" in user_text or "结束导航" in user_text:
        if ctx.orchestrator:
            ctx.orchestrator.stop_navigation()
            print(f"[NAVIGATION] 导航已停止,状态: {ctx.orchestrator.get_state()}")
            await ui_broadcast_final("[系统] 导航已停止")
        else:
            await ui_broadcast_final("[系统] 导航系统未运行")
        return

    # Remaining navigation control words are forwarded to the orchestrator.
    # NOTE(review): most of these keywords are already handled (and returned
    # from) above — in practice only "立即通过"/"现在通过"/"继续" reach here.
    nav_cmd_keywords = ["开始过马路", "过马路结束", "开始导航", "盲道导航", "停止导航", "结束导航", "立即通过", "现在通过", "继续"]
    if any(k in user_text for k in nav_cmd_keywords):
        if ctx.orchestrator:
            ctx.orchestrator.on_voice_command(user_text)
            await ui_broadcast_final("[系统] 导航模式已更新")
        else:
            await ui_broadcast_final("[系统] 导航统领器未初始化")
        return

    # "Find item X" command ("[帮我]找一下X", terminated by punctuation/EOL)
    find_pattern = r"(?:^\s*帮我)?\s*找一下\s*(.+?)(?:。|!|?|$)"
    match = re.search(find_pattern, user_text)

    if match:
        # Extract the Chinese item name
        item_cn = match.group(1).strip()
        if item_cn:
            # Local mapping + Qwen to extract the English class label
            label_en, src = extract_english_label(item_cn)
            print(f"[COMMAND] Finder request: '{item_cn}' -> '{label_en}' (src={src})", flush=True)

            # Switch to item-search mode (pauses navigation)
            if ctx.orchestrator:
                ctx.orchestrator.start_item_search()
                print(f"[ITEM_SEARCH] 已切换到找物品模式,状态: {ctx.orchestrator.get_state()}")

            # Pass the English class label to yolomedia (it auto-switches to
            # YOLOE when the class is not found in the base model)
            start_yolomedia_with_target(label_en)

            # Confirm to the frontend/voice UI
            try:
                await ui_broadcast_final(f"[找物品] 正在寻找 {item_cn}...")
            except Exception:
                pass

        return

    # "Found it" command
    if "找到了" in user_text or "拿到了" in user_text:
        print("[COMMAND] Found command detected", flush=True)
        # Stop yolomedia
        stop_yolomedia()

        # End item-search mode and restore the prior navigation state
        if ctx.orchestrator:
            ctx.orchestrator.stop_item_search(restore_nav=True)
            current_state = ctx.orchestrator.get_state()
            print(f"[ITEM_SEARCH] 找物品结束,当前状态: {current_state}")

            # Feedback depends on the restored state
            if current_state in ["BLINDPATH_NAV", "SEEKING_CROSSWALK", "WAIT_TRAFFIC_LIGHT", "CROSSING", "SEEKING_NEXT_BLINDPATH"]:
                await ui_broadcast_final("[找物品] 已找到物品,继续导航。")
            else:
                await ui_broadcast_final("[找物品] 已找到物品。")
        else:
            await ui_broadcast_final("[找物品] 已找到物品。")

        return

    # An omni conversation is starting: switch to CHAT mode, remembering the
    # previous navigation state so it can be restored afterwards.
    # global omni_conversation_active, omni_previous_nav_state (Moved to ctx)
    ctx.omni_conversation_active = True

    # Save the current navigation state and switch to CHAT mode
    if ctx.orchestrator:
        current_state = ctx.orchestrator.get_state()
        # Only save/switch when actually in a navigation mode
        if current_state not in ["CHAT", "IDLE"]:
            ctx.omni_previous_nav_state = current_state
            ctx.orchestrator.force_state("CHAT")
            print(f"[OMNI] 对话开始,从{current_state}切换到CHAT模式")
        else:
            ctx.omni_previous_nav_state = None
            print(f"[OMNI] 对话开始(当前已在{current_state}模式)")

    # Not a special command: run the normal AI conversation — unless
    # yolomedia is running, in which case ordinary chat is suppressed.
    if ctx.yolomedia_running:
        print("[AI] YOLO media is running, skipping normal AI response", flush=True)
        return

    # Original AI conversation logic
    await start_ai_with_text(user_text)
|
||
|
||
# ========= Omni playback entry point =========
async def start_ai_with_text(user_text: str):
    """Hard-reset the audio path, then launch a new AI voice reply for *user_text*.

    The reply runs as a background task (stored as ``current_ai_task`` in the
    ``audio_stream`` module namespace) using either the new GLM + EdgeTTS
    pipeline or the legacy Qwen-Omni stream, selected by USE_NEW_AI_PIPELINE.
    """

    # Day 13: capture the device WebSocket before AI processing starts, so TTS
    # audio has a destination even if the reference changes mid-reply.
    from audio_stream import set_tts_websocket
    set_tts_websocket(ctx.esp32_audio_ws)

    async def _runner_new_pipeline():
        """Day 21: new pipeline - GLM-4.5-Flash chat + EdgeTTS synthesis."""
        txt_buf: List[str] = []

        try:
            # Attach the most recent camera frame (if any) as a base64 JPEG.
            img_b64 = None
            if ctx.last_frames:
                try:
                    _, jpeg_bytes = ctx.last_frames[-1]
                    img_b64 = base64.b64encode(jpeg_bytes).decode("ascii")
                except Exception:
                    pass

            # Call GLM-4.5-Flash.
            print(f"[NEW AI] 调用 GLM: {user_text[:50]}...")
            ai_response = await glm_chat(user_text, img_b64)

            if not ai_response:
                print("[NEW AI] AI 无回复")
                return

            txt_buf.append(ai_response)
            print(f"[NEW AI] AI 回复: {ai_response}")
            await ui_broadcast_partial("[AI] " + ai_response)

            # Stream EdgeTTS audio to the device. Mark "TTS playing" in the VAD
            # so the speaker echo is not mistaken for user speech.
            vad = get_server_vad()
            vad.set_tts_playing(True)

            try:
                async for audio_chunk in text_to_speech_pcm_stream(ai_response):
                    if audio_chunk:
                        await broadcast_pcm16_realtime(audio_chunk)
            finally:
                # Playback finished (or failed): re-enable VAD speech detection.
                vad.set_tts_playing(False)

            print("[NEW AI] 音频播放完成")

        except asyncio.CancelledError:
            raise
        except Exception as e:
            err_msg = f"AI Error: {str(e)}"
            logger.error(f"[NEW AI] 错误: {err_msg}", exc_info=True)

            # 1. Broadcast the error to the UI.
            try:
                await ui_broadcast_final(f"[系统] {err_msg}")
            except Exception:
                pass

            # 2. Tell the device a reply is starting.
            if ctx.esp32_audio_ws:
                try:
                    await ctx.esp32_audio_ws.send_text("OK:REPLY_START")
                except Exception:
                    pass

            # 3. Speak the failure (best effort), so the user is not left
            #    thinking the assistant is still "thinking".
            try:
                vad = get_server_vad()
                vad.set_tts_playing(True)
                async for audio_chunk in text_to_speech_pcm_stream("抱歉,我遇到了一些问题。"):
                    if audio_chunk:
                        await broadcast_pcm16_realtime(audio_chunk)
                vad.set_tts_playing(False)
            except Exception:
                pass
        finally:
            # Conversation state lives on ctx (was module globals).
            ctx.omni_conversation_active = False

            # Restore whatever navigation state was saved when the chat began.
            if ctx.orchestrator and ctx.omni_previous_nav_state:
                ctx.orchestrator.force_state(ctx.omni_previous_nav_state)
                # print(f"[AI] conversation ended, restored to {ctx.omni_previous_nav_state}")
                ctx.omni_previous_nav_state = None

            # Flush stream clients: pad non-aborted streams with one 20 ms
            # silence frame, then enqueue the None end-of-stream sentinel.
            from audio_stream import stream_clients
            for sc in list(stream_clients):
                if not sc.abort_event.is_set():
                    try: sc.q.put_nowait(b"\x00"*BYTES_PER_20MS_16K)
                    except Exception: pass
                try: sc.q.put_nowait(None)
                except Exception: pass

            final_text = ("".join(txt_buf)).strip() or "(空响应)"
            await ui_broadcast_final("[AI] " + final_text)

    async def _runner_old_pipeline():
        """Legacy pipeline - Qwen-Omni streaming text + audio."""
        txt_buf: List[str] = []
        rate_state = None  # audioop.ratecv resampler state, carried across chunks

        # Assemble the multimodal request (latest camera frame + text).
        content_list = []
        if ctx.last_frames:
            try:
                _, jpeg_bytes = ctx.last_frames[-1]
                img_b64 = base64.b64encode(jpeg_bytes).decode("ascii")
                content_list.append({
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}
                })
            except Exception:
                pass
        content_list.append({"type": "text", "text": user_text})

        try:
            async for piece in stream_chat(content_list, voice="Cherry", audio_format="wav"):
                if piece.text_delta:
                    # Accumulate text deltas and mirror the partial to the UI.
                    txt_buf.append(piece.text_delta)
                    try:
                        await ui_broadcast_partial("[AI] " + "".join(txt_buf))
                    except Exception:
                        pass

                if piece.audio_b64:
                    try:
                        pcm24 = base64.b64decode(piece.audio_b64)
                    except Exception:
                        pcm24 = b""
                    if pcm24:
                        # Resample 24 kHz -> 16 kHz mono 16-bit and attenuate to 60 %.
                        pcm16k, rate_state = audioop.ratecv(pcm24, 2, 1, 24000, 16000, rate_state)
                        pcm16k = audioop.mul(pcm16k, 2, 0.60)
                        if pcm16k:
                            await broadcast_pcm16_realtime(pcm16k)

        except asyncio.CancelledError:
            raise
        except Exception as e:
            try:
                await ui_broadcast_final(f"[AI] 发生错误:{e}")
            except Exception:
                pass
        finally:
            # Conversation state lives on ctx (was module globals).
            ctx.omni_conversation_active = False

            if ctx.orchestrator and ctx.omni_previous_nav_state:
                ctx.orchestrator.force_state(ctx.omni_previous_nav_state)
                # print(f"[OMNI] conversation ended, restored to {ctx.omni_previous_nav_state}")
                ctx.omni_previous_nav_state = None
            else:
                print(f"[OMNI] 对话结束(无需恢复导航状态)")

            # Flush stream clients: silence padding then None sentinel.
            from audio_stream import stream_clients
            for sc in list(stream_clients):
                if not sc.abort_event.is_set():
                    try: sc.q.put_nowait(b"\x00"*BYTES_PER_20MS_16K)
                    except Exception: pass
                try: sc.q.put_nowait(None)
                except Exception: pass

            final_text = ("".join(txt_buf)).strip() or "(空响应)"
            try:
                await ui_broadcast_final("[AI] " + final_text)
            except Exception:
                pass

    # Hard-reset audio before actually starting the new reply.
    await hard_reset_audio("start_ai_with_text")
    loop = asyncio.get_running_loop()
    # Grab the audio_stream module namespace so the running task can be stored
    # where the reset/interrupt logic looks for it.
    from audio_stream import __dict__ as _as_dict

    # Select the pipeline per configuration.
    if USE_NEW_AI_PIPELINE:
        task = loop.create_task(_runner_new_pipeline())
    else:
        task = loop.create_task(_runner_old_pipeline())

    _as_dict["current_ai_task"] = task
||
|
||
# ---------- Pages / health ----------
@app.get("/", response_class=HTMLResponse)
def root():
    """Serve the main WebUI page from templates/index.html."""
    index_path = os.path.join("templates", "index.html")
    with open(index_path, "r", encoding="utf-8") as fp:
        page = fp.read()
    return HTMLResponse(page)
|
||
|
||
@app.get("/api/health", response_class=PlainTextResponse)
def health():
    """Liveness probe: always answers the literal plain-text string "OK"."""
    return "OK"
|
||
|
||
# Register the /stream.wav audio-streaming route on the FastAPI app.
register_stream_route(app)
|
||
|
||
# ---------- WebSocket: WebUI text (ASR/AI status push) ----------
@app.websocket("/ws_ui")
async def ws_ui(ws: WebSocket):
    """Register a WebUI text client and send it the current transcript snapshot.

    The socket is then kept open passively (broadcasts are pushed from
    elsewhere); the sleep loop only holds the connection until it drops.
    """
    await ws.accept()
    client_key = id(ws)
    ctx.ui_clients[client_key] = ws
    try:
        snapshot = {"partial": ctx.current_partial, "finals": ctx.recent_finals[-10:]}
        await ws.send_text("INIT:" + json.dumps(snapshot, ensure_ascii=False))
        while True:
            await asyncio.sleep(60)
    except (WebSocketDisconnect, asyncio.CancelledError):
        pass
    finally:
        ctx.ui_clients.pop(client_key, None)
|
||
|
||
|
||
# ---------- Day 21: new AI audio handling (SenseVoice + GLM + EdgeTTS) ----------
async def process_complete_audio_new_pipeline(audio_data: bytes, ws: WebSocket):
    """
    Non-streaming audio handling:
    1. SenseVoice transcribes the complete utterance
    2. GLM generates a reply
    3. EdgeTTS synthesizes the reply and streams it out

    NOTE(review): *ws* is currently unused — audio goes out via
    broadcast_pcm16_realtime; confirm whether a per-socket reply was intended.
    """
    try:
        # 1. Speech recognition of the full buffered utterance.
        logger.info(f"[NEW AI] 开始识别音频: {len(audio_data)} bytes")
        user_text = await sensevoice_recognize(audio_data)

        # Require at least two non-whitespace characters to count as speech.
        if not user_text or len(user_text.strip()) < 2:
            logger.info("[NEW AI] 未识别到有效语音")
            return

        logger.info(f"[NEW AI] 用户说: {user_text}")
        await ui_broadcast_partial(f"[用户] {user_text}")

        # Navigation-command detection is handled by the existing
        # start_ai_with_text_custom path, so navigation behavior stays intact.

        # 2. GLM reply generation.
        ai_response = await glm_chat(user_text)

        if not ai_response:
            logger.info("[NEW AI] AI 无回复")
            return

        logger.info(f"[NEW AI] AI 回复: {ai_response}")
        await ui_broadcast_final(f"[AI] {ai_response}")

        # 3. EdgeTTS streaming synthesis. Mark "TTS playing" in the VAD so the
        #    speaker echo is not re-detected as user speech.
        vad = get_server_vad()
        vad.set_tts_playing(True)

        try:
            async for audio_chunk in text_to_speech_pcm_stream(ai_response):
                if audio_chunk:
                    await broadcast_pcm16_realtime(audio_chunk)
        finally:
            vad.set_tts_playing(False)

        logger.info("[NEW AI] 音频合成并发送完成")

    except Exception as e:
        logger.error(f"[NEW AI] 处理失败: {e}", exc_info=True)
|
||
|
||
|
||
# ---------- WebSocket: device audio ingress (ASR uplink) ----------
@app.websocket("/ws_audio")
async def ws_audio(ws: WebSocket):
    """Device microphone uplink: text commands (START/STOP/RECOGNIZE/PROMPT:)
    interleaved with binary PCM frames. Routes audio either to the new
    SenseVoice pipeline (server-side VAD) or the legacy DashScope stream.
    """
    # esp32_audio_ws state lives on ctx (was a module global).
    ctx.esp32_audio_ws = ws
    # Day 20: save the TTS WebSocket reference as soon as the connection is
    # made, so TTS audio cannot be lost to a stale reference.
    from audio_stream import set_tts_websocket
    set_tts_websocket(ws)
    await ws.accept()
    logger.info("\n[AUDIO] client connected (TTS WebSocket reference saved)")
    recognition = None                               # active DashScope recognizer (legacy pipeline)
    streaming = False                                # True while a capture session is open
    last_ts = time.monotonic()                       # time of the last audio frame sent upstream
    keepalive_task: Optional[asyncio.Task] = None    # silence-injection task (legacy pipeline)
    audio_buffer = bytearray()  # Day 21: audio collected for the new pipeline
    # NOTE(review): nothing in this handler ever appends to audio_buffer, so
    # the RECOGNIZE command below always reports TOO_SHORT; the server-side
    # VAD path appears to supersede it — confirm whether this is intended.

    async def stop_rec(send_notice: Optional[str] = None):
        """Tear down the recognizer + keepalive and optionally notify the device."""
        nonlocal recognition, streaming, keepalive_task
        if keepalive_task and not keepalive_task.done():
            keepalive_task.cancel()
            try: await keepalive_task
            except Exception: pass
        keepalive_task = None
        if recognition:
            try: recognition.stop()
            except Exception: pass
        recognition = None
        await set_current_recognition(None)
        streaming = False
        if send_notice:
            try: await ws.send_text(send_notice)
            except Exception: pass

    async def on_sdk_error(_msg: str):
        # Any SDK failure: stop and ask the device to restart the session.
        await stop_rec(send_notice="RESTART")

    async def keepalive_loop():
        """Feed silence to the recognizer when the device goes quiet, so the
        upstream session is not closed for inactivity (legacy pipeline only)."""
        nonlocal last_ts, recognition, streaming
        try:
            while streaming and recognition is not None:
                idle = time.monotonic() - last_ts
                if idle > 0.35:
                    try:
                        for _ in range(30):  # ~600 ms of silence
                            recognition.send_audio_frame(SILENCE_CHUNK)
                        last_ts = time.monotonic()
                    except Exception:
                        await on_sdk_error("keepalive send failed")
                        return
                await asyncio.sleep(0.10)
        except asyncio.CancelledError:
            return

    try:
        while True:
            # Bail out once the socket is no longer connected.
            if WebSocketState and ws.client_state != WebSocketState.CONNECTED:
                break
            try:
                msg = await ws.receive()
            except WebSocketDisconnect:
                break
            except RuntimeError as e:
                # Starlette raises this when receive() is called after close.
                if "Cannot call \"receive\"" in str(e):
                    break
                raise

            if "text" in msg and msg["text"] is not None:
                raw = (msg["text"] or "").strip()
                cmd = raw.upper()

                if cmd == "START":
                    logger.info("[AUDIO] START received")
                    await stop_rec()

                    # Day 13: flush any TTS audio cached while disconnected.
                    try:
                        from audio_stream import flush_tts_buffer
                        flushed = await flush_tts_buffer(ws)
                        if flushed > 0:
                            logger.info(f"[AUDIO] Flushed {flushed} bytes of cached TTS audio")
                    except Exception as e:
                        logger.warning(f"[AUDIO] Error flushing TTS buffer: {e}")

                    if USE_NEW_AI_PIPELINE:
                        # Day 21: new pipeline - server-side VAD + non-streaming SenseVoice.
                        reset_server_vad()  # reset VAD state

                        # Fresh session: clear the chat history.
                        from glm_client import clear_conversation_history
                        clear_conversation_history()

                        streaming = True
                        await ui_broadcast_partial("(已开始接收音频…)")
                        await ws.send_text("OK:STARTED")
                        logger.info("[NEW ASR] 新管道已启动,服务器端 VAD 监听中")
                    else:
                        # Legacy pipeline - streaming DashScope recognition.
                        loop = asyncio.get_running_loop()
                        def post(coro):
                            # Bridge SDK callback threads back onto the event loop.
                            asyncio.run_coroutine_threadsafe(coro, loop)

                        cb = ASRCallback(
                            on_sdk_error=lambda s: post(on_sdk_error(s)),
                            post=post,
                            ui_broadcast_partial=ui_broadcast_partial,
                            ui_broadcast_final=ui_broadcast_final,
                            is_playing_now_fn=is_playing_now,
                            start_ai_with_text_fn=start_ai_with_text_custom,
                            full_system_reset_fn=full_system_reset,
                            interrupt_lock=interrupt_lock,
                        )

                        recognition = dash_audio.asr.Recognition(
                            api_key=API_KEY, model=MODEL, format=AUDIO_FMT,
                            sample_rate=SAMPLE_RATE, callback=cb
                        )
                        recognition.start()
                        await set_current_recognition(recognition)
                        streaming = True
                        last_ts = time.monotonic()
                        keepalive_task = asyncio.create_task(keepalive_loop())
                        await ui_broadcast_partial("(已开始接收音频…)")
                        await ws.send_text("OK:STARTED")

                elif cmd == "STOP":
                    if recognition:
                        # Pad with ~300 ms of silence so the recognizer flushes
                        # its final partial before we stop it.
                        for _ in range(15):
                            try: recognition.send_audio_frame(SILENCE_CHUNK)
                            except Exception: break
                    await stop_rec(send_notice="OK:STOPPED")

                elif cmd == "RECOGNIZE" and USE_NEW_AI_PIPELINE:
                    # Day 21: client-side VAD decided the utterance ended.
                    if audio_buffer and len(audio_buffer) > 3200:  # at least 100 ms of audio
                        logger.info(f"[NEW ASR] 收到 RECOGNIZE 命令,音频长度: {len(audio_buffer)} bytes")
                        await ui_broadcast_partial("(正在识别…)")

                        # Non-streaming recognition of the buffered audio.
                        user_text = await sensevoice_recognize(bytes(audio_buffer))
                        audio_buffer.clear()

                        if user_text and len(user_text.strip()) >= 2:
                            logger.info(f"[NEW ASR] 识别结果: {user_text}")
                            await ui_broadcast_final(f"[用户] {user_text}")

                            # Hand off to the AI reply path.
                            async with interrupt_lock:
                                await start_ai_with_text_custom(user_text)
                            await ws.send_text("OK:RECOGNIZED")
                        else:
                            logger.info("[NEW ASR] 未识别到有效语音")
                            await ws.send_text("OK:EMPTY")
                    else:
                        print("[NEW ASR] 音频太短,忽略")
                        await ws.send_text("OK:TOO_SHORT")

                elif raw.startswith("PROMPT:"):
                    # Direct text prompt injected by the device / test tooling.
                    text = raw[len("PROMPT:"):].strip()
                    if text:
                        async with interrupt_lock:
                            await start_ai_with_text_custom(text)
                        await ws.send_text("OK:PROMPT_ACCEPTED")
                    else:
                        await ws.send_text("ERR:EMPTY_PROMPT")

            elif "bytes" in msg and msg["bytes"] is not None:
                audio_bytes = msg["bytes"]
                # Cheap cross-connection receive counters stored as function
                # attributes (shared by all connections to this endpoint).
                if not hasattr(ws_audio, '_audio_recv_count'):
                    ws_audio._audio_recv_count = 0
                    ws_audio._audio_total_bytes = 0
                ws_audio._audio_recv_count += 1
                ws_audio._audio_total_bytes += len(audio_bytes)

                if ws_audio._audio_recv_count % 500 == 0:
                    logger.info(f"[AUDIO] 📥 Received: {ws_audio._audio_recv_count} packets, {ws_audio._audio_total_bytes} bytes total")

                if USE_NEW_AI_PIPELINE:
                    # Day 21: server-side VAD decides utterance boundaries.
                    if streaming:
                        vad = get_server_vad()
                        vad_result = vad.process(audio_bytes)

                        if vad_result['speech_started']:
                            await ui_broadcast_partial("(正在录音…)")

                        if vad_result['speech_ended'] and vad_result['speech_audio']:
                            # VAD detected end of speech: auto-trigger recognition.
                            speech_audio = vad_result['speech_audio']
                            logger.info(f"[VAD] 自动触发识别,音频长度: {len(speech_audio)} bytes")
                            await ui_broadcast_partial("(正在识别…)")

                            # Non-streaming recognition.
                            user_text = await sensevoice_recognize(speech_audio)

                            if user_text and len(user_text.strip()) >= 2:
                                logger.info(f"[NEW ASR] 识别结果: {user_text}")
                                await ui_broadcast_final(f"[用户] {user_text}")

                                # Hand off to the AI reply path.
                                async with interrupt_lock:
                                    await start_ai_with_text_custom(user_text)
                                await ws.send_text("OK:RECOGNIZED")
                            else:
                                logger.info("[NEW ASR] 未识别到有效语音")
                                await ws.send_text("OK:EMPTY")
                else:
                    # Legacy pipeline: relay frames to DashScope in real time.
                    if streaming and recognition:
                        try:
                            recognition.send_audio_frame(audio_bytes)
                            last_ts = time.monotonic()
                        except Exception:
                            await on_sdk_error("send_audio_frame failed")

    except Exception as e:
        logger.error(f"[WS ERROR] {e}")
    finally:
        await stop_rec()
        try:
            if WebSocketState is None or ws.client_state == WebSocketState.CONNECTED:
                await ws.close(code=1000)
        except Exception:
            pass
        # Clear the shared reference only if it still points at this socket.
        if ctx.esp32_audio_ws is ws:
            ctx.esp32_audio_ws = None
        logger.info("[WS] connection closed")
|
||
|
||
# ---------- WebSocket: device camera ingress (binary JPEG) ----------
@app.websocket("/ws/camera")
async def ws_camera_esp(ws: WebSocket):
    """Camera uplink from the device: receives JPEG frames, feeds them to the
    navigation state machine (blind path / street crossing / traffic light),
    and relays annotated or raw frames to browser viewers.

    Only one camera connection is allowed at a time; a second one is refused
    with close code 1013 (Try Again Later).
    """
    # Camera/navigator state lives on ctx (was module globals).
    if ctx.esp32_camera_ws is not None:
        await ws.close(code=1013)
        return
    ctx.esp32_camera_ws = ws
    await ws.accept()
    logger.info("[CAMERA] 设备已连接")

    # Lazily create the blind-path navigator once the model is available.
    if ctx.blind_path_navigator is None and ctx.yolo_seg_model is not None:
        ctx.blind_path_navigator = BlindPathNavigator(ctx.yolo_seg_model, ctx.obstacle_detector)
        print("[NAVIGATION] 盲道导航器已初始化")
    else:
        if ctx.blind_path_navigator is not None:
            logger.info("[NAVIGATION] 导航器已存在,无需重新初始化")
        elif ctx.yolo_seg_model is None:
            logger.warning("[NAVIGATION] 警告:YOLO模型未加载,无法初始化导航器")

    # Lazily create the street-crossing navigator (zebra-crossing only).
    if ctx.cross_street_navigator is None:
        if ctx.yolo_seg_model:
            ctx.cross_street_navigator = CrossStreetNavigator(
                seg_model=ctx.yolo_seg_model,
                coco_model=None,   # traffic-light detection not used here
                obs_model=None     # obstacle detection off for speed
            )
            logger.info("[CROSS_STREET] 过马路导航器已初始化(简化版 - 仅斑马线检测)")
        else:
            logger.error("[CROSS_STREET] 错误:缺少分割模型,无法初始化过马路导航器")

            if not ctx.yolo_seg_model:
                print("[CROSS_STREET] - 缺少分割模型 (yolo_seg_model)")
            if not ctx.obstacle_detector:
                print("[CROSS_STREET] - 缺少障碍物检测器 (obstacle_detector)")

    # Create the top-level orchestrator once both sub-navigators exist.
    if ctx.orchestrator is None and ctx.blind_path_navigator is not None and ctx.cross_street_navigator is not None:
        ctx.orchestrator = NavigationMaster(ctx.blind_path_navigator, ctx.cross_street_navigator, indoor_nav=ctx.indoor_navigator)
        logger.info("[NAV MASTER] 统领状态机已初始化(托管模式)")
    frame_counter = 0  # frames received on this connection

    # Day 20: performance-diagnostic accumulators.
    _perf_last_frame_time = None
    _perf_frame_intervals = []
    _perf_broadcast_times = []
    _perf_nav_times = []

    # FIX: traffic-light frame-skipping state. These were assigned below with
    # no prior binding and no `global` declaration, so the first frame in
    # TRAFFIC_LIGHT_DETECTION raised UnboundLocalError on the
    # `_traffic_light_task is None` check. Initialize them as per-connection
    # locals (mirroring the _perf_* locals above).
    _traffic_light_task = None
    _traffic_light_pending_frame = None
    _traffic_light_result_jpeg = None

    try:
        while True:
            msg = await ws.receive()
            if "bytes" in msg and msg["bytes"] is not None:
                data = msg["bytes"]
                frame_counter += 1

                # Day 20: record inter-frame interval for the FPS estimate.
                _perf_frame_time = time.perf_counter()
                if _perf_last_frame_time is not None:
                    _perf_frame_intervals.append(_perf_frame_time - _perf_last_frame_time)
                _perf_last_frame_time = _perf_frame_time

                # Record the raw frame (best effort).
                try:
                    sync_recorder.record_frame(data)
                except Exception as e:
                    if frame_counter % 100 == 0:  # avoid log flooding
                        logger.error(f"[RECORDER] 录制帧失败: {e}")

                try:
                    ctx.last_frames.append((time.time(), data))
                except Exception:
                    pass

                # Forward to bridge_io (consumed by yolomedia).
                bridge_io.push_raw_jpeg(data)

                # Debug/perf summary every 60 frames (~5-6 s).
                if frame_counter % 60 == 0:
                    state_dbg = ctx.orchestrator.get_state() if ctx.orchestrator else "N/A"

                    # Day 20: aggregate and reset the perf accumulators.
                    if _perf_frame_intervals:
                        avg_interval = sum(_perf_frame_intervals) / len(_perf_frame_intervals) * 1000
                        fps = 1000 / avg_interval if avg_interval > 0 else 0
                        _perf_frame_intervals.clear()
                    else:
                        avg_interval = 0
                        fps = 0

                    avg_broadcast = sum(_perf_broadcast_times) / len(_perf_broadcast_times) if _perf_broadcast_times else 0
                    avg_nav = sum(_perf_nav_times) / len(_perf_nav_times) if _perf_nav_times else 0
                    _perf_broadcast_times.clear()
                    _perf_nav_times.clear()

                    logger.info(f"[PERF] 帧:{frame_counter} | 客户端FPS:{fps:.1f} | 帧间隔:{avg_interval:.1f}ms | "
                                f"广播:{avg_broadcast:.1f}ms | 导航:{avg_nav:.1f}ms | state={state_dbg}")

                # Day 19: lazy decode - only pay the JPEG-decode cost when a
                # navigation state actually consumes the pixels.
                needs_processing = (ctx.orchestrator and not ctx.yolomedia_running)
                bgr = None  # decoded frame, filled on demand

                if needs_processing:
                    current_state = ctx.orchestrator.get_state()

                    # Day 19: ITEM_SEARCH/CHAT/IDLE need no processing; relay
                    # the raw JPEG untouched (zero-copy) and move on.
                    if current_state in ("ITEM_SEARCH", "CHAT", "IDLE"):
                        if not ctx.yolomedia_sending_frames and ctx.camera_viewers:
                            await _broadcast_to_viewers(data)
                        continue

                    # Decode only when navigation processing will run.
                    try:
                        bgr = turbo_decode(data)
                        if bgr is None or bgr.size == 0:
                            if frame_counter % 30 == 0:
                                logger.warning(f"[JPEG] 解码失败:数据长度={len(data)}")
                            bgr = None
                    except Exception as e:
                        if frame_counter % 30 == 0:
                            logger.error(f"[JPEG] 解码异常: {e}")
                        bgr = None

                # Managed mode: hand the frame to the orchestrator as long as
                # item-search (yolomedia) does not own the video feed.
                if ctx.orchestrator and not ctx.yolomedia_running and bgr is not None:
                    out_img = bgr  # default output: the unannotated frame
                    try:
                        if current_state == "TRAFFIC_LIGHT_DETECTION":
                            # Traffic-light mode: frame-skipping so detection
                            # never blocks the receive loop.
                            if not trafficlight_detection:
                                # Safe import failed at startup; nothing to do.
                                continue

                            # Always keep the newest frame as the pending one.
                            _traffic_light_pending_frame = bgr

                            # Launch a detection task only when none is running.
                            if _traffic_light_task is None or _traffic_light_task.done():
                                if _traffic_light_task is not None and _traffic_light_task.done():
                                    # Harvest the finished task's visualization.
                                    try:
                                        result = _traffic_light_task.result()
                                        if result and result.get('vis_image') is not None:
                                            enc = turbo_encode(result['vis_image'], quality=80)
                                            if enc:
                                                _traffic_light_result_jpeg = enc
                                    except Exception:
                                        pass

                                # Start the next detection on the newest frame.
                                if _traffic_light_pending_frame is not None:
                                    frame = _traffic_light_pending_frame
                                    _traffic_light_pending_frame = None
                                    loop = asyncio.get_event_loop()
                                    # Consistency fix: use the executor held on
                                    # ctx, as the navigation branch below does.
                                    _traffic_light_task = loop.run_in_executor(
                                        ctx.frame_processing_executor,
                                        trafficlight_detection.process_single_frame,
                                        frame,
                                        None
                                    )

                            # Broadcast traffic-light output (independent of
                            # the blind-path result cache).
                            if ctx.camera_viewers:
                                if _traffic_light_result_jpeg is not None:
                                    await _broadcast_to_viewers(_traffic_light_result_jpeg)
                                else:
                                    await _broadcast_to_viewers(data)  # first-frame fallback
                            continue  # skip the blind-path broadcast logic
                        else:
                            # Day 15 frame-skipping: never wait on processing;
                            # reuse the most recent successful result instead.

                            # The pending frame is always the newest one.
                            ctx.nav_pending_frame = bgr

                            # Start a new task only when none is in flight.
                            if ctx.nav_processing_task is None or ctx.nav_processing_task.done():
                                # Harvest the previous task's result first.
                                if ctx.nav_processing_task is not None and ctx.nav_processing_task.done():
                                    # Day 20: record processing latency.
                                    if ctx.nav_task_start_time is not None:
                                        nav_elapsed = (time.perf_counter() - ctx.nav_task_start_time) * 1000
                                        _perf_nav_times.append(nav_elapsed)
                                        ctx.nav_task_start_time = None

                                    try:
                                        res = ctx.nav_processing_task.result()
                                        if res is not None:
                                            ctx.nav_last_result_image = res.annotated_image
                                            # Day 19: encode once and cache the
                                            # JPEG instead of re-encoding per frame.
                                            if ctx.nav_last_result_image is not None:
                                                enc_result = turbo_encode(ctx.nav_last_result_image, quality=80)
                                                if enc_result:
                                                    # NOTE(review): trimming recent_finals
                                                    # here looks unrelated to JPEG caching
                                                    # (likely a copy-paste remnant) — kept
                                                    # to preserve behavior; confirm intent.
                                                    if len(ctx.recent_finals) > 50:
                                                        ctx.recent_finals.pop(0)
                                                    ctx.nav_last_result_jpeg = enc_result
                                            # Voice guidance from the navigator.
                                            if res.guidance_text:
                                                try:
                                                    # Day 21: vision-priority interrupt -
                                                    # obstacle warnings cut off a running
                                                    # AI reply.
                                                    obstacle_keywords = ['前方有', '停一下', '注意避让', '左侧有', '右侧有']
                                                    is_obstacle_warning = any(kw in res.guidance_text for kw in obstacle_keywords)

                                                    if is_obstacle_warning:
                                                        if is_playing_now():
                                                            logger.warning(f"[PRIORITY INTERRUPT] 检测到障碍物警告,打断AI对话: {res.guidance_text}")
                                                            asyncio.create_task(hard_reset_audio("Obstacle priority interrupt"))

                                                    play_voice_text(res.guidance_text)
                                                    asyncio.create_task(ui_broadcast_final(f"[导航] {res.guidance_text}"))
                                                except Exception:
                                                    pass
                                    except Exception:
                                        logger.error(f"[NAV MASTER] 获取导航结果异常:", exc_info=True)

                                # Kick off processing of the newest frame.
                                if ctx.nav_pending_frame is not None:
                                    frame_to_process = ctx.nav_pending_frame
                                    ctx.nav_pending_frame = None
                                    ctx.nav_task_start_time = time.perf_counter()  # Day 20
                                    loop = asyncio.get_event_loop()
                                    ctx.nav_processing_task = loop.run_in_executor(
                                        ctx.frame_processing_executor,
                                        ctx.orchestrator.process_frame,
                                        frame_to_process
                                    )

                            # Show the last successful result without blocking.
                            out_img = ctx.nav_last_result_image if ctx.nav_last_result_image is not None else bgr
                    except Exception as e:
                        if frame_counter % 100 == 0:
                            logger.error(f"[NAV MASTER] 处理帧时出错: {e}")

                    # Day 19: broadcast navigation output, preferring the
                    # cached pre-encoded JPEG.
                    if ctx.camera_viewers:
                        _t_broadcast = time.perf_counter()  # Day 20: timing
                        if ctx.nav_last_result_jpeg is not None:
                            await _broadcast_to_viewers(ctx.nav_last_result_jpeg)
                        elif out_img is not None:
                            # Fallback: encode the current frame.
                            enc_result = turbo_encode(out_img, quality=80)
                            if enc_result:
                                await _broadcast_to_viewers(enc_result)
                        else:
                            # Day 23 fix: first frames after startup have no
                            # processed result yet — relay the raw frame.
                            await _broadcast_to_viewers(data)
                        _perf_broadcast_times.append((time.perf_counter() - _t_broadcast) * 1000)  # Day 20
                    # Frame handled by the orchestrator; next frame.
                    continue

                # Day 19: zero-copy relay — the incoming payload is already a
                # JPEG, so forward it without a decode/encode round trip.
                if not ctx.yolomedia_sending_frames and ctx.camera_viewers:
                    try:
                        await _broadcast_to_viewers(data)
                    except Exception as e:
                        logger.error(f"[CAMERA] Broadcast error: {e}")

            elif "type" in msg and msg["type"] in ("websocket.close", "websocket.disconnect"):
                break
    except WebSocketDisconnect:
        pass
    except Exception as e:
        logger.error(f"[CAMERA ERROR] {e}")
    finally:
        try:
            if WebSocketState is None or ws.client_state == WebSocketState.CONNECTED:
                await ws.close(code=1000)
        except Exception:
            pass
        ctx.esp32_camera_ws = None
        logger.info("[CAMERA] 设备已断开")

        # Reset navigation state so the next connection starts clean.
        if ctx.blind_path_navigator:
            ctx.blind_path_navigator.reset()
        if ctx.cross_street_navigator:
            ctx.cross_street_navigator.reset()
        if ctx.orchestrator:
            ctx.orchestrator.reset()
            logger.info("[NAV MASTER] 统领器已重置")
|
||
|
||
# ---------- WebSocket: browser subscription to camera frames ----------
@app.websocket("/ws/viewer")
async def ws_viewer(ws: WebSocket):
    """Add a browser to the camera-frame broadcast set until it disconnects."""
    await ws.accept()
    ctx.camera_viewers.add(ws)
    logger.info(f"[VIEWER] Browser connected. Total viewers: {len(ctx.camera_viewers)}")
    try:
        # Frames are pushed from the camera handler; just keep the socket open.
        while True:
            await asyncio.sleep(60)
    except (WebSocketDisconnect, asyncio.CancelledError):
        pass  # normal shutdown, handled silently
    finally:
        ctx.camera_viewers.discard(ws)
        logger.info(f"[VIEWER] Removed. Total viewers: {len(ctx.camera_viewers)}")
|
||
|
||
# ---------- WebSocket: browser subscription to IMU data ----------
@app.websocket("/ws")
async def ws_imu(ws: WebSocket):
    """Register an IMU subscriber; imu_broadcast() pushes data to it."""
    await ws.accept()
    ctx.imu_ws_clients.add(ws)
    try:
        # Nothing is read from the client; just hold the connection open.
        while True:
            await asyncio.sleep(60)
    except (WebSocketDisconnect, asyncio.CancelledError):
        pass  # normal shutdown, handled silently
    finally:
        ctx.imu_ws_clients.discard(ws)
|
||
|
||
async def imu_broadcast(msg: str):
    """Fan one IMU JSON message out to every subscriber, pruning dead sockets."""
    if not ctx.imu_ws_clients:
        return
    stale = []
    for client in list(ctx.imu_ws_clients):
        try:
            await client.send_text(msg)
        except Exception:
            # Send failed: remember the socket and drop it afterwards.
            stale.append(client)
    for client in stale:
        ctx.imu_ws_clients.discard(client)
|
||
|
||
# ---------- Server-side IMU attitude estimation (kept as-is) ----------
from math import atan2, hypot, pi

# Filter tuning constants (consumed by process_imu_and_maybe_store).
GRAV_BETA = 0.98    # low-pass factor for the gravity-vector estimate
STILL_W = 0.4       # gyro-magnitude threshold for "still" detection (units per sensor scale — TODO confirm)
YAW_DB = 0.08       # dead-band applied to the yaw rate
YAW_LEAK = 0.2      # leak rate pulling yaw back toward zero while flat & still
ANG_EMA = 0.15      # EMA factor for the smoothed roll/pitch/yaw outputs
AUTO_REZERO = True  # re-zero reference angles after sustained flat stillness
USE_PROJ = True     # project gyro onto the gravity axis for yaw rate (else use raw y-gyro)
FREEZE_STILL= True  # force yaw rate to zero while the stillness condition holds
G = 9.807           # standard gravity
A_TOL = 0.08 * G    # accel-magnitude tolerance around 1 g for stillness detection
gLP = {"x":0.0, "y":0.0, "z":0.0}   # low-passed gravity estimate (mutable state)
gOff= {"x":0.0, "y":0.0, "z":0.0}   # gyro bias estimate, learned while still
BIAS_ALPHA = 0.002                  # gyro-bias learning rate
yaw = 0.0                           # integrated yaw, degrees wrapped to [-180, 180)
Rf = Pf = Yf = 0.0                  # EMA-filtered roll / pitch / yaw
ref = {"roll":0.0, "pitch":0.0, "yaw":0.0}  # zero-reference angles set on re-zero
holdStart = 0.0                     # timestamp (ms) when stillness began; 0 = not holding
isStill = False                     # latched stillness flag
last_ts_imu = 0.0                   # previous IMU sample timestamp (ms)
last_wall = 0.0                     # wall clock of the last stored sample (10 Hz throttle)
imu_store: List[Dict[str, Any]] = []  # throttled history of processed samples
|
||
|
||
def _wrap180(a: float) -> float:
|
||
a = a % 360.0
|
||
if a >= 180.0: a -= 360.0
|
||
if a < -180.0: a += 360.0
|
||
return a
|
||
|
||
def process_imu_and_maybe_store(d: Dict[str, Any]):
    """Fuse one raw IMU sample *d* into the module-level attitude estimate and,
    at most 10 times per second, append the result to ``imu_store``.

    *d* is expected to carry ``ts`` (ms), and ``accel`` / ``gyro`` dicts with
    x/y/z components (missing fields default to 0.0). Axis conventions are
    device-specific — roll/pitch formulas below assume the mounting used by
    this project; TODO confirm against the hardware orientation.

    Fix applied: a second, redundant ``global Rf, Pf, Yf, ref, last_wall``
    mid-function was removed; all names are already declared global below.
    """
    global gLP, gOff, yaw, Rf, Pf, Yf, ref, holdStart, isStill, last_ts_imu, last_wall

    # --- timestamp handling / dt derivation ---
    t_ms = float(d.get("ts", 0.0))
    now_wall = time.monotonic()
    if t_ms <= 0.0:
        # Missing/invalid device timestamp: substitute the server clock.
        t_ms = (now_wall * 1000.0)
    if last_ts_imu <= 0.0 or t_ms <= last_ts_imu or (t_ms - last_ts_imu) > 3000.0:
        # First sample, clock went backwards, or a >3 s gap: assume 50 Hz.
        dt = 0.02
    else:
        dt = (t_ms - last_ts_imu) / 1000.0
    last_ts_imu = t_ms

    # --- raw readings (default 0.0 when fields are absent) ---
    ax = float(((d.get("accel") or {}).get("x", 0.0)))
    ay = float(((d.get("accel") or {}).get("y", 0.0)))
    az = float(((d.get("accel") or {}).get("z", 0.0)))
    wx = float(((d.get("gyro") or {}).get("x", 0.0)))
    wy = float(((d.get("gyro") or {}).get("y", 0.0)))
    wz = float(((d.get("gyro") or {}).get("z", 0.0)))

    # --- gravity direction: low-pass the accelerometer, then normalize ---
    gLP["x"] = GRAV_BETA * gLP["x"] + (1.0 - GRAV_BETA) * ax
    gLP["y"] = GRAV_BETA * gLP["y"] + (1.0 - GRAV_BETA) * ay
    gLP["z"] = GRAV_BETA * gLP["z"] + (1.0 - GRAV_BETA) * az
    gmag = hypot(gLP["x"], gLP["y"], gLP["z"]) or 1.0  # guard divide-by-zero
    gHat = {"x": gLP["x"]/gmag, "y": gLP["y"]/gmag, "z": gLP["z"]/gmag}

    # --- accelerometer-derived roll/pitch (degrees) ---
    roll = (atan2(az, ay) * 180.0 / pi)
    pitch = (atan2(-ax, ay) * 180.0 / pi)

    # --- stillness detection: accel near 1 g and low gyro magnitude ---
    aNorm = hypot(ax, ay, az)
    wNorm = hypot(wx, wy, wz)
    nearFlat = (abs(roll) < 2.0 and abs(pitch) < 2.0)
    stillCond = (abs(aNorm - G) < A_TOL) and (wNorm < STILL_W)

    if stillCond:
        if holdStart <= 0.0: holdStart = t_ms
        if not isStill and (t_ms - holdStart) > 350.0: isStill = True
        # While still, slowly learn the gyro bias.
        gOff["x"] = (1.0 - BIAS_ALPHA)*gOff["x"] + BIAS_ALPHA*wx
        gOff["y"] = (1.0 - BIAS_ALPHA)*gOff["y"] + BIAS_ALPHA*wy
        gOff["z"] = (1.0 - BIAS_ALPHA)*gOff["z"] + BIAS_ALPHA*wz
    else:
        holdStart = 0.0; isStill = False
    # NOTE(review): holdStart/isStill are also written by the AUTO_REZERO
    # block below; the two uses share state — confirm this is intended.

    # --- yaw rate: bias-corrected gyro, optionally projected onto gravity ---
    if USE_PROJ:
        yawdot = ((wx - gOff["x"])*gHat["x"] + (wy - gOff["y"])*gHat["y"] + (wz - gOff["z"])*gHat["z"])
    else:
        yawdot = (wy - gOff["y"])

    if abs(yawdot) < YAW_DB: yawdot = 0.0          # dead-band small rates
    if FREEZE_STILL and stillCond: yawdot = 0.0    # no drift while still

    yaw = _wrap180(yaw + yawdot * dt)

    # Leak yaw back toward zero while the device is flat and still.
    if (YAW_LEAK > 0.0) and nearFlat and stillCond and abs(yaw) > 0.0:
        step = YAW_LEAK * dt * (-1.0 if yaw > 0 else (1.0 if yaw < 0 else 0.0))
        if abs(yaw) <= abs(step): yaw = 0.0
        else: yaw += step

    # --- EMA smoothing of the output angles ---
    Rf = ANG_EMA * roll + (1.0 - ANG_EMA) * Rf
    Pf = ANG_EMA * pitch + (1.0 - ANG_EMA) * Pf
    Yf = ANG_EMA * yaw + (1.0 - ANG_EMA) * Yf

    # --- auto re-zero: after ~350 ms flat & quiet, capture new reference ---
    if AUTO_REZERO and nearFlat and (wNorm < STILL_W):
        if holdStart <= 0.0: holdStart = t_ms
        if not isStill and (t_ms - holdStart) > 350.0:
            ref.update({"roll": Rf, "pitch": Pf, "yaw": Yf})
            isStill = True

    # --- reference-relative, wrapped output angles ---
    R = _wrap180(Rf - ref["roll"])
    P = _wrap180(Pf - ref["pitch"])
    Y = _wrap180(Yf - ref["yaw"])

    # --- throttled storage: at most one sample per 100 ms ---
    now_wall = time.monotonic()
    if last_wall <= 0.0 or (now_wall - last_wall) >= 0.100:
        last_wall = now_wall
        item = {
            "ts": t_ms/1000.0,
            "angles": {"roll": R, "pitch": P, "yaw": Y},
            "accel": {"x": ax, "y": ay, "z": az},
            "gyro": {"x": wx, "y": wy, "z": wz},
        }
        imu_store.append(item)
|
||
|
||
# ---------- UDP ingest for IMU, forwarded to WebSocket subscribers ----------
class UDPProto(asyncio.DatagramProtocol):
    """Datagram endpoint that parses IMU JSON packets, updates the server-side
    attitude estimate, and rebroadcasts each packet to IMU subscribers."""

    def connection_made(self, transport):
        print(f"[UDP] listening on {UDP_IP}:{UDP_PORT}")

    def datagram_received(self, data, addr):
        try:
            payload = json.loads(data.decode('utf-8', errors='ignore').strip())
            # Normalize the timestamp key expected downstream.
            if 'ts' not in payload and 'timestamp_ms' in payload:
                payload['ts'] = payload.pop('timestamp_ms')
            process_imu_and_maybe_store(payload)
            asyncio.create_task(imu_broadcast(json.dumps(payload)))
        except Exception:
            pass  # malformed packets are dropped silently (best-effort telemetry)
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
# --- Export hooks (optional) ---
def get_last_frames():
    """Return the shared buffer of recent (timestamp, JPEG-bytes) camera frames."""
    return ctx.last_frames
|
||
|
||
def get_camera_ws():
    """Return the currently connected camera WebSocket, or None if absent."""
    return ctx.esp32_camera_ws
|
||
|
||
if __name__ == "__main__":
    import signal
    import logging

    # Day 13: suppress the asyncio CancelledError spam emitted on Ctrl+C.
    logging.getLogger("uvicorn.error").setLevel(logging.CRITICAL)

    # Day 13: no extra signal handlers here — they are registered at module
    # import time already.

    # Day 20: warm up the Numba JIT so the first real call pays no compile cost.
    try:
        from numba_utils import warmup as numba_warmup
        numba_warmup()
    except ImportError:
        print("[启动] Numba 未安装,跳过预热")

    uvicorn.run(
        app, host="0.0.0.0", port=8081,
        log_level="warning", access_log=False,
        loop="asyncio", workers=1, reload=False
    )
|