1572 lines
79 KiB
Python
1572 lines
79 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
YOLOv8 单类分割 + MediaPipe Hand Landmarker + 光流追踪(多边形)
|
||
更新点(本版重点):
|
||
- 左下角第二个进度条"距离(≈1)" 已完全替换为:ratio = 物体面积 / 手面积 的"接近 1 程度"可视化
|
||
-> range_score = 1 - clamp(|ratio - 1| / RATIO_TOL, 0..1)
|
||
-> 画面同时显示 ratio 数值;ratio<1 提示"向前靠近",ratio>1 提示"后退",在 [1±RATIO_TOL] 内为"保持"
|
||
其他特性:
|
||
- Enter 锁定:在分割掩码"内收 5px"的内边界上取光流点
|
||
- TRACK 期间:监控当前多边形外扩 40px 周边区域的分割,命中即重锁
|
||
- 成功判定:放宽"握持(Grasp)"启发式(拿瓶子无需特别紧)
|
||
- 手骨架单色渲染;测距箭头(端点定位线 + 箭头 + 像素值)
|
||
- 中文绘制优先 Pillow + 系统中文字体(避免问号)
|
||
"""
|
||
|
||
import os
|
||
import time
|
||
import threading
|
||
import math
|
||
import cv2
|
||
import numpy as np
|
||
import mediapipe as mp
|
||
from mediapipe.framework.formats import landmark_pb2
|
||
from ultralytics import YOLO
|
||
from ultralytics.utils.plotting import Colors
|
||
import bridge_io
|
||
|
||
# Day 26: 抑制 pygame 社区欢迎信息
|
||
import os
|
||
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "1"
|
||
import pygame # 用于播放本地音频文件
|
||
|
||
from audio_player import play_audio_threadsafe
|
||
PERF_DEBUG = False  # Print debug information when True (False turns it off)
HAND_DOWNSCALE = 0.8  # Input scale for the HandLandmarker; e.g. 0.5 halves width and height (~1/4 of the pixels)
HAND_FPS_DIV = 1  # Run hand detection every N-th frame (1 = every frame, 2 = every other frame, 3 = every third frame)
|
||
|
||
|
||
# === Frontend-style palette (BGR) + UI overlay management (text rows are stacked by the helpers below) ===
# Every entry is a (B, G, R) tuple, i.e. the CSS hex value with its channels
# reversed, so it can be handed to OpenCV drawing calls unchanged.
FRONTEND_COLORS = {
    "text": (243, 237, 230),    # --text:  #e6edf3
    "muted": (195, 176, 159),   # --muted: #9fb0c3
    "ok": (135, 231, 126),      # --ok:    #7ee787
    "err": (128, 128, 255),     # --err:   #ff8080
    "accent": (251, 218, 97),   # accent:  #61dafb
}
|
||
|
||
# Text of the current guidance command (updated through set_current_command)
CURRENT_COMMAND_TEXT = "—"

_UI_LINE = 0  # Overlay line counter; reset to 0 by ui_reset_overlay()
_UI_H = 0  # Frame height recorded by ui_reset_overlay()
_UI_TR_LINE = 0  # Number of text rows already stacked in the top-right corner
_UI_TOP_MARGIN = 12  # Y offset of the first stacked row
_UI_RIGHT_MARGIN = 12  # Gap kept between right-aligned text and the right frame edge
UNIFIED_FONT_PX = 12  # Unified font size
|
||
|
||
|
||
def ui_reset_overlay(img_h: int):
    """Start a new frame of overlay text.

    Zeroes both overlay row counters and stores the frame height (coerced
    to ``int``) in the module-level overlay state. Call once per frame
    before drawing any stacked text.
    """
    global _UI_LINE, _UI_H, _UI_TR_LINE
    _UI_LINE = _UI_TR_LINE = 0
    _UI_H = int(img_h)
|
||
|
||
|
||
def _ui_next_y_top(font_size: int) -> int:
    """Return the top y coordinate of the next stacked row and advance the row counter.

    Rows are ``font_size`` tall plus a gap of a quarter of the font size
    (at least 4), starting at ``_UI_TOP_MARGIN``.
    """
    global _UI_TR_LINE
    row_pitch = font_size + max(4, int(font_size * 0.25))
    y_top = _UI_TOP_MARGIN + (_UI_TR_LINE * row_pitch)
    _UI_TR_LINE += 1
    return y_top
|
||
|
||
|
||
def set_current_command(text: str):
    """Store the command label shown in the overlay.

    Falsy input, or input whose conversion to ``str`` fails, is replaced by
    the placeholder dash.
    """
    global CURRENT_COMMAND_TEXT
    placeholder = "—"
    try:
        label = str(text) if text else placeholder
    except Exception:
        label = placeholder
    CURRENT_COMMAND_TEXT = label
|
||
|
||
|
||
def draw_command_pill(img_bgr: np.ndarray, label: str):
    """Render the current command as a stacked overlay text row.

    No rounded button is drawn any more; the label (or a dash when it is
    empty) is passed to ``draw_text_cn`` with the unified font size.
    """
    shown = label if label else '—'
    draw_text_cn(
        img_bgr,
        f"当前指令:{shown}",
        (0, 0),
        font_size=UNIFIED_FONT_PX,
        color=(255, 255, 255),
        ui_hint=True,
    )
|
||
|
||
# Optional YOLOE text-prompt backend: remember whether the import worked in
# _YOLOE_READY instead of letting a failed import abort module loading.
try:
    from yoloe_backend import YoloEBackend
    _YOLOE_READY = True
except Exception as e:
    _YOLOE_READY = False
    print(f"[DETECTOR] YOLOE backend not ready: {e}", flush=True)
|
||
|
||
# ========= Paths (edit as needed) =========
YOLO_MODEL_PATH = 'model/shoppingbest5.pt'
HAND_TASK_PATH = 'model/hand_landmarker.task'

# ========= Camera =========
CAM_INDEX = 0
INPUT_W, INPUT_H = 600, 480

# ========= Segmentation display =========
STROKE_WIDTH = 5  # Outline stroke width; increased so the yellow and green outlines are thicker
MASK_ALPHA = 0.45
CONF_THRESHOLD = 0.20

# -- Single-prompt recognition (only one class is shown) --
PROMPT_NAME = "AD_milk"
PROMPT_STRICT = True

# ========= Alignment bar =========
ALIGN_LOOSE_PCT = 0.12  # Normalized distance threshold (relative to the frame diagonal)

# ========= Distance bar (this version targets "ratio ≈ 1") =========
RATIO_IDEAL = 1.0  # Ideal value: object area / hand area ≈ 1
RATIO_TOL = 0.25  # Allowed deviation: within ±25% the distance is considered suitable

# ========= Voice prompts =========
TTS_INTERVAL_SEC = 1.0
ENABLE_TTS = True

# ========= Optical flow (LK) and feature points =========
LK_PARAMS = dict(winSize=(21, 21),
                 maxLevel=3,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 12, 0.03))
FEATURE_PARAMS = dict(maxCorners=600,
                      qualityLevel=0.001,
                      minDistance=5,
                      blockSize=7)

# ========= Key parameters: inward offset and perimeter monitoring =========
INNER_OFFSET_PX_LOCK = 5  # Enter lock: mask erosion in pixels, keeps feature points inside the object
EDGE_DILATE_PX = 2  # Small dilation applied after taking the inner edge, helps picking points
PERI_MONITOR_PX = 40  # TRACK: monitor a band that extends 40 px outside the polygon
# Run the perimeter segmentation check every N frames.
# NOTE(review): the original comment said "changed to every frame" while the value is 5 - confirm.
PERI_CHECK_EVERY = 5

# ========= Contour precision =========
CONTOUR_EPSILON_FACTOR = 0.002  # Douglas-Peucker precision factor; smaller keeps more detail
TRACK_EPSILON_FACTOR = 0.003  # Contour precision factor used in tracking mode

# ========= YOLO real-time correction =========
YOLO_CORRECTION_IOU_THRESHOLD = 0.2  # IoU threshold; lower corrects more aggressively
YOLO_CORRECTION_CONF_THRESHOLD = 0.15  # Confidence threshold; lower makes detection more sensitive
|
||
|
||
# ========= Direction-guidance audio files =========
# The "music" directory is resolved relative to this source file.
AUDIO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "music")
# Maps a guidance label to the audio clip for it.
AUDIO_FILES = {
    "向上": os.path.join(AUDIO_DIR, "向上.wav"),
    "向下": os.path.join(AUDIO_DIR, "向下.wav"),
    "向左": os.path.join(AUDIO_DIR, "向左.wav"),
    "向右": os.path.join(AUDIO_DIR, "向右.wav"),
    "向前": os.path.join(AUDIO_DIR, "向前.wav"),
    "后退": os.path.join(AUDIO_DIR, "向后.wav"),
    "OK": os.path.join(AUDIO_DIR, "已对中.wav"),
}
GUIDANCE_INTERVAL_SEC = 1.5  # Interval between guidance announcements
|
||
|
||
# Initialize the pygame audio mixer (runs at import time)
pygame.mixer.init()

# ========= Window =========
WINDOW = "YOLO Seg + Flow Polygon (Peri-Relock) (Grab Guidance)"

# ======== MediaPipe aliases ========
BaseOptions = mp.tasks.BaseOptions
VisionRunningMode = mp.tasks.vision.RunningMode
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
HAND_CONNECTIONS = mp.solutions.hands.HAND_CONNECTIONS

# ======== HandLandmarker callback cache ========
_last_result = None  # (result, timestamp_ms), written by on_result()
|
||
|
||
def on_result(result: mp.tasks.vision.HandLandmarkerResult,
              output_image: mp.Image, timestamp_ms: int):
    """HandLandmarker result callback: cache the newest result.

    Stores ``(result, timestamp_ms)`` in the module-level ``_last_result``;
    ``output_image`` is not used.
    """
    global _last_result
    latest = (result, timestamp_ms)
    _last_result = latest
|
||
|
||
def _to_proto(hand_lms) -> landmark_pb2.NormalizedLandmarkList:
    """Copy an iterable of landmarks (objects with ``x``/``y``/``z``) into a NormalizedLandmarkList."""
    message = landmark_pb2.NormalizedLandmarkList()
    points = [
        landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
        for lm in hand_lms
    ]
    message.landmark.extend(points)
    return message
|
||
|
||
# —— 手骨架单色渲染 —— #
|
||
def draw_hands_mono(img_bgr, hand_lms, color=(0, 255, 255), r=2, t=2):
    """Draw the hand skeleton on ``img_bgr`` using one color for joints and bones.

    Args:
        img_bgr: Image drawn on in place.
        hand_lms: Either an object that already has a ``landmark`` attribute
            or an iterable of landmarks, which is converted with ``_to_proto``.
        color: Color used for both landmarks and connections.
        r: Landmark circle radius.
        t: Connection line thickness.
    """
    drawing = mp.solutions.drawing_utils
    landmark_list = hand_lms if hasattr(hand_lms, "landmark") else _to_proto(hand_lms)
    drawing.draw_landmarks(
        img_bgr,
        landmark_list=landmark_list,
        connections=HAND_CONNECTIONS,
        # thickness=-1 draws the joint circles filled
        landmark_drawing_spec=drawing.DrawingSpec(color=color, thickness=-1, circle_radius=r),
        connection_drawing_spec=drawing.DrawingSpec(color=color, thickness=t, circle_radius=r),
    )
|
||
|
||
def norm_name(s: str) -> str:
    """Return ``s`` as a lower-cased string with all whitespace removed."""
    lowered = str(s).lower()
    return "".join(lowered.split())
|
||
|
||
# ======== TTS(pyttsx3)========
|
||
class Speaker:
    """Best-effort text-to-speech helper backed by pyttsx3.

    If the engine cannot be imported or initialised the speaker disables
    itself, and ``say_async`` becomes a no-op.
    """

    def __init__(self, enable=True):
        self.enable = enable
        self._engine = None
        self._lock = threading.Lock()
        if not enable:
            return
        try:
            import pyttsx3
            self._engine = pyttsx3.init()
            for prop, value in (('rate', 190), ('volume', 1.0)):
                self._engine.setProperty(prop, value)
        except Exception:
            # Any failure while setting up the engine turns speech off.
            self._engine = None
            self.enable = False

    def say_async(self, text: str):
        """Speak ``text`` on a daemon thread; does nothing when disabled or ``text`` is empty."""
        if not (self.enable and text):
            return
        worker = threading.Thread(target=self._speak, args=(text,), daemon=True)
        worker.start()

    def _speak(self, text):
        """Thread body: stop the engine, queue ``text``, then pump the engine for at most 1.2 s."""
        try:
            with self._lock:
                engine = self._engine
                engine.stop()
                engine.say(text)
                engine.iterate()
                started = time.time()
                while engine.isBusy() and (time.time() - started) < 1.2:
                    engine.iterate()
                    time.sleep(0.01)
        except Exception:
            # Speech is optional; errors are deliberately ignored.
            pass
|
||
|
||
# ======== 中文文本绘制(优先 Pillow)========
|
||
_PIL_OK = False  # True only when Pillow imports and a candidate font file exists
_FONT_PATH = None  # Path of the first candidate font found on disk


def _init_font():
    """Probe for Pillow and a CJK-capable font file; record the outcome in ``_PIL_OK`` / ``_FONT_PATH``."""
    global _PIL_OK, _FONT_PATH
    try:
        from PIL import ImageFont  # noqa
    except Exception:
        _PIL_OK = False
        return
    _PIL_OK = True

    # Chinese font locations on Linux (Ubuntu/Debian)
    candidates = (
        "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
        "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf",
    )
    found = next((path for path in candidates if os.path.exists(path)), None)
    if found is None:
        # Pillow is present but no usable font: treat Pillow rendering as unavailable.
        _PIL_OK = False
    else:
        _FONT_PATH = found


_init_font()
|
||
|
||
def draw_text_cn(img_bgr, text, xy, font_size=20, color=(255,255,255), stroke=None, ui_hint=True):
    """Draw one line of (possibly Chinese) text onto ``img_bgr`` in place.

    Pillow with the font located by ``_init_font`` is preferred. If Pillow
    or the font is unavailable, or rendering fails, the text is drawn with
    ``cv2.putText`` instead.

    Args:
        img_bgr: BGR image, modified in place.
        text: Text to render.
        xy: ``(x, y)`` of the text's top-left corner. Used only when
            ``ui_hint`` is False.
        font_size: Kept for backward compatibility; the unified size
            ``UNIFIED_FONT_PX`` is always used.
        color: Kept for backward compatibility; text is always white.
        stroke: Kept for backward compatibility; unused.
        ui_hint: True (default) stacks the text as the next right-aligned
            row in the top-right corner. False draws the text at ``xy``
            and does not consume a stacked row. Previously both settings
            were stacked, so in-place labels never appeared at ``xy``.
    """
    # Unified style: fixed font size, pure white.
    color = (255, 255, 255)
    font_size = int(UNIFIED_FONT_PX)

    frame_w = img_bgr.shape[1]
    stacked = bool(ui_hint)
    if stacked:
        y_top = _ui_next_y_top(font_size)
        anchor_x = None
    else:
        anchor_x, y_top = int(xy[0]), int(xy[1])

    if _PIL_OK and _FONT_PATH:
        try:
            from PIL import Image, ImageDraw, ImageFont
            font_obj = ImageFont.truetype(_FONT_PATH, font_size)
            # Measure on a throw-away 1x1 canvas so the row can be right-aligned.
            bbox = ImageDraw.Draw(Image.new('RGB', (1, 1))).textbbox((0, 0), text, font=font_obj)
            text_w = max(1, bbox[2] - bbox[0])
            x = max(8, frame_w - _UI_RIGHT_MARGIN - text_w) if stacked else anchor_x

            pil_img = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
            ImageDraw.Draw(pil_img).text((x, y_top), text, fill=(255, 255, 255), font=font_obj)
            img_bgr[:] = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR)
            return
        except Exception:
            # Best effort: any Pillow failure falls through to the OpenCV renderer.
            pass

    # OpenCV fallback. The size is always measured with the Hershey font that
    # is actually drawn (the old code could reuse Pillow's metrics here).
    scale = font_size / 24.0
    (text_w, text_h), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, scale, 2)
    x = max(8, frame_w - _UI_RIGHT_MARGIN - int(text_w)) if stacked else anchor_x
    # putText expects the baseline position, hence top + text height.
    y_baseline = int(y_top + text_h)
    cv2.putText(img_bgr, text, (x, y_baseline), cv2.FONT_HERSHEY_SIMPLEX, scale, color, 2, cv2.LINE_AA)
|
||
|
||
# ======== 工具函数 ========
|
||
def clamp01(x):
    """Clamp ``x`` to the closed interval [0.0, 1.0]."""
    capped = min(1.0, x)
    return max(0.0, capped)
|
||
|
||
def draw_progress_bars(vis, align_score, range_score):
    """Draw two horizontal progress bars near the bottom-left of ``vis``.

    The upper bar shows ``align_score`` and the lower bar ``range_score``
    (how close the area ratio is to 1). Both scores are clamped to [0, 1].
    """
    frame_h, frame_w = vis.shape[:2]
    bar_w = int(frame_w * 0.28)
    bar_h, gap, left = 12, 8, 12
    top_align = frame_h - 2 * bar_h - gap - 12
    top_range = top_align + bar_h + gap

    # Dark background tracks first, then the colored fills on top.
    for top in (top_align, top_range):
        cv2.rectangle(vis, (left, top), (left + bar_w, top + bar_h), (50, 50, 50), -1)
    fills = (
        (top_align, align_score, (0, 220, 0)),
        (top_range, range_score, (0, 180, 255)),
    )
    for top, score, fill_color in fills:
        filled_w = int(bar_w * clamp01(score))
        cv2.rectangle(vis, (left, top), (left + filled_w, top + bar_h), fill_color, -1)

    draw_text_cn(vis, "对齐", (left, top_align - 18), font_size=18, color=(180,180,180))
    draw_text_cn(vis, "距离(≈1)", (left, top_range - 18), font_size=18, color=(180,180,180))
|
||
|
||
def polygon_center_and_area(poly):
    """Return ``((cx, cy), area)`` for a polygon.

    ``(None, 0.0)`` is returned for a missing polygon or fewer than three
    points. When the zeroth moment is (near) zero the mean of the vertices
    is used as the center and the area is reported as 0.0.
    """
    if poly is None or len(poly) < 3:
        return None, 0.0

    pts = np.array(poly, dtype=np.float32)
    moments = cv2.moments(pts)
    m00 = moments["m00"]
    if abs(m00) < 1e-6:
        mean_xy = np.mean(pts, axis=0)
        return (float(mean_xy[0]), float(mean_xy[1])), 0.0

    centroid = (float(moments["m10"] / m00), float(moments["m01"] / m00))
    # The area is measured on integer-truncated vertices.
    return centroid, float(cv2.contourArea(pts.astype(np.int32)))
|
||
|
||
def hand_bbox_and_area(lms, W, H):
    """Return the pixel bounding box ``(x, y, w, h)`` of the landmarks and its area.

    Landmark ``x``/``y`` values are scaled by ``W``/``H`` and truncated to
    ints. Width and height are at least 1. ``(None, 0.0)`` is returned
    when there are no landmarks.
    """
    cols = [int(lm.x * W) for lm in lms]
    rows = [int(lm.y * H) for lm in lms]
    if not (cols and rows):
        return None, 0.0

    left, right = min(cols), max(cols)
    top, bottom = min(rows), max(rows)
    width = max(1, right - left)
    height = max(1, bottom - top)
    return (left, top, width, height), float(width * height)
|
||
|
||
# ======== 手势:握持(Grasp) 识别(放宽版启发式)========
|
||
THUMB_INDEX_CLOSE = 0.34  # relaxed: max thumb-tip/index-tip distance (in hand-box diagonals)
FINGERTIP_NEAR = 0.44  # relaxed: a fingertip this close to the palm counts as curled
MIN_CURLED_COUNT = 1  # relaxed: minimum number of curled fingers


def detect_grasp(hand_lms, W, H):
    """Heuristic (relaxed) grasp detection from hand landmarks.

    All distances are normalised by the diagonal of the hand bounding box.
    A grasp needs the thumb tip (4) close to the index tip (8) and at
    least ``MIN_CURLED_COUNT`` of the fingertips 12/16/20 close to the
    palm center (mean of landmarks 0, 5, 9, 13, 17).

    Returns:
        ``(is_grasp, score)``; ``(False, 0.0)`` when no bounding box exists.
    """
    box, _ = hand_bbox_and_area(hand_lms, W, H)
    if not box:
        return False, 0.0
    _, _, box_w, box_h = box
    hand_diag = float(np.hypot(box_w, box_h)) + 1e-6

    def pixel(idx):
        lm = hand_lms[idx]
        return np.array([lm.x * W, lm.y * H], dtype=np.float32)

    palm_idx = [0, 5, 9, 13, 17]
    palm_x = np.mean([hand_lms[i].x * W for i in palm_idx])
    palm_y = np.mean([hand_lms[i].y * H for i in palm_idx])
    palm = np.array([palm_x, palm_y], dtype=np.float32)

    thumb_index_dist = float(np.linalg.norm(pixel(4) - pixel(8))) / hand_diag

    tip_dists = [
        float(np.linalg.norm(pixel(i) - palm)) / hand_diag
        for i in (12, 16, 20)
    ]
    curled_cnt = sum(1 for d in tip_dists if d < FINGERTIP_NEAR)

    is_grasp = (thumb_index_dist < THUMB_INDEX_CLOSE) and (curled_cnt >= MIN_CURLED_COUNT)
    # Score: half from thumb/index closeness, half from the curled-finger fraction.
    score = 0.5 * (1.0 - min(thumb_index_dist / THUMB_INDEX_CLOSE, 1.0)) + \
            0.5 * min(curled_cnt / 3.0, 1.0)
    return is_grasp, score
|
||
|
||
# ======== 内收后的边界提点 ========
|
||
def inner_offset_edge(mask_bin, offset_px=5, edge_dilate_px=2):
    """Return an edge map (uint8, 0/255) of the mask after shrinking it inward.

    The mask is eroded with an elliptical kernel of radius ``offset_px``,
    Canny edges are taken from the result, and the edges are optionally
    thickened with a dilation of radius ``edge_dilate_px``.
    """
    def ellipse(radius):
        side = 2 * radius + 1
        return cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (side, side))

    shrunk = mask_bin.astype(np.uint8)
    if offset_px > 0:
        shrunk = cv2.erode(shrunk, ellipse(offset_px), iterations=1)

    edges = cv2.Canny(shrunk * 255, 50, 150)
    if edge_dilate_px > 0:
        edges = cv2.dilate(edges, ellipse(edge_dilate_px), iterations=1)
    return edges  # uint8 0/255
|
||
|
||
# ======== YOLO 分割:全帧或 ROI 内选择最佳 mask ========
|
||
def find_best_mask(frame_bgr, yolo, W, H, target_cls_id, conf_thr=0.10, roi_rect=None):
    """Run segmentation and return the best binary mask, or ``None``.

    Args:
        frame_bgr: Frame passed to ``yolo``.
        yolo: Callable invoked as ``yolo(frame_bgr, verbose=False)``. The
            first result must expose ``masks.data``, ``boxes.conf`` and
            ``boxes.cls``.
        W, H: Output mask width and height. Masks of another size are
            resized to ``(W, H)``.
        target_cls_id: Only detections of this class are considered;
            ``None`` accepts every class.
        conf_thr: Detections below this confidence are skipped.
        roi_rect: Optional inclusive ``(x0, y0, x1, y1)`` rectangle. When
            given, masks are ranked by the number of mask pixels inside it
            instead of by total area.

    Returns:
        The uint8 0/1 mask with the highest positive score, or ``None``
        when nothing qualifies (including an ROI fully outside the frame).
    """
    results = yolo(frame_bgr, verbose=False)
    if not results or results[0].masks is None:
        return None

    # Clamp the ROI once. A rectangle entirely left of / above the frame ends
    # up with a negative upper bound; used as a slice end it would wrap
    # around and select most of the frame, so it is rejected explicitly.
    roi_slice = None
    if roi_rect is not None:
        x0, y0, x1, y1 = (int(v) for v in roi_rect)
        x0, y0 = max(0, x0), max(0, y0)
        x1, y1 = min(W - 1, x1), min(H - 1, y1)
        if x1 < x0 or y1 < y0:
            return None
        roi_slice = (slice(y0, y1 + 1), slice(x0, x1 + 1))

    r0 = results[0]
    best_mask = None
    best_score = 0.0
    for mask_t, conf_t, cls_t in zip(r0.masks.data, r0.boxes.conf, r0.boxes.cls):
        cls_id = int(cls_t.item())
        conf_value = float(conf_t.item())
        if target_cls_id is not None and cls_id != target_cls_id:
            continue
        if conf_value < conf_thr:
            continue

        mask_np = mask_t.detach().cpu().numpy()
        if mask_np.shape[:2] != (H, W):
            mask_np = cv2.resize(mask_np, (W, H), interpolation=cv2.INTER_LINEAR)
        mask_bin = (mask_np > 0.5).astype(np.uint8)

        # Summing the ROI slice equals AND-ing with a full-frame ROI mask,
        # without allocating that mask for every detection.
        region = mask_bin if roi_slice is None else mask_bin[roi_slice]
        score = float(region.sum())
        if score > best_score:
            best_score = score
            best_mask = mask_bin
    return best_mask
|
||
|
||
# ======== 工程化:测距箭头(端点定位线 + 箭头 + 像素值)========
|
||
def draw_measure_arrow(img, p1, p2, txt=None):
    """Draw a measurement arrow from ``p1`` to ``p2`` with a label at its midpoint.

    Both endpoints get a small cross marker. When ``txt`` is None the label
    is the integer pixel distance followed by ``px``.
    """
    white = (255, 255, 255)
    start = (int(p1[0]), int(p1[1]))
    end = (int(p2[0]), int(p2[1]))

    # Cross-shaped end caps
    cap = 7
    for cx, cy in (start, end):
        cv2.line(img, (cx - cap, cy), (cx + cap, cy), white, 1, cv2.LINE_AA)
        cv2.line(img, (cx, cy - cap), (cx, cy + cap), white, 1, cv2.LINE_AA)

    # Arrow
    cv2.arrowedLine(img, start, end, white, 2, cv2.LINE_AA, tipLength=0.18)

    # Label
    if txt is None:
        txt = f"{int(np.hypot(end[0] - start[0], end[1] - start[1]))}px"

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale, stroke_px, pad = 0.6, 2, 4
    (text_w, text_h), _ = cv2.getTextSize(txt, font, font_scale, stroke_px)
    mid_x = (start[0] + end[0]) // 2
    mid_y = (start[1] + end[1]) // 2
    half_w = text_w // 2
    box_left, box_top = mid_x - half_w - pad, mid_y - text_h - 6
    box_right, box_bottom = mid_x + half_w + pad, mid_y + 6

    # Dark backing box keeps the label readable over the image.
    cv2.rectangle(img, (box_left, box_top), (box_right, box_bottom), (32, 32, 32), -1)
    cv2.putText(img, txt, (box_left + pad, box_bottom - 6), font, font_scale, white, stroke_px, cv2.LINE_AA)
|
||
|
||
# 添加绘制虚线的函数
|
||
def draw_dashed_line(img, pt1, pt2, color=(255, 255, 255), thickness=2, dash_length=10, gap_length=5):
    """Draw a dashed line from ``pt1`` to ``pt2``.

    Dashes are ``dash_length`` long and separated by ``gap_length``; the
    last dash is cut at ``pt2``. Lines shorter than one unit are skipped.
    """
    origin = np.array(pt1, dtype=np.float32)
    delta = np.array(pt2, dtype=np.float32) - origin
    total = np.linalg.norm(delta)
    if total < 1:
        return

    unit = delta / total  # unit direction vector
    period = dash_length + gap_length

    offset = 0
    while offset < total:
        dash_end = min(offset + dash_length, total)
        seg_start = origin + unit * offset
        seg_end = origin + unit * dash_end
        cv2.line(img, tuple(seg_start.astype(int)), tuple(seg_end.astype(int)), color, thickness)
        offset += period
|
||
|
||
# 添加绘制手部轮廓的函数
|
||
def draw_hand_contour(img, hand_lms, W, H, color=(255, 255, 255), thickness=1):
    """Draw the convex hull of the hand landmarks as a closed polyline.

    Landmark coordinates are scaled by ``W``/``H`` to pixels. Nothing is
    drawn when there are three or fewer landmarks.
    """
    pixel_pts = [[int(lm.x * W), int(lm.y * H)] for lm in hand_lms]
    if len(pixel_pts) <= 3:
        return

    hull = cv2.convexHull(np.array(pixel_pts, dtype=np.int32))
    cv2.polylines(img, [hull], True, color, thickness)
|
||
|
||
# 检测手和物体是否接触
|
||
def check_hand_object_contact(hand_box, poly, overlap_threshold=0.15):
    """Check whether the hand bounding box overlaps the object polygon.

    Both shapes are rasterised onto a scratch canvas and the overlap is
    expressed relative to the rasterised hand-box area.

    Args:
        hand_box: ``(x, y, w, h)`` of the hand, or ``None``.
        poly: Polygon vertices as an ``(N, 2)`` array, a list of ``(x, y)``
            pairs, or an OpenCV-style ``(N, 1, 2)`` contour; or ``None``.
        overlap_threshold: Minimum overlap ratio that counts as contact.

    Returns:
        ``(is_touching, overlap_ratio)`` as a plain ``bool`` and ``float``;
        ``(False, 0.0)`` when an input is missing or the polygon has fewer
        than three vertices.
    """
    if hand_box is None or poly is None:
        return False, 0.0

    # Normalise to an (N, 2) array so lists and (N, 1, 2) contours work too.
    pts = np.asarray(poly).reshape(-1, 2)
    if len(pts) < 3:
        return False, 0.0
    pts = pts.astype(np.int32)

    hx, hy, hw, hh = hand_box
    hand_rect = np.array([
        [hx, hy],
        [hx + hw, hy],
        [hx + hw, hy + hh],
        [hx, hy + hh],
    ], dtype=np.int32)

    # Canvas just large enough for both shapes plus a margin; never smaller
    # than 1x1 even if every coordinate is far negative.
    canvas_h = max(1, int(max(hy + hh, pts[:, 1].max()) + 10))
    canvas_w = max(1, int(max(hx + hw, pts[:, 0].max()) + 10))

    hand_mask = np.zeros((canvas_h, canvas_w), dtype=np.uint8)
    cv2.fillPoly(hand_mask, [hand_rect], 1)

    obj_mask = np.zeros((canvas_h, canvas_w), dtype=np.uint8)
    cv2.fillPoly(obj_mask, [pts], 1)

    intersection = int(np.count_nonzero(hand_mask & obj_mask))
    hand_area = int(hand_mask.sum())

    # Overlap ratio relative to the hand area
    overlap_ratio = intersection / max(1.0, hand_area)
    return bool(overlap_ratio > overlap_threshold), float(overlap_ratio)
|
||
|
||
# 添加方向判断函数
|
||
def get_guidance_direction(hand_center, object_center, hand_area, object_area, hand_box=None, poly=None):
    """Choose the guidance direction that moves the hand toward the object.

    Args:
        hand_center: ``(x, y)`` of the hand, or ``None``.
        object_center: ``(x, y)`` of the object, or ``None``.
        hand_area, object_area: Accepted but not used by this function.
        hand_box, poly: When both are given, contact is tested with
            ``check_hand_object_contact`` (threshold 0.1).

    Returns:
        ``(primary, secondary)``. ``(None, None)`` when a center is missing.
        On contact, the "forward" label plus a contact-percentage string.
        Otherwise the direction of the larger offset beyond 30 px plus the
        other axis' direction (or ``None``). If neither axis exceeds 30 px,
        "forward" with a hint when the centers are under 50 px apart, else
        "hold".
    """
    if hand_center is None or object_center is None:
        return None, None

    # Contact check comes first.
    touching, overlap = False, 0.0
    if hand_box is not None and poly is not None:
        touching, overlap = check_hand_object_contact(hand_box, poly, overlap_threshold=0.1)

    hand_x, hand_y = hand_center
    obj_x, obj_y = object_center
    dx = obj_x - hand_x  # positive: object is to the right
    dy = obj_y - hand_y  # positive: object is below

    if touching:
        return "向前", f"接触度: {overlap:.1%}"

    h_threshold = 30  # horizontal offset threshold (pixels)
    v_threshold = 30  # vertical offset threshold (pixels)

    horizontal = None
    if abs(dx) > h_threshold:
        horizontal = "向右" if dx > 0 else "向左"
    vertical = None
    if abs(dy) > v_threshold:
        vertical = "向下" if dy > 0 else "向上"

    # The axis with the larger offset leads.
    if horizontal and abs(dx) > abs(dy):
        return horizontal, vertical
    if vertical:
        return vertical, horizontal

    # Roughly aligned but not touching yet.
    if np.sqrt(dx**2 + dy**2) < 50:
        return "向前", "请缓慢靠近"
    return "保持", None
|
||
|
||
# 播放音频的函数
|
||
def play_guidance_audio(direction):
    """Play the guidance audio for ``direction`` and mirror it into the command label.

    The label is only updated for non-blank strings; errors while updating
    it are ignored.
    """
    play_audio_threadsafe(direction)
    try:
        if isinstance(direction, str):
            label = direction.strip()
            if label:
                set_current_command(label)
    except Exception:
        # Updating the on-screen label is best effort.
        pass
|
||
|
||
# 添加居中判断函数
|
||
def get_center_guidance(object_center, frame_center, threshold=30):
    """Tell whether the object is centered in the frame and, if not, which way to guide.

    Returns:
        ``(direction_text, is_centered)``. ``(None, False)`` when there is
        no object center; the "centered" label with ``True`` when the
        object is within ``threshold`` pixels of the frame center.
    """
    if object_center is None:
        return None, False

    obj_x, obj_y = object_center
    mid_x, mid_y = frame_center
    dx = mid_x - obj_x
    dy = mid_y - obj_y

    if np.sqrt(dx**2 + dy**2) < threshold:
        return "已居中", True

    # The dominant axis decides; left/right and up/down labels are
    # deliberately swapped relative to the sign of the offset.
    if abs(dx) > abs(dy):
        direction = "向左" if dx > 0 else "向右"
    else:
        direction = "向上" if dy > 0 else "向下"
    return direction, False
|
||
|
||
def main(headless: bool = False, prompt_name: str = None, stop_event=None):
|
||
|
||
# OpenCV 优化
|
||
try:
|
||
import cv2
|
||
cv2.setUseOptimized(True)
|
||
cv2.setNumThreads(2) # 视 CPU 核心数而定;树莓派类设备可设 1
|
||
except Exception:
|
||
pass
|
||
|
||
|
||
|
||
|
||
# 如果传入了 prompt_name,使用它替换全局的 PROMPT_NAME
|
||
global PROMPT_NAME
|
||
if prompt_name:
|
||
PROMPT_NAME = prompt_name
|
||
print(f"[YOLOMEDIA] Using dynamic prompt: {PROMPT_NAME}")
|
||
|
||
speaker = Speaker(ENABLE_TTS)
|
||
last_tts_ts = 0.0
|
||
MODE = "SEGMENT" # 模式:SEGMENT -> FLASH -> CENTER_GUIDE -> TRACK
|
||
colors = Colors()
|
||
|
||
FRAME_IDX = 0
|
||
last_mask = None # 上一帧"目标掩膜"(用于 IoU 降噪)
|
||
flow_mask = None # 光流外推得到的掩膜(你现有代码里会更新它)
|
||
flow_grace = 0 # YOLOE 丢检后,允许光流顶住的计数
|
||
last_seen_ts = 0.0 # 最近一次 YOLOE 成功检测的时间戳
|
||
locked_id = None # (可选)若你在 tracker 里记录了 id,可在下面选择相同 id
|
||
# 刷新/容错参数(可按需微调)
|
||
REDETECT_EVERY = 5 # 每 5 帧强制"信任 YOLOE 一次"
|
||
FLOW_GRACE_MAX = 8 # YOLOE 连续丢检时,光流最多顶 8 帧
|
||
IOU_MIN_KEEP = 0.20 # 新/旧掩膜 IoU 太低时,用平滑合成,避免闪烁
|
||
|
||
|
||
|
||
print("[INIT] 加载 YOLO 模型...")
|
||
# NOTE: shoppingbest 不再用于找东西流程;如其他模式仍需,可保留 yolo = YOLO(...) 但不在本流程使用
|
||
# yolo = YOLO(YOLO_MODEL_PATH)
|
||
|
||
# —— 直接启用 YOLOE 文本提示后端(不再先查 shoppingbest)——
|
||
use_yoloe = False
|
||
yoloe_backend = None
|
||
if _YOLOE_READY:
|
||
try:
|
||
yoloe_backend = YoloEBackend() # 可用 YOLOE_MODEL_PATH 环境变量指定模型
|
||
yoloe_backend.set_text_classes([PROMPT_NAME]) # 文本类别
|
||
use_yoloe = True
|
||
print(f"[DETECTOR] YOLOE text-prompt backend enabled for: {PROMPT_NAME}", flush=True)
|
||
except Exception as e:
|
||
print(f"[DETECTOR] YOLOE init failed: {e}", flush=True)
|
||
else:
|
||
print("[DETECTOR] YOLOE backend not ready (import failed)", flush=True)
|
||
|
||
# 类名映射(YOLOE 模式下简化)
|
||
if use_yoloe:
|
||
# YOLOE 模式下,只有一个目标类
|
||
id_to_name = {0: PROMPT_NAME}
|
||
name_to_id = {norm_name(PROMPT_NAME): 0}
|
||
target_cls_id = 0
|
||
else:
|
||
# 如果将来需要支持传统 YOLO,可以在这里初始化
|
||
id_to_name = {}
|
||
name_to_id = {}
|
||
target_cls_id = None
|
||
|
||
# 目标类已在上面的 YOLOE 模式中设置
|
||
|
||
print(f"[CLASS] target id={target_cls_id}, name={id_to_name.get(target_cls_id, 'N/A')}")
|
||
print(f"[阈值] conf >= {CONF_THRESHOLD:.2f}")
|
||
|
||
# Hand Landmarker
|
||
print("[INIT] 初始化 Hand Landmarker...")
|
||
base = BaseOptions(model_asset_path=HAND_TASK_PATH)
|
||
hand_options = HandLandmarkerOptions(
|
||
base_options=base,
|
||
running_mode=VisionRunningMode.LIVE_STREAM,
|
||
num_hands=1,
|
||
min_hand_detection_confidence=0.40,
|
||
min_hand_presence_confidence=0.50,
|
||
min_tracking_confidence=0.70,
|
||
result_callback=on_result
|
||
)
|
||
landmarker = HandLandmarker.create_from_options(hand_options)
|
||
|
||
W = None
|
||
H = None
|
||
print("[Bridge] 等待 ESP32 画面 ...")
|
||
|
||
# [headless] 仅在非 headless 时创建窗口(原逻辑保留,外层加判断)
|
||
if not headless:
|
||
cv2.namedWindow(WINDOW, cv2.WINDOW_NORMAL)
|
||
|
||
# 光流缓存
|
||
old_gray = None
|
||
p0 = None
|
||
lock_edge_debug = None # 调试可视化:内边界
|
||
track_frame_count = 0 # 控制周边监控频率
|
||
last_poly_box = None # 当前多边形外接矩形
|
||
|
||
fps_hist = []
|
||
|
||
# 添加自动锁定相关变量
|
||
auto_lock_start_time = None # 开始检测到物体的时间
|
||
auto_lock_delay = 1.0 # 1秒后自动锁定
|
||
last_detected_mask = None # 最后检测到的mask
|
||
|
||
# 添加闪烁动画相关变量
|
||
flash_start_time = None # 闪烁开始时间
|
||
flash_duration = 1.0 # 闪烁持续时间(秒)
|
||
flash_frequency = 1 # 闪烁频率(Hz) - 只闪一次
|
||
flash_mask = None # 用于闪烁的mask
|
||
flash_color = (0, 255, 255) # 闪烁颜色(黄色)
|
||
|
||
# 添加引导相关变量
|
||
last_guidance_time = 0
|
||
last_guidance_direction = None
|
||
|
||
# 添加居中引导相关变量
|
||
center_guide_mask = None # 用于居中引导的mask
|
||
center_guide_start = None # 居中引导开始时间
|
||
center_threshold = 30 # 居中判定阈值(像素)
|
||
last_center_guide_time = 0 # 上次居中引导语音时间
|
||
center_reached = False # 是否已经到达中心
|
||
|
||
# 添加抓取跟踪相关变量
|
||
grasp_tracking_frames = [] # 存储最近的手和物体位置
|
||
grasp_tracking_duration = 1.0 # 需要持续1秒
|
||
grasp_movement_threshold = 10 # 最小移动像素阈值(提高阈值)
|
||
grasp_detected = False # 是否已经检测到抓取
|
||
grasp_start_time = None # 开始检测到协同移动的时间
|
||
|
||
# 背景参考点(用于检测相机移动) - 移到这里初始化
|
||
background_points = None
|
||
old_background_gray = None
|
||
|
||
try:
|
||
while True:
|
||
# 检查停止事件
|
||
if stop_event and stop_event.is_set():
|
||
print("[YOLOMEDIA] Stop event detected, exiting...")
|
||
break
|
||
|
||
frame = bridge_io.wait_raw_bgr(timeout_sec=0.5)
|
||
if frame is None:
|
||
# 没取到帧就继续等(ESP32还没连上或暂时无新帧)
|
||
# [headless] 给出 1ms 让出调度,避免空转
|
||
if headless:
|
||
cv2.waitKey(1)
|
||
continue
|
||
|
||
# 每帧重置 UI 文字叠加到左下角
|
||
H, W = frame.shape[:2]
|
||
ui_reset_overlay(H)
|
||
|
||
vis = frame.copy()
|
||
t_now = time.time()
|
||
|
||
# 抽帧 + 降采样(人手识别)
|
||
if FRAME_IDX % HAND_FPS_DIV == 0:
|
||
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
||
if HAND_DOWNSCALE and HAND_DOWNSCALE != 1.0:
|
||
small = cv2.resize(rgb, None, fx=HAND_DOWNSCALE, fy=HAND_DOWNSCALE, interpolation=cv2.INTER_AREA)
|
||
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=small)
|
||
else:
|
||
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
|
||
landmarker.detect_async(mp_image, int(t_now * 1000))
|
||
# 否则跳过,复用上一次 _last_result;Landmarker 会自己做 tracking
|
||
|
||
|
||
# 取手心、手框、握持(放宽版)
|
||
hand_center = None
|
||
hand_area = None
|
||
hand_box = None
|
||
grasp_now = False
|
||
grasp_score = 0.0
|
||
if _last_result is not None:
|
||
res, _ = _last_result
|
||
if res.hand_landmarks and len(res.hand_landmarks) > 0:
|
||
l0 = res.hand_landmarks[0]
|
||
|
||
# 绘制手部骨骼
|
||
draw_hands_mono(vis, l0, color=(0, 255, 255), r=2, t=2)
|
||
|
||
# 绘制手部轮廓(替代矩形框)
|
||
draw_hand_contour(vis, l0, W, H, color=(255, 255, 255), thickness=1)
|
||
|
||
xs = [p.x * W for p in l0]
|
||
ys = [p.y * H for p in l0]
|
||
hand_center = (float(sum(xs)/len(xs)), float(sum(ys)/len(ys)))
|
||
hand_box, hand_area = hand_bbox_and_area(l0, W, H)
|
||
# 注释掉矩形框绘制
|
||
# if hand_box:
|
||
# x0, y0, w0, h0 = hand_box
|
||
# cv2.rectangle(vis, (x0, y0), (x0+w0, y0+h0), (0,255,255), 1)
|
||
grasp_now, grasp_score = detect_grasp(l0, W, H)
|
||
draw_text_cn(vis, f"握持评分: {grasp_score:.2f}", (10, 70), font_size=18, color=(0, 180, 255))
|
||
|
||
|
||
if MODE == "SEGMENT":
|
||
# —— 仅 YOLOE:每帧文本提示分割 + 取最大目标(删掉 shoppingbest 与重复 YOLOE 段)——
|
||
FRAME_IDX += 1
|
||
candidate_masks = []
|
||
detected_object = False
|
||
|
||
if use_yoloe and yoloe_backend is not None:
|
||
# 每帧都跑;persist=True 便于维持目标 ID
|
||
det = yoloe_backend.segment(frame, conf=0.20, iou=0.45, persist=True)
|
||
H, W = frame.shape[:2]
|
||
|
||
# 选一个掩膜:优先与 locked_id 相同;否则面积最大
|
||
chosen_idx = None
|
||
if det["masks"]:
|
||
if locked_id is not None and det["ids"] and (locked_id in det["ids"]):
|
||
chosen_idx = det["ids"].index(locked_id)
|
||
else:
|
||
areas = [int(m.sum()) for m in det["masks"]]
|
||
chosen_idx = int(np.argmax(areas))
|
||
|
||
if chosen_idx is not None:
|
||
m = det["masks"][chosen_idx]
|
||
if m.shape[:2] != (H, W):
|
||
m = cv2.resize(m, (W, H), interpolation=cv2.INTER_NEAREST)
|
||
|
||
mask_bin = (m > 0).astype(np.uint8)
|
||
candidate_masks.append({
|
||
"mask": mask_bin,
|
||
"area": int(mask_bin.sum()),
|
||
"name": PROMPT_NAME,
|
||
"cls_id": 0,
|
||
"conf": 0.99,
|
||
})
|
||
detected_object = True
|
||
|
||
# 简单可视化(半透明叠层 + 轮廓),不影响你后面的逻辑
|
||
colored = np.zeros_like(frame, dtype=np.uint8)
|
||
colored[mask_bin == 1] = (0, 255, 255)
|
||
vis = cv2.addWeighted(vis, 1.0, colored, MASK_ALPHA, 0)
|
||
contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
|
||
if contours:
|
||
# 选择最大轮廓并进行适度平滑
|
||
largest_contour = max(contours, key=cv2.contourArea)
|
||
# 使用Douglas-Peucker算法适度简化,保持更多细节
|
||
epsilon = CONTOUR_EPSILON_FACTOR * cv2.arcLength(largest_contour, True) # 更小的epsilon保留更多细节
|
||
smoothed_contour = cv2.approxPolyDP(largest_contour, epsilon, True)
|
||
cv2.drawContours(vis, [smoothed_contour], -1, (0, 255, 255), STROKE_WIDTH)
|
||
|
||
# 记录 id,减少目标跳变
|
||
if det["ids"] and len(det["ids"]) > chosen_idx and det["ids"][chosen_idx] is not None:
|
||
locked_id = int(det["ids"][chosen_idx])
|
||
|
||
else:
|
||
# YOLOE 未就绪:提示并保持原画面(不阻塞前端)
|
||
draw_text_cn(vis, "YOLOE 未就绪,显示原始画面", (10, 100), font_size=22, color=(0, 215, 255))
|
||
|
||
# 选择面积最大的mask ←—— 这一行下面开始保留你的原代码
|
||
|
||
# 选择面积最大的mask
|
||
if candidate_masks:
|
||
# 按面积降序排序
|
||
candidate_masks.sort(key=lambda x: x['area'], reverse=True)
|
||
largest_mask_info = candidate_masks[0]
|
||
last_detected_mask = largest_mask_info['mask']
|
||
|
||
# 可选:在最大的物体上添加特殊标记
|
||
contours, _ = cv2.findContours(last_detected_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
|
||
if contours:
|
||
# 找到最大轮廓的中心
|
||
M = cv2.moments(contours[0])
|
||
if M["m00"] != 0:
|
||
cx = int(M["m10"] / M["m00"])
|
||
cy = int(M["m01"] / M["m00"])
|
||
# 在最大物体中心画一个圆圈标记
|
||
cv2.circle(vis, (cx, cy), 8, (0, 255, 0), 2)
|
||
cv2.circle(vis, (cx, cy), 12, (0, 255, 0), 1)
|
||
# 目标标签:保持就地标注
|
||
draw_text_cn(vis, "目标", (cx + 15, cy - 5), font_size=16, color=FRONTEND_COLORS["ok"], ui_hint=False)
|
||
|
||
# 显示检测信息
|
||
if len(candidate_masks) > 1:
|
||
draw_text_cn(vis, f"检测到{len(candidate_masks)}个物体,选择最大的(面积: {largest_mask_info['area']})",
|
||
(10, H - 30), font_size=16, color=(255, 255, 0))
|
||
|
||
# 自动锁定逻辑
|
||
if detected_object and last_detected_mask is not None:
|
||
if auto_lock_start_time is None:
|
||
auto_lock_start_time = t_now
|
||
print(f"[AUTO] 检测到物体,选择最大的(面积: {np.sum(last_detected_mask)}),开始倒计时...")
|
||
#play_guidance_audio("检测到物体") # 添加这行
|
||
|
||
elapsed = t_now - auto_lock_start_time
|
||
remaining = auto_lock_delay - elapsed
|
||
|
||
if remaining > 0:
|
||
# 显示倒计时(移动到左下角,前端风格)
|
||
draw_text_cn(vis, f"检测到物体,{remaining:.1f}秒后自动锁定", (10, 100), font_size=16, color=FRONTEND_COLORS["text"], stroke=(0,0,0))
|
||
|
||
# 绘制锁定框 - 使用虚线框表示正在准备锁定
|
||
if last_detected_mask is not None:
|
||
contours, _ = cv2.findContours(last_detected_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
|
||
if contours:
|
||
# 找到最大轮廓
|
||
largest_contour = max(contours, key=cv2.contourArea)
|
||
# 简化轮廓
|
||
epsilon = CONTOUR_EPSILON_FACTOR * cv2.arcLength(largest_contour, True)
|
||
smoothed_contour = cv2.approxPolyDP(largest_contour, epsilon, True)
|
||
|
||
# 根据倒计时进度改变颜色亮度
|
||
progress = 1.0 - (remaining / auto_lock_delay)
|
||
color_intensity = int(100 + 155 * progress) # 从100到255
|
||
lock_color = (0, color_intensity, color_intensity) # 黄色渐亮
|
||
|
||
# 绘制虚线轮廓
|
||
pts = smoothed_contour.reshape(-1, 2)
|
||
for i in range(len(pts)):
|
||
pt1 = tuple(pts[i])
|
||
pt2 = tuple(pts[(i + 1) % len(pts)])
|
||
# 使用虚线效果(通过绘制短线段)
|
||
draw_dashed_line(vis, pt1, pt2, color=lock_color, thickness=3,
|
||
dash_length=15, gap_length=8)
|
||
else:
|
||
# 进入闪烁模式
|
||
print("[AUTO] 进入闪烁动画模式")
|
||
MODE = "FLASH"
|
||
flash_start_time = t_now
|
||
flash_mask = last_detected_mask.copy()
|
||
auto_lock_start_time = None
|
||
play_guidance_audio("检测到物体")
|
||
else:
|
||
# 没有检测到物体,重置计时器
|
||
if auto_lock_start_time is not None:
|
||
print("[AUTO] 物体丢失,重置倒计时")
|
||
auto_lock_start_time = None
|
||
last_detected_mask = None
|
||
draw_text_cn(vis, "分割中... 等待检测到物体", (10, 100), font_size=16, color=FRONTEND_COLORS["muted"])
|
||
|
||
elif MODE == "FLASH":
|
||
# 闪烁动画模式
|
||
if flash_start_time is not None and flash_mask is not None:
|
||
elapsed = t_now - flash_start_time
|
||
|
||
if elapsed < flash_duration:
|
||
# 计算渐入渐出效果
|
||
# 前0.3秒渐入,中间0.4秒保持,后0.3秒渐出
|
||
if elapsed < 0.3:
|
||
# 渐入阶段
|
||
alpha = elapsed / 0.3 * 0.8 # 0到0.8
|
||
elif elapsed < 0.7:
|
||
# 保持阶段
|
||
alpha = 0.8
|
||
else:
|
||
# 渐出阶段
|
||
alpha = (1.0 - elapsed) / 0.3 * 0.8 # 0.8到0
|
||
|
||
# 绘制闪烁的mask
|
||
colored = np.zeros_like(frame, dtype=np.uint8)
|
||
colored[flash_mask == 1] = flash_color
|
||
vis = cv2.addWeighted(vis, 1.0 - alpha, colored, alpha, 0)
|
||
|
||
# 绘制轮廓(固定粗细,颜色渐变)
|
||
contours, _ = cv2.findContours(flash_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
|
||
if contours:
|
||
# 轮廓颜色也跟随alpha变化
|
||
contour_color = tuple(int(c * (0.5 + alpha * 0.5)) for c in flash_color)
|
||
cv2.drawContours(vis, contours, -1, contour_color, STROKE_WIDTH + 1)
|
||
|
||
# 显示提示文字(左下角)
|
||
draw_text_cn(vis, "正在锁定目标...", (10, 100), font_size=18, color=FRONTEND_COLORS["accent"])
|
||
else:
|
||
# 闪烁结束,初始化光流追踪并进入居中引导模式
|
||
print("[AUTO] 闪烁结束,初始化光流追踪")
|
||
edge_mask = inner_offset_edge(flash_mask, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX)
|
||
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||
pts = cv2.goodFeaturesToTrack(gray, mask=edge_mask, **FEATURE_PARAMS)
|
||
|
||
if pts is not None and len(pts) >= 8:
|
||
p0 = pts
|
||
old_gray = gray
|
||
MODE = "CENTER_GUIDE"
|
||
lock_edge_debug = edge_mask.copy()
|
||
track_frame_count = 0
|
||
center_guide_start = t_now
|
||
center_reached = False
|
||
flash_start_time = None
|
||
flash_mask = None
|
||
last_detected_mask = None
|
||
print(f"[LOCK] 内边界特征点数={len(p0)} → CENTER_GUIDE")
|
||
else:
|
||
print("[LOCK] 内边界特征点不足,返回检测模式")
|
||
MODE = "SEGMENT"
|
||
flash_start_time = None
|
||
flash_mask = None
|
||
last_detected_mask = None
|
||
|
||
elif MODE == "CENTER_GUIDE":
|
||
# 居中引导模式(使用光流追踪)
|
||
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||
poly_center = None
|
||
poly_area = 0.0
|
||
|
||
if old_gray is not None and p0 is not None and len(p0) >= 5:
|
||
# 光流追踪
|
||
p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, gray, p0, None, **LK_PARAMS)
|
||
if p1 is not None and st is not None:
|
||
good_new = p1[st == 1]
|
||
if len(good_new) >= 5:
|
||
p0 = good_new.reshape(-1, 1, 2)
|
||
hull = cv2.convexHull(good_new.reshape(-1,1,2))
|
||
poly = hull.reshape(-1, 2)
|
||
|
||
if len(poly) >= 3:
|
||
H, W = frame.shape[:2]
|
||
|
||
# 把当前光流多边形 rasterize 成掩膜(便于与 YOLOE 掩膜做 IoU)
|
||
poly_mask = np.zeros((H, W), dtype=np.uint8)
|
||
cv2.fillPoly(poly_mask, [poly.astype(np.int32)], 1)
|
||
|
||
# 降频:每3帧用 YOLOE 重新检测,其余帧依赖光流维持
|
||
need_reseed = False
|
||
new_det_mask = None
|
||
|
||
if use_yoloe and yoloe_backend is not None and (FRAME_IDX % 3 == 0):
|
||
# 添加调试信息
|
||
if FRAME_IDX % 30 == 0: # 每30帧打印一次
|
||
print(f"[YOLOE] 实时检测第 {FRAME_IDX} 帧")
|
||
det = yoloe_backend.segment(frame, conf=0.20, iou=0.45, persist=True)
|
||
if det["masks"]:
|
||
# 取面积最大的那个
|
||
areas = [int(m.sum()) for m in det["masks"]]
|
||
j = int(np.argmax(areas))
|
||
m = det["masks"][j]
|
||
if m.shape[:2] != (H, W):
|
||
m = cv2.resize(m, (W, H), interpolation=cv2.INTER_NEAREST)
|
||
new_det_mask = (m > 0).astype(np.uint8)
|
||
|
||
# 和当前光流多边形的 IoU
|
||
inter = np.logical_and(new_det_mask, poly_mask).sum()
|
||
union = np.logical_or(new_det_mask, poly_mask).sum() + 1e-6
|
||
iou = inter / union
|
||
|
||
# IoU 太低,说明漂了:用 YOLOE 的掩膜重播种光流
|
||
# 降低阈值,让 YOLOE 更容易更新光流
|
||
if iou < 0.5: # 从 IOU_MIN_KEEP (0.20) 提高到 0.5
|
||
need_reseed = True
|
||
# 用新掩膜的「内边界特征点」播种
|
||
edge_mask = inner_offset_edge(new_det_mask, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX)
|
||
gray2 = gray # 本帧灰度图已在上面算过
|
||
pts = cv2.goodFeaturesToTrack(gray2, mask=edge_mask, **FEATURE_PARAMS)
|
||
if pts is not None and len(pts) >= 8:
|
||
p0 = pts
|
||
old_gray = gray2
|
||
# 更新 last_mask,便于下游逻辑一致
|
||
last_mask = new_det_mask.copy()
|
||
last_seen_ts = time.time()
|
||
flow_grace = 0
|
||
print("[RESEED] YOLOE 低 IoU 触发重播种(已更新光流特征点)")
|
||
|
||
# 如果这帧没重播种,但 YOLOE 有结果且与 poly 很接近,可以做一次"平滑融合",抑制抖动
|
||
if (not need_reseed) and (new_det_mask is not None):
|
||
inter = np.logical_and(new_det_mask, poly_mask).sum()
|
||
union = np.logical_or(new_det_mask, poly_mask).sum() + 1e-6
|
||
iou = inter / union
|
||
# 降低融合阈值,让 YOLOE 结果更容易被采用
|
||
if iou < 0.95: # 从 0.90 提高到 0.95
|
||
# 增加 YOLOE 的权重,让实时检测更明显
|
||
poly_mask = ((0.8 * new_det_mask + 0.2 * poly_mask) > 0.5).astype(np.uint8)
|
||
# 用更新后的 poly_mask 回写到可视化与引导的后续变量(如果你下游用的是 last_detected_mask/last_mask)
|
||
last_mask = poly_mask.copy()
|
||
# 更新多边形轮廓,让可视化实时更新
|
||
contours, _ = cv2.findContours(poly_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
|
||
if contours:
|
||
# 找到最大轮廓
|
||
largest_contour = max(contours, key=cv2.contourArea)
|
||
# 使用精细的轮廓处理,保留更多细节
|
||
epsilon = TRACK_EPSILON_FACTOR * cv2.arcLength(largest_contour, True)
|
||
poly = cv2.approxPolyDP(largest_contour, epsilon, True).reshape(-1, 2)
|
||
# 注释掉凸包处理,保留原始轮廓细节
|
||
# hull = cv2.convexHull(poly.reshape(-1,1,2))
|
||
# poly = hull.reshape(-1, 2)
|
||
# 重新计算特征点
|
||
edge_mask = inner_offset_edge(poly_mask, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX)
|
||
pts = cv2.goodFeaturesToTrack(gray, mask=edge_mask, **FEATURE_PARAMS)
|
||
if pts is not None and len(pts) >= 5:
|
||
p0 = pts
|
||
|
||
# 绘制追踪的多边形 - 使用更粗的线条
|
||
cv2.polylines(vis, [poly.astype(np.int32)], isClosed=True, color=(0,255,255), thickness=STROKE_WIDTH)
|
||
|
||
# 计算多边形中心
|
||
poly_center, poly_area = polygon_center_and_area(poly)
|
||
|
||
if poly_center:
|
||
object_center = (int(poly_center[0]), int(poly_center[1]))
|
||
|
||
# 画面中心
|
||
frame_center = (W // 2, H // 2)
|
||
|
||
# 绘制物品中心点
|
||
cv2.circle(vis, object_center, 8, (0, 255, 0), -1)
|
||
cv2.circle(vis, object_center, 12, (0, 255, 0), 2)
|
||
|
||
# 绘制画面中心十字
|
||
cv2.line(vis, (frame_center[0] - 20, frame_center[1]),
|
||
(frame_center[0] + 20, frame_center[1]), (255, 255, 255), 2)
|
||
cv2.line(vis, (frame_center[0], frame_center[1] - 20),
|
||
(frame_center[0], frame_center[1] + 20), (255, 255, 255), 2)
|
||
|
||
# 绘制引导虚线
|
||
draw_dashed_line(vis, object_center, frame_center,
|
||
color=(255, 255, 0), thickness=2,
|
||
dash_length=10, gap_length=5)
|
||
|
||
# 获取引导方向
|
||
direction, is_centered = get_center_guidance(object_center, frame_center, center_threshold)
|
||
|
||
if not center_reached:
|
||
if is_centered:
|
||
# 到达中心,播放OK音效
|
||
center_reached = True
|
||
last_center_guide_time = t_now
|
||
play_guidance_audio("OK")
|
||
try:
|
||
bridge_io.send_ui_final("✓ 物品已居中!")
|
||
except Exception:
|
||
pass
|
||
draw_text_cn(vis, "✓ 物品已居中!", (10, 60), font_size=18, color=FRONTEND_COLORS["ok"])
|
||
else:
|
||
# 显示引导文字
|
||
msg = f"请将物品移到画面中心: {direction}"
|
||
try:
|
||
# 节流:每次语音播报也推一次final
|
||
if t_now - last_center_guide_time > GUIDANCE_INTERVAL_SEC:
|
||
bridge_io.send_ui_final(msg)
|
||
except Exception:
|
||
pass
|
||
draw_text_cn(vis, msg,
|
||
(10, 40), font_size=18, color=FRONTEND_COLORS["text"])
|
||
|
||
# 显示距离信息
|
||
dx = frame_center[0] - object_center[0]
|
||
dy = frame_center[1] - object_center[1]
|
||
distance = int(np.sqrt(dx**2 + dy**2))
|
||
draw_text_cn(vis, f"距离: {distance}px",
|
||
(10, 60), font_size=16, color=FRONTEND_COLORS["muted"])
|
||
|
||
# 播放语音引导
|
||
if t_now - last_center_guide_time > GUIDANCE_INTERVAL_SEC:
|
||
play_guidance_audio(direction)
|
||
last_center_guide_time = t_now
|
||
else:
|
||
# 已经居中,显示成功信息
|
||
try:
|
||
bridge_io.send_ui_final("✓ 物品已成功移到中心!")
|
||
except Exception:
|
||
pass
|
||
draw_text_cn(vis, "✓ 物品已成功移到中心!",
|
||
(10, 60), font_size=18, color=FRONTEND_COLORS["ok"])
|
||
|
||
# 等待1秒后进入手部追踪模式
|
||
if t_now - last_center_guide_time > 1.0:
|
||
print("[CENTER] 进入手部追踪模式")
|
||
try:
|
||
bridge_io.send_ui_final("进入手部追踪模式")
|
||
except Exception:
|
||
pass
|
||
MODE = "TRACK"
|
||
# 保持当前的光流追踪状态
|
||
else:
|
||
# 多边形中心计算失败,显示警告
|
||
draw_text_cn(vis, "正在追踪物体...", (10, 100), font_size=20, color=(255, 255, 0))
|
||
else:
|
||
# 光流点数不足,尝试重新检测
|
||
MODE = "SEGMENT"
|
||
old_gray = None
|
||
p0 = None
|
||
print("[CENTER] 光流追踪失败,返回检测模式")
|
||
|
||
old_gray = gray
|
||
|
||
else: # MODE == "TRACK"
|
||
# 手部追踪模式(原有逻辑保持不变)
|
||
align_score = 0.0
|
||
range_score = 0.0
|
||
ratio = None
|
||
|
||
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||
track_frame_count += 1
|
||
|
||
relock_done = False
|
||
poly_center = None
|
||
poly_area = 0.0
|
||
|
||
# 初始化camera_movement为默认值
|
||
camera_movement = np.array([0.0, 0.0])
|
||
|
||
# 初始化或更新背景参考点(在物体多边形外部取点)
|
||
if background_points is None or track_frame_count % 30 == 0:
|
||
# 在画面四角取一些背景特征点
|
||
mask_for_bg = np.ones((H, W), dtype=np.uint8) * 255
|
||
if last_poly_box:
|
||
x, y, w, h = last_poly_box
|
||
# 扩大区域,排除物体和手
|
||
expand = 100
|
||
x1 = max(0, x - expand)
|
||
y1 = max(0, y - expand)
|
||
x2 = min(W, x + w + expand)
|
||
y2 = min(H, y + h + expand)
|
||
mask_for_bg[y1:y2, x1:x2] = 0
|
||
|
||
# 在背景区域提取特征点
|
||
try:
|
||
bg_pts = cv2.goodFeaturesToTrack(gray, maxCorners=20,
|
||
qualityLevel=0.1,
|
||
minDistance=30,
|
||
mask=mask_for_bg)
|
||
if bg_pts is not None and len(bg_pts) >= 5:
|
||
background_points = bg_pts
|
||
old_background_gray = gray.copy()
|
||
except Exception as e:
|
||
#print(f"[TRACK] 背景特征点提取失败: {e}")
|
||
background_points = None
|
||
|
||
# 计算背景移动(相机移动)
|
||
if old_background_gray is not None and background_points is not None and len(background_points) > 0:
|
||
try:
|
||
bg_p1, bg_st, _ = cv2.calcOpticalFlowPyrLK(
|
||
old_background_gray, gray, background_points, None, **LK_PARAMS
|
||
)
|
||
if bg_p1 is not None and bg_st is not None:
|
||
good_bg_old = background_points[bg_st == 1]
|
||
good_bg_new = bg_p1[bg_st == 1]
|
||
if len(good_bg_new) >= 3 and len(good_bg_old) >= 3:
|
||
# 计算背景的平均移动
|
||
bg_movement = np.mean(good_bg_new - good_bg_old, axis=0)
|
||
camera_movement = bg_movement.reshape(2)
|
||
background_points = good_bg_new.reshape(-1, 1, 2)
|
||
old_background_gray = gray.copy()
|
||
except Exception as e:
|
||
print(f"[TRACK] 背景光流计算失败: {e}")
|
||
camera_movement = np.array([0.0, 0.0])
|
||
|
||
if old_gray is not None and p0 is not None and len(p0) >= 5:
|
||
p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, gray, p0, None, **LK_PARAMS)
|
||
if p1 is not None and st is not None:
|
||
good_new = p1[st == 1]
|
||
if len(good_new) >= 5:
|
||
p0 = good_new.reshape(-1, 1, 2)
|
||
hull = cv2.convexHull(good_new.reshape(-1,1,2))
|
||
poly = hull.reshape(-1, 2)
|
||
|
||
if len(poly) >= 3:
|
||
# 统一的 YOLOE 实时检测和校正(每帧)
|
||
latest_det_mask = None
|
||
if use_yoloe and yoloe_backend is not None:
|
||
# 添加调试信息
|
||
if track_frame_count % 30 == 0: # 每30帧打印一次
|
||
print(f"[YOLOE] TRACK模式实时检测第 {track_frame_count} 帧")
|
||
|
||
# YOLOE 实时检测(统一调用,避免重复)
|
||
det = yoloe_backend.segment(frame, conf=YOLO_CORRECTION_CONF_THRESHOLD, iou=0.45, persist=True)
|
||
if det["masks"]:
|
||
# 取面积最大的那个
|
||
areas = [int(m.sum()) for m in det["masks"]]
|
||
j = int(np.argmax(areas))
|
||
m = det["masks"][j]
|
||
if m.shape[:2] != (H, W):
|
||
m = cv2.resize(m, (W, H), interpolation=cv2.INTER_NEAREST)
|
||
latest_det_mask = (m > 0).astype(np.uint8)
|
||
|
||
# 和当前光流多边形的 IoU
|
||
poly_mask = np.zeros((H, W), dtype=np.uint8)
|
||
cv2.fillPoly(poly_mask, [poly.astype(np.int32)], 1)
|
||
inter = np.logical_and(latest_det_mask, poly_mask).sum()
|
||
union = np.logical_or(latest_det_mask, poly_mask).sum() + 1e-6
|
||
iou = inter / union
|
||
|
||
# 降低IoU阈值,更积极地校正
|
||
if iou > YOLO_CORRECTION_IOU_THRESHOLD: # 使用可配置阈值
|
||
# 用 YOLOE 结果更新多边形
|
||
contours, _ = cv2.findContours(latest_det_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
|
||
if contours:
|
||
largest_contour = max(contours, key=cv2.contourArea)
|
||
# 使用更精细的轮廓处理,减少过度简化
|
||
epsilon = TRACK_EPSILON_FACTOR * cv2.arcLength(largest_contour, True)
|
||
poly = cv2.approxPolyDP(largest_contour, epsilon, True).reshape(-1, 2)
|
||
|
||
# 更新光流特征点
|
||
edge_mask = inner_offset_edge(latest_det_mask, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX)
|
||
pts = cv2.goodFeaturesToTrack(gray, mask=edge_mask, **FEATURE_PARAMS)
|
||
if pts is not None and len(pts) >= 5:
|
||
p0 = pts
|
||
#print(f"[TRACK] YOLOE 实时校正,IoU: {iou:.3f}")
|
||
|
||
# 检查是否接触,决定轮廓颜色
|
||
is_touching = False
|
||
overlap_ratio = 0.0
|
||
if hand_box is not None and poly is not None:
|
||
is_touching, overlap_ratio = check_hand_object_contact(hand_box, poly, overlap_threshold=0.1)
|
||
|
||
# 绘制多边形(可能已被 YOLOE 更新)- 使用更粗的线条
|
||
if is_touching:
|
||
# 接触时用亮绿色,并添加发光效果
|
||
poly_color = (0, 255, 127)
|
||
# 绘制一个更粗的外层轮廓作为发光效果
|
||
cv2.polylines(vis, [poly.astype(np.int32)], isClosed=True,
|
||
color=(127, 255, 127), thickness=STROKE_WIDTH + 4)
|
||
# 添加半透明的填充效果
|
||
overlay = vis.copy()
|
||
cv2.fillPoly(overlay, [poly.astype(np.int32)], (0, 255, 0))
|
||
cv2.addWeighted(overlay, 0.15, vis, 0.85, 0, vis)
|
||
else:
|
||
# 未接触时用普通绿色
|
||
poly_color = (0, 255, 0)
|
||
cv2.polylines(vis, [poly.astype(np.int32)], isClosed=True, color=poly_color, thickness=STROKE_WIDTH)
|
||
# 多边形质心与面积
|
||
poly_center, poly_area = polygon_center_and_area(poly)
|
||
if poly_center:
|
||
pc = (int(poly_center[0]), int(poly_center[1]))
|
||
cv2.circle(vis, pc, 6, (0,255,0), -1)
|
||
|
||
# 多边形外接矩形(用于周边监控)
|
||
x, y, w, h = cv2.boundingRect(poly.astype(np.int32))
|
||
last_poly_box = (x, y, w, h)
|
||
|
||
# ====== 对齐分数(第一条)======
|
||
if hand_center and poly_center:
|
||
hc = np.array(hand_center, dtype=np.float32)
|
||
oc = np.array(poly_center, dtype=np.float32)
|
||
dist = float(np.linalg.norm(oc - hc))
|
||
diag = float(np.linalg.norm([W, H]))
|
||
align_score = 1.0 - min(dist/(ALIGN_LOOSE_PCT*diag + 1e-6), 1.0)
|
||
|
||
# 绘制虚线引导(替代原来的实线箭头)
|
||
draw_dashed_line(vis, (hc[0], hc[1]), (oc[0], oc[1]),
|
||
color=(255, 255, 0), thickness=2,
|
||
dash_length=15, gap_length=10)
|
||
|
||
# 方向引导
|
||
direction, secondary = get_guidance_direction(
|
||
hand_center, poly_center, hand_area, poly_area,
|
||
hand_box, poly
|
||
)
|
||
|
||
if direction and direction != "保持":
|
||
# 根据是否接触显示不同颜色
|
||
if direction == "向前":
|
||
# 手已经接触物体,用绿色显示
|
||
guide_color = (0, 255, 0) # 绿色
|
||
draw_text_cn(vis, f"引导: {direction} - 伸手抓取", (W//2 - 80, 40),
|
||
font_size=24, color=guide_color, stroke=(0, 0, 0))
|
||
else:
|
||
# 还未接触,用黄色显示
|
||
guide_color = (0, 255, 255) # 黄色
|
||
draw_text_cn(vis, f"引导: {direction}", (W//2 - 60, 40),
|
||
font_size=24, color=guide_color, stroke=(0, 0, 0))
|
||
|
||
# 显示次要信息(接触度或其他方向)
|
||
if secondary:
|
||
if isinstance(secondary, str):
|
||
# 接触度信息
|
||
draw_text_cn(vis, secondary, (W//2 - 60, 70),
|
||
font_size=18, color=(0, 255, 0))
|
||
else:
|
||
# 其他方向信息
|
||
draw_text_cn(vis, f"(或 {secondary})", (W//2 - 60, 70),
|
||
font_size=18, color=(200, 200, 200))
|
||
|
||
# 播放语音引导 - 确保每个方向都会播放
|
||
if t_now - last_guidance_time > GUIDANCE_INTERVAL_SEC:
|
||
# 检查方向是否改变,或者时间间隔足够
|
||
if direction != last_guidance_direction or t_now - last_guidance_time > GUIDANCE_INTERVAL_SEC * 2:
|
||
play_guidance_audio(direction)
|
||
last_guidance_direction = direction
|
||
last_guidance_time = t_now
|
||
print(f"[GUIDE] 播放引导音频: {direction}")
|
||
else:
|
||
align_score = 0.0
|
||
|
||
# 显示接触状态
|
||
is_touching, overlap_ratio = check_hand_object_contact(hand_box, poly, overlap_threshold=0.1)
|
||
if is_touching:
|
||
draw_text_cn(vis, f"状态: 已接触 ({overlap_ratio:.1%})", (10, 95),
|
||
font_size=16, color=(0, 255, 0))
|
||
else:
|
||
# 计算手和物体的距离
|
||
if hand_center and poly_center:
|
||
distance = np.sqrt((hand_center[0] - poly_center[0])**2 +
|
||
(hand_center[1] - poly_center[1])**2)
|
||
draw_text_cn(vis, f"距离: {distance:.0f}px", (10, 95),
|
||
font_size=16, color=FRONTEND_COLORS["muted"])
|
||
|
||
# 成功条件:握持(放宽)
|
||
if (_last_result and _last_result[0].hand_landmarks and len(_last_result[0].hand_landmarks) > 0):
|
||
l0 = _last_result[0].hand_landmarks[0]
|
||
grasp_now, grasp_score = detect_grasp(l0, W, H)
|
||
else:
|
||
grasp_now, grasp_score = False, 0.0
|
||
|
||
# guidance_msg 相关代码已经集成到上面的引导逻辑中
|
||
|
||
# ===== 周边监控 & 重新锁定(复用YOLO结果)=====
|
||
if (track_frame_count % PERI_CHECK_EVERY == 0) and (last_poly_box is not None) and (latest_det_mask is not None):
|
||
# 直接使用刚才的YOLO检测结果,避免重复调用
|
||
px, py, pw, ph = last_poly_box
|
||
x0 = max(0, px - PERI_MONITOR_PX)
|
||
y0 = max(0, py - PERI_MONITOR_PX)
|
||
x1 = min(W - 1, px + pw + PERI_MONITOR_PX)
|
||
y1 = min(H - 1, py + ph + PERI_MONITOR_PX)
|
||
|
||
# 检查周边区域是否有更好的检测结果
|
||
peri_area = latest_det_mask[y0:y1, x0:x1].sum()
|
||
total_area = latest_det_mask.sum()
|
||
|
||
# 如果周边区域有显著检测结果,重新锁定
|
||
if peri_area > total_area * 0.1: # 周边有10%以上的检测面积
|
||
edge_mask = inner_offset_edge(latest_det_mask, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX)
|
||
pts = cv2.goodFeaturesToTrack(gray, mask=edge_mask, **FEATURE_PARAMS)
|
||
if pts is not None and len(pts) >= 8:
|
||
p0 = pts
|
||
old_gray = gray
|
||
lock_edge_debug = edge_mask.copy()
|
||
#print(f"[PERI] 周边重锁定,特征点数={len(p0)}")
|
||
else:
|
||
MODE = "SEGMENT"; old_gray = None; p0 = None; lock_edge_debug = None
|
||
else:
|
||
MODE = "SEGMENT"; old_gray = None; p0 = None; lock_edge_debug = None
|
||
else:
|
||
MODE = "SEGMENT"; old_gray = None; p0 = None; lock_edge_debug = None
|
||
else:
|
||
MODE = "SEGMENT"; old_gray = None; p0 = None; lock_edge_debug = None
|
||
|
||
|
||
|
||
if MODE == "SEGMENT":
|
||
draw_text_cn(vis, "追踪丢失 → 正在重新识别。按 Enter 重新锁定", (10, 100), font_size=22, color=(0,0,255))
|
||
|
||
old_gray = gray
|
||
|
||
# FPS(移动到左下角样式)
|
||
if 'fps_hist' not in locals():
|
||
fps_hist = []
|
||
fps_hist.append(t_now)
|
||
if len(fps_hist) > 30:
|
||
fps_hist.pop(0)
|
||
fps = 0.0 if len(fps_hist) < 2 else (len(fps_hist)-1)/(fps_hist[-1]-fps_hist[0])
|
||
draw_text_cn(vis, f"FPS: {fps:.1f}", (10, 40), font_size=16, color=FRONTEND_COLORS["ok"])
|
||
|
||
# 右下角显示"内边界/最近一次锁定"的调试图
|
||
if lock_edge_debug is not None:
|
||
# 极小缩放并放在右下角
|
||
small = cv2.resize(lock_edge_debug, (0,0), fx=0.22, fy=0.22, interpolation=cv2.INTER_NEAREST)
|
||
sh, sw = small.shape[:2]
|
||
small_bgr = cv2.cvtColor(small, cv2.COLOR_GRAY2BGR)
|
||
# 右下角位置,留 10-12px 边距
|
||
x1 = max(8, W - sw - 12)
|
||
y1 = max(8, H - sh - 12)
|
||
y2 = y1 + sh
|
||
x2 = x1 + sw
|
||
vis[y1:y2, x1:x2] = small_bgr
|
||
# 标签置于图上方紧贴,使用更小字号
|
||
#draw_text_cn(vis, "内边界", (x1, y1 - 8), font_size=12, color=FRONTEND_COLORS["muted"], ui_hint=False)
|
||
|
||
# 底部中间的"当前指令"按钮(始终绘制,文案随音频同步)
|
||
draw_command_pill(vis, CURRENT_COMMAND_TEXT)
|
||
|
||
# 展示(无论 headless 与否,都会推给前端)
|
||
bridge_io.send_vis_bgr(vis)
|
||
|
||
# [headless] 只有非 headless 时才弹窗与键盘交互;headless 下用 waitKey(1) 让出调度
|
||
if not headless:
|
||
cv2.imshow(WINDOW, vis)
|
||
key = cv2.waitKey(1) & 0xFF
|
||
if key in (27, ord('q')):
|
||
break
|
||
elif key == ord('r'):
|
||
MODE = "SEGMENT"; old_gray = None; p0 = None; lock_edge_debug = None
|
||
elif key == 13: # Enter:从 SEGMENT 锁定并开始 TRACK(内收 5px)
|
||
if MODE == "SEGMENT":
|
||
# 使用 YOLOE 进行手动锁定
|
||
if use_yoloe and yoloe_backend is not None:
|
||
det = yoloe_backend.segment(frame, conf=CONF_THRESHOLD, iou=0.45, persist=True)
|
||
if det["masks"]:
|
||
# 取面积最大的那个
|
||
areas = [int(m.sum()) for m in det["masks"]]
|
||
j = int(np.argmax(areas))
|
||
m = det["masks"][j]
|
||
if m.shape[:2] != (H, W):
|
||
m = cv2.resize(m, (W, H), interpolation=cv2.INTER_NEAREST)
|
||
best_mask = (m > 0.5).astype(np.uint8)
|
||
else:
|
||
best_mask = None
|
||
else:
|
||
best_mask = None
|
||
if best_mask is not None:
|
||
edge_mask = inner_offset_edge(best_mask, offset_px=INNER_OFFSET_PX_LOCK, edge_dilate_px=EDGE_DILATE_PX)
|
||
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||
pts = cv2.goodFeaturesToTrack(gray, mask=edge_mask, **FEATURE_PARAMS)
|
||
if pts is not None and len(pts) >= 8:
|
||
p0 = pts
|
||
old_gray = gray
|
||
MODE = "TRACK"
|
||
lock_edge_debug = edge_mask.copy()
|
||
track_frame_count = 0
|
||
print(f"[LOCK] 内边界特征点数={len(p0)} → TRACK")
|
||
else:
|
||
print("[LOCK] 内边界特征点不足,请调整画面后重试。")
|
||
else:
|
||
print("[LOCK] 当前帧未找到有效分割,请重试。")
|
||
else:
|
||
# headless 下也调用一次 waitKey(1),让 OpenCV 的计时器/回调得到机会,且避免 CPU 忙等
|
||
cv2.waitKey(1)
|
||
|
||
# 在 headless 模式下检查停止事件
|
||
if stop_event and stop_event.is_set():
|
||
print("[YOLOMEDIA] Received stop signal in headless mode")
|
||
break
|
||
|
||
finally:
|
||
try:
|
||
landmarker.close()
|
||
except Exception:
|
||
pass
|
||
#cap.release()
|
||
# [headless] 仅在非 headless 时销毁窗口
|
||
if not headless:
|
||
cv2.destroyAllWindows()
|
||
|
||
|
||
# Script entry point: start the processing loop only when this file is run
# directly, so importing the module does not trigger main() as a side effect.
if __name__ == "__main__":
    main()
|