Files
NaviGlassServer/qwen_extractor.py
2025-12-31 15:42:30 +08:00

65 lines
2.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# qwen_extractor.py
# -*- coding: utf-8 -*-
import logging
import os
import re
from typing import List, Tuple

from openai import OpenAI
# -- Local-first mapping (extend or rename entries at any time) --
# Maps a lowercase Chinese (or mixed) product alias to its English label.
# Written as label -> aliases groups and flattened, so that every alias of a
# label sits on one line; the resulting dict has one entry per alias.
LOCAL_CN2EN = {
    alias: label
    for label, aliases in (
        ("Red_Bull", ("红牛",)),
        ("AD_milk", ("ad钙奶", "ad 钙奶", "ad", "钙奶")),
        ("bottle", ("矿泉水", "水瓶")),
        ("coke", ("可乐",)),
        ("sprite", ("雪碧",)),
    )
    for alias in aliases
}
def _make_client() -> OpenAI:
    """Build an OpenAI-compatible client for the DashScope endpoint.

    Configuration is read from the environment:

    * ``DASHSCOPE_COMPAT_BASE`` - base URL of the compatible-mode API
      (defaults to the public DashScope endpoint).
    * ``DASHSCOPE_API_KEY`` - API key; required.

    Returns:
        A configured ``OpenAI`` client.

    Raises:
        RuntimeError: if ``DASHSCOPE_API_KEY`` is unset or empty.
    """
    base_url = os.getenv(
        "DASHSCOPE_COMPAT_BASE",
        "https://dashscope.aliyuncs.com/compatible-mode/v1",
    )
    # SECURITY: the key used to be a string literal in this file. Secrets must
    # not live in source control; the previously committed key should be
    # treated as leaked and rotated.
    api_key = os.getenv("DASHSCOPE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "DASHSCOPE_API_KEY is not set; export it before calling the Qwen API"
        )
    return OpenAI(api_key=api_key, base_url=base_url)
# System prompt sent with every label-normalisation request.
# Assembled one sentence per item; the sentences are joined by single spaces.
PROMPT_SYS = " ".join(
    (
        "You are a label normalizer.",
        "Convert the given Chinese object description into a short, "
        "lowercase English YOLO/vision class name (1~3 words).",
        "If multiple are given, return the single most likely one.",
        "Output ONLY the label, no punctuation.",
    )
)
def _term_in_query(term: str, query: str) -> bool:
    """Return True if *term* occurs in the (lowercased) *query*.

    A purely ASCII alphanumeric term such as "ad" must not be glued to other
    ASCII letters or digits, so it is not found inside unrelated words like
    "salad" or "ipad". Any other term is matched as a plain substring.
    """
    if term.isascii() and term.isalnum():
        pattern = r"(?<![a-z0-9])" + re.escape(term) + r"(?![a-z0-9])"
        return re.search(pattern, query) is not None
    return term in query


def _clean_label(text: str) -> str:
    """Strip sentence punctuation from a model reply and collapse whitespace."""
    # ASCII and full-width period / comma.
    for mark in (".", ",", "\u3002", "\uff0c"):
        text = text.replace(mark, "")
    # str.split() with no argument splits on any Unicode whitespace, so
    # newlines and non-ASCII spaces are normalised to single ASCII spaces.
    return " ".join(text.split())


def extract_english_label(query_cn: str) -> Tuple[str, str]:
    """Translate a Chinese object description into an English class label.

    Resolution order:

    1. exact hit in ``LOCAL_CN2EN`` (query is stripped and lowercased);
    2. a ``LOCAL_CN2EN`` alias contained in the query, longest alias first;
    3. a chat-completion request to the model named by ``QWEN_MODEL``
       (default ``"qwen-turbo"``).

    Args:
        query_cn: The user's description. ``None``, empty, or blank input is
            answered with the fallback without any remote call.

    Returns:
        ``(label_en, source)`` where ``source`` is one of ``'local'``,
        ``'qwen'`` or ``'fallback'``. The label is ``'bottle'`` whenever the
        fallback is used or the model returns an empty reply.
    """
    raw = (query_cn or "").strip()
    if not raw:
        # Nothing to translate: avoid sending an empty prompt to the model.
        return "bottle", "fallback"
    q = raw.lower()

    if q in LOCAL_CN2EN:
        return LOCAL_CN2EN[q], "local"

    # Containment match so that modifiers around a known alias are ignored.
    # Longest alias first keeps the result independent of dict order.
    for term in sorted(LOCAL_CN2EN, key=len, reverse=True):
        if _term_in_query(term, q):
            return LOCAL_CN2EN[term], "local"

    # Ask Qwen through the OpenAI-compatible Chat Completions API.
    try:
        client = _make_client()
        rsp = client.chat.completions.create(
            model=os.getenv("QWEN_MODEL", "qwen-turbo"),
            messages=[
                {"role": "system", "content": PROMPT_SYS},
                {"role": "user", "content": raw},
            ],
            stream=False,
        )
        label = _clean_label(rsp.choices[0].message.content or "")
    except Exception:
        # Deliberately best-effort: any failure (missing key, network error,
        # malformed response) degrades to the default label, but is logged so
        # it does not go unnoticed.
        logging.getLogger(__name__).warning(
            "Qwen label extraction failed; using fallback label",
            exc_info=True,
        )
        return "bottle", "fallback"

    # An empty reply still counts as answered by the model.
    return (label or "bottle"), "qwen"