65 lines
2.1 KiB
Python
65 lines
2.1 KiB
Python
# qwen_extractor.py
|
||
# -*- coding: utf-8 -*-
|
||
import logging
import os
from typing import List, Tuple

from openai import OpenAI
|
||
|
||
# Local-first lookup table (extend or rename entries at any time).
# Each English label is listed once, followed by every alias that maps to it.
_LABEL_ALIASES = (
    ("Red_Bull", ("红牛",)),
    ("AD_milk", ("ad钙奶", "ad 钙奶", "ad", "钙奶")),
    ("bottle", ("矿泉水", "水瓶")),
    ("coke", ("可乐",)),
    ("sprite", ("雪碧",)),
)

# Flattened alias -> label mapping; insertion order follows the groups above.
LOCAL_CN2EN = {
    alias: label
    for label, aliases in _LABEL_ALIASES
    for alias in aliases
}
|
||
|
||
def _make_client() -> OpenAI:
|
||
# 复用你百炼兼容端点;支持从环境变量读取
|
||
base_url = os.getenv("DASHSCOPE_COMPAT_BASE", "https://dashscope.aliyuncs.com/compatible-mode/v1")
|
||
api_key = "sk-a9440db694924559ae4ebdc2023d2b9a"
|
||
return OpenAI(api_key=api_key, base_url=base_url)
|
||
|
||
# System prompt sent with every model request; one tuple entry per sentence,
# joined with single spaces.
PROMPT_SYS = " ".join(
    (
        "You are a label normalizer.",
        "Convert the given Chinese object description into a short, "
        "lowercase English YOLO/vision class name (1~3 words).",
        "If multiple are given, return the single most likely one.",
        "Output ONLY the label, no punctuation.",
    )
)
|
||
|
||
def extract_english_label(query_cn: str) -> Tuple[str, str]:
    """Map a Chinese object description to an English class label.

    Resolution order:

    1. Exact match of the stripped, lower-cased query in ``LOCAL_CN2EN``.
    2. First ``LOCAL_CN2EN`` key (in dict order) contained in the query.
    3. A chat-completion request to the model named by the ``QWEN_MODEL``
       environment variable (default ``qwen-turbo``).

    Args:
        query_cn: Free-text object description. ``None`` or a blank string
            is tolerated and yields the fallback result without any
            network call.

    Returns:
        ``(label_en, source)`` where ``source`` is one of ``'local'``,
        ``'qwen'`` or ``'fallback'``. The fallback label is ``'bottle'``;
        it is returned when the query is empty, when the request fails, or
        when the model reply is empty after cleaning.
    """
    fallback = ("bottle", "fallback")

    raw_query = (query_cn or "").strip()
    if not raw_query:
        # Nothing to look up: skip both the table scan and the remote call.
        return fallback
    q = raw_query.lower()

    if q in LOCAL_CN2EN:
        return LOCAL_CN2EN[q], "local"

    # Simple rule: accept a known alias anywhere inside the query, so that
    # leading modifiers do not defeat the lookup.
    # NOTE(review): a short alias such as "ad" also fires on unrelated
    # queries that merely contain those letters - confirm this is intended.
    for alias, label_en in LOCAL_CN2EN.items():
        if alias in q:
            return label_en, "local"

    # Ask the Qwen model through the OpenAI-compatible chat completions API.
    try:
        client = _make_client()
        rsp = client.chat.completions.create(
            model=os.getenv("QWEN_MODEL", "qwen-turbo"),
            messages=[
                {"role": "system", "content": PROMPT_SYS},
                {"role": "user", "content": raw_query},
            ],
            stream=False,
        )
        content = rsp.choices[0].message.content or ""
    except Exception:
        # Best-effort by design: any failure degrades to the fallback label,
        # but it is logged so that outages and misconfiguration stay visible.
        logging.getLogger(__name__).warning(
            "Qwen label extraction failed for %r; using fallback",
            raw_query,
            exc_info=True,
        )
        return fallback

    # Drop periods/commas and collapse every run of whitespace to one space.
    label = " ".join(content.replace(".", "").replace(",", "").split())
    if not label:
        # An empty reply did not really come from the model; tag it as fallback.
        return fallback
    return label, "qwen"
|