Files
NaviGlassServer/glm_client.py
2026-01-05 09:08:40 +08:00

229 lines
8.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# glm_client.py
# -*- coding: utf-8 -*-
"""
GLM-4.6v-Flash LLM client - Day 22.

Uses the official zai-sdk with the glm-4.6v-flash model.
"""
import os
import asyncio
from datetime import datetime
from typing import AsyncGenerator, Optional
from zai import ZhipuAiClient

# API configuration: the key must be provided via the environment (.env file).
API_KEY = os.getenv("GLM_API_KEY")
if not API_KEY:
    # Fail fast at import time so a missing key is caught immediately.
    raise RuntimeError("未设置 GLM_API_KEY 环境变量,请在 .env 中配置")
MODEL = "glm-4.6v-flash"  # upgraded to glm-4.6v-flash (vision-capable)
# Index -> Chinese weekday name, aligned with datetime.weekday() (0 = Monday).
WEEKDAY_MAP = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
def get_system_prompt() -> str:
    """Build the system prompt, embedding the current time, date and weekday.

    Regenerated on every call so the timestamp never goes stale.
    """
    now = datetime.now()
    weekday = WEEKDAY_MAP[now.weekday()]
    # NOTE(review): the date format has no trailing 日 ("%Y年%m月%d") —
    # looks intentional-or-typo; confirm before changing the output.
    time_str = now.strftime("%H:%M")
    date_str = now.strftime("%Y年%m月%d")
    return f"""你是一个视障辅助AI助手安装在智能导盲眼镜上。
当前时间:{time_str}
今天日期:{date_str} {weekday}
请用极简短的语言回答每次回答不超过2-3句话。
避免冗长解释,只提供最关键的信息。
语气友好但简洁。"""
# Lazily-created SDK client and the shared multi-turn conversation history.
_client = None
_conversation_history = []
MAX_HISTORY_TURNS = 5  # keep only the most recent 5 user/assistant turns
def _get_client() -> ZhipuAiClient:
    """Return the module-wide ZhipuAiClient, creating it on first use."""
    global _client
    if _client is not None:
        return _client
    _client = ZhipuAiClient(api_key=API_KEY)
    return _client
def clear_conversation_history():
    """Forget all remembered turns by rebinding the history to a fresh list."""
    global _conversation_history
    _conversation_history = []
    print("[GLM] 对话历史已清除")
async def chat(user_message: str, image_base64: Optional[str] = None) -> str:
    """
    Single-turn chat with GLM-4.6v-Flash, with context memory.

    Args:
        user_message: User message text.
        image_base64: Optional base64-encoded JPEG image (multimodal turn).

    Returns:
        The assistant's reply text; "" when the API returned no usable
        content; a fixed apology string after all retries failed.
    """
    global _conversation_history
    client = _get_client()

    # Build the user message (multimodal content list when an image is attached).
    if image_base64:
        user_content = [
            {"type": "text", "text": user_message},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}
        ]
    else:
        user_content = user_message

    _conversation_history.append({"role": "user", "content": user_content})

    # Cap history length (one turn = 1 user + 1 assistant = 2 messages).
    max_messages = MAX_HISTORY_TURNS * 2
    if len(_conversation_history) > max_messages:
        _conversation_history = _conversation_history[-max_messages:]

    # Rebuild the full message list each call so the system prompt carries
    # the current time.
    messages = [{"role": "system", "content": get_system_prompt()}] + _conversation_history

    # Day 22: retry with exponential backoff on rate limits (429 / 1305).
    max_retries = 3
    retry_delay = 1  # initial delay, seconds
    for attempt in range(max_retries):
        try:
            # Per the vendor docs the `thinking` parameter is required even
            # for vision models; explicitly disabled to reduce latency.
            response = await asyncio.to_thread(
                client.chat.completions.create,
                model=MODEL,
                messages=messages,
                thinking={"type": "disabled"},
            )
            if response.choices:
                # `content` may be None on some responses; `.strip()` on None
                # would raise, so coerce to "" first.
                ai_reply = (response.choices[0].message.content or "").strip()
                if ai_reply:
                    _conversation_history.append({"role": "assistant", "content": ai_reply})
                    print(f"[GLM] 回复: {ai_reply[:50]}..." if len(ai_reply) > 50 else f"[GLM] 回复: {ai_reply}")
                    return ai_reply
            # No usable reply: drop the dangling user message so the history
            # never holds a user turn without a matching assistant turn.
            if _conversation_history and _conversation_history[-1]["role"] == "user":
                _conversation_history.pop()
            return ""
        except Exception as e:
            error_str = str(e)
            # Rate-limit errors (HTTP 429 or vendor code 1305) are retried.
            if "429" in error_str or "1305" in error_str or "请求过多" in error_str:
                if attempt < max_retries - 1:
                    print(f"[GLM] 速率限制,{retry_delay}秒后重试... (尝试 {attempt + 1}/{max_retries})")
                    await asyncio.sleep(retry_delay)
                    retry_delay *= 2  # exponential backoff
                    continue
            print(f"[GLM] 调用失败: {e}")
            import traceback
            traceback.print_exc()
            break

    # All retries failed: roll back the user message appended above.
    if _conversation_history and _conversation_history[-1]["role"] == "user":
        _conversation_history.pop()
    return "抱歉,我暂时无法回答。"
async def chat_stream(user_message: str, image_base64: Optional[str] = None) -> AsyncGenerator[str, None]:
    """
    Streaming chat with GLM-4.6v-Flash (yields the reply incrementally).

    Args:
        user_message: User message text.
        image_base64: Optional base64-encoded JPEG image (multimodal turn).

    Yields:
        Text fragments of the assistant's reply; on failure the user message
        is rolled back from history and a fixed apology string is yielded.
    """
    global _conversation_history
    client = _get_client()

    # Build the user message (multimodal content list when an image is attached).
    if image_base64:
        user_content = [
            {"type": "text", "text": user_message},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}
        ]
    else:
        user_content = user_message

    _conversation_history.append({"role": "user", "content": user_content})

    # Cap history length (one turn = 1 user + 1 assistant = 2 messages).
    max_messages = MAX_HISTORY_TURNS * 2
    if len(_conversation_history) > max_messages:
        _conversation_history = _conversation_history[-max_messages:]

    # Rebuild the message list so the system prompt carries the current time.
    messages = [{"role": "system", "content": get_system_prompt()}] + _conversation_history

    full_response = ""
    try:
        # Open the stream, retrying with exponential backoff on rate limits
        # and a flat delay on other transient errors.
        max_retries = 3
        retry_delay = 1
        response = None
        for attempt in range(max_retries):
            try:
                # `thinking` is required even for vision models per the
                # vendor docs; disabled to reduce latency.
                response = await asyncio.to_thread(
                    client.chat.completions.create,
                    model=MODEL,
                    messages=messages,
                    thinking={"type": "disabled"},
                    stream=True,
                )
                break  # stream opened successfully
            except Exception as e:
                error_str = str(e)
                if attempt < max_retries - 1:
                    if "429" in error_str or "1305" in error_str or "请求过多" in error_str:
                        print(f"[GLM] (流式) 速率限制,{retry_delay}秒后重试... ({attempt + 1}/{max_retries})")
                        await asyncio.sleep(retry_delay)
                        retry_delay *= 2
                        continue
                    # Other connection errors are also worth a retry.
                    print(f"[GLM] (流式) 连接错误: {e},重试... ({attempt + 1}/{max_retries})")
                    await asyncio.sleep(retry_delay)
                    continue
                raise  # last attempt failed — handled by the outer except

        # The SDK stream is a *blocking* iterator (the create() call above was
        # wrapped in to_thread for the same reason); pulling each chunk via
        # asyncio.to_thread keeps the event loop responsive between reads.
        stream_iter = iter(response)
        _done = object()  # sentinel marking stream exhaustion
        while True:
            chunk = await asyncio.to_thread(next, stream_iter, _done)
            if chunk is _done:
                break
            # Guard against chunks with an empty `choices` list or no delta text.
            if chunk.choices and chunk.choices[0].delta.content:
                text = chunk.choices[0].delta.content
                full_response += text
                yield text

        # Record the assembled reply so later turns keep context.
        if full_response:
            _conversation_history.append({"role": "assistant", "content": full_response})
            print(f"[GLM] 流式完成: {full_response[:50]}..." if len(full_response) > 50 else f"[GLM] 流式完成: {full_response}")
    except Exception as e:
        print(f"[GLM] 流式调用失败: {e}")
        import traceback
        traceback.print_exc()
        # Roll back the user message appended above.
        if _conversation_history and _conversation_history[-1]["role"] == "user":
            _conversation_history.pop()
        yield "抱歉,我暂时无法回答。"