Files
ViGent2/backend/app/services/qr_login_service.py
Kevin Wong ee342cc40f 更新
2026-02-08 16:23:39 +08:00

737 lines
33 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
QR码自动登录服务
后端Playwright无头模式获取二维码前端扫码后自动保存Cookie
"""
import asyncio
import time
import base64
import json
from pathlib import Path
from typing import Optional, Dict, Any, List, Sequence, Mapping, Union
from playwright.async_api import async_playwright, Page, Frame, BrowserContext, Browser, Playwright as PW
from loguru import logger
from app.core.config import settings
class QRLoginService:
"""QR码登录服务"""
# 登录监控超时 (秒)
LOGIN_TIMEOUT = 180
def __init__(self, platform: str, cookies_dir: Path) -> None:
self.platform = platform
self.cookies_dir = cookies_dir
self.qr_code_image: Optional[str] = None
self.login_success: bool = False
self.cookies_data: Optional[Dict[str, Any]] = None
# Playwright 资源 (手动管理生命周期)
self.playwright: Optional[PW] = None
self.browser: Optional[Browser] = None
self.context: Optional[BrowserContext] = None
# 抖音 check_qrconnect API 响应拦截
self._qr_api_confirmed: bool = False
self._qr_redirect_url: Optional[str] = None
self._douyin_needs_verify: bool = False # 需要APP验证
# 刷脸验证二维码(点击刷脸后页面展示新二维码,需要前端再次展示给用户)
self._face_verify_qr: Optional[str] = None # base64 截图
# 每个平台使用多个选择器 (使用逗号分隔Playwright会同时等待它们)
self.platform_configs = {
"bilibili": {
"url": "https://passport.bilibili.com/login",
"qr_selectors": [
"div[class*='qrcode'] canvas", # 常见canvas二维码
"div[class*='qrcode'] img", # 常见图片二维码
".qrcode-img img", # 旧版
".login-scan-box img", # 扫码框
"div[class*='scan'] img"
],
"success_indicator": "https://www.bilibili.com/"
},
"douyin": {
"url": "https://creator.douyin.com/",
"qr_selectors": [
".qrcode img", # 优先尝试
"img[alt='qrcode']",
"canvas[class*='qr']",
"img[src*='qr']"
],
"success_indicator": "https://creator.douyin.com/creator-micro"
},
"xiaohongshu": {
"url": "https://creator.xiaohongshu.com/",
"qr_selectors": [
".qrcode img",
"img[alt*='二维码']",
"canvas.qr-code",
"img[class*='qr']"
],
"success_indicator": "https://creator.xiaohongshu.com/publish"
},
"weixin": {
"url": "https://channels.weixin.qq.com/platform/",
"qr_selectors": [
"div[class*='qrcode'] img",
"img[alt*='二维码']",
"img[src*='qr']",
"canvas",
"svg",
"img[class*='qr']"
],
"success_indicator": "https://channels.weixin.qq.com/platform"
}
}
def _resolve_headless_mode(self) -> str:
# 抖音和微信使用 headful 模式xvfb 虚拟显示),避免反爬检测
# 其他平台使用 headless-new
if self.platform == "douyin":
mode = (settings.DOUYIN_HEADLESS_MODE or "").strip().lower()
return mode or "headful"
if self.platform == "weixin":
mode = (settings.WEIXIN_HEADLESS_MODE or "").strip().lower()
return mode or "headful"
return "headless-new"
def _is_square_bbox(self, bbox: Optional[Dict[str, float]], min_side: int = 100) -> bool:
if not bbox:
return False
width = bbox.get("width", 0)
height = bbox.get("height", 0)
if width < min_side or height < min_side:
return False
if height == 0:
return False
ratio = width / height
return 0.75 <= ratio <= 1.33
async def _pick_best_candidate(self, locator, min_side: int = 100):
best = None
best_area = 0
try:
count = await locator.count()
except Exception:
return None
for i in range(count):
try:
candidate = locator.nth(i)
if not await candidate.is_visible():
continue
bbox = await candidate.bounding_box()
if not self._is_square_bbox(bbox, min_side=min_side):
continue
area = bbox["width"] * bbox["height"]
if area > best_area:
best = candidate
best_area = area
except Exception:
continue
return best
async def _find_qr_in_frames(self, page: Page, selectors: List[str], min_side: int):
combined_selector = ", ".join(selectors)
for frame in page.frames:
if frame == page.main_frame:
continue
try:
locator = frame.locator(combined_selector)
candidate = await self._pick_best_candidate(locator, min_side=min_side)
if candidate:
return candidate
except Exception:
continue
return None
async def _scan_qr_candidates(self, page: Page, selectors: List[str], min_side: int):
combined_selector = ", ".join(selectors)
try:
locator = page.locator(combined_selector)
candidate = await self._pick_best_candidate(locator, min_side=min_side)
if candidate:
return candidate
except Exception:
pass
return await self._find_qr_in_frames(page, selectors, min_side=min_side)
async def _try_text_strategy_in_frames(self, page: Page):
for frame in page.frames:
if frame == page.main_frame:
continue
try:
candidate = await self._try_text_strategy(frame)
if candidate:
return candidate
except Exception:
continue
return None
async def start_login(self) -> Dict[str, Any]:
"""
启动登录流程
Returns:
dict: 包含二维码base64和状态
"""
if self.platform not in self.platform_configs:
return {"success": False, "message": "不支持的平台"}
config = self.platform_configs[self.platform]
try:
# 1. 启动 Playwright (不使用 async with手动管理生命周期)
self.playwright = await async_playwright().start()
mode = self._resolve_headless_mode()
headless = mode not in ("headful", "false", "0", "no")
launch_args = [
'--disable-blink-features=AutomationControlled',
'--no-sandbox',
'--disable-dev-shm-usage',
]
if headless and mode in ("new", "headless-new", "headless_new"):
launch_args.append("--headless=new")
if not headless:
# headful 模式下 xvfb 没有 GPU需要软件渲染
launch_args.extend([
'--use-gl=swiftshader',
'--disable-gpu',
])
# Stealth模式启动浏览器
launch_options: Dict[str, Any] = {
"headless": headless,
"args": launch_args,
}
# 根据平台选择对应的浏览器配置
if self.platform == "douyin":
chrome_path = (settings.DOUYIN_CHROME_PATH or "").strip()
browser_channel = (settings.DOUYIN_BROWSER_CHANNEL or "").strip()
user_agent = settings.DOUYIN_USER_AGENT
locale = settings.DOUYIN_LOCALE
timezone_id = settings.DOUYIN_TIMEZONE_ID
elif self.platform == "weixin":
chrome_path = (settings.WEIXIN_CHROME_PATH or "").strip()
browser_channel = (settings.WEIXIN_BROWSER_CHANNEL or "").strip()
user_agent = settings.WEIXIN_USER_AGENT
locale = settings.WEIXIN_LOCALE
timezone_id = settings.WEIXIN_TIMEZONE_ID
else:
# B站、小红书等使用通用默认值
chrome_path = (settings.WEIXIN_CHROME_PATH or "").strip()
browser_channel = ""
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
locale = "zh-CN"
timezone_id = "Asia/Shanghai"
if chrome_path and Path(chrome_path).exists():
launch_options["executable_path"] = chrome_path
elif browser_channel:
launch_options["channel"] = browser_channel
self.browser = await self.playwright.chromium.launch(**launch_options)
# 配置真实浏览器特征
self.context = await self.browser.new_context(
viewport={'width': 1920, 'height': 1080},
user_agent=user_agent,
locale=locale,
timezone_id=timezone_id
)
page = await self.context.new_page()
# 注入stealth.js
stealth_path = Path(__file__).parent / 'uploader' / 'stealth.min.js'
if stealth_path.exists():
await page.add_init_script(path=str(stealth_path))
logger.debug(f"[{self.platform}] Stealth模式已启用")
urls_to_try = [config["url"]]
if self.platform == "weixin":
urls_to_try = [
"https://channels.weixin.qq.com/platform/",
"https://channels.weixin.qq.com/",
]
qr_image = None
# 抖音:拦截 QR 登录相关 API 响应,检测登录成功
if self.platform == "douyin":
async def _on_douyin_qr_response(response):
try:
url = response.url or ""
if "check_qrconnect" not in url.lower():
return
body = None
try:
body = await response.json()
except Exception:
try:
text = await response.text()
import re as _re
m = _re.search(r'\{.*\}', text, _re.DOTALL)
if m:
body = json.loads(m.group())
except Exception:
pass
if not body:
return
data = body.get("data", {})
redirect_url = data.get("redirect_url", "")
status_val = data.get("status", "")
desc = data.get("description", body.get("description", ""))
logger.info(
f"[douyin][qr-poll] status={status_val} "
f"desc={desc[:60]} redirect={'yes' if redirect_url else 'no'}"
)
# 检测需要APP验证
if "完成验证" in desc or "验证后" in desc:
self._douyin_needs_verify = True
logger.warning("[douyin] 需要APP验证")
if self._qr_api_confirmed:
return
# 检测登录成功:出现 redirect_url
if redirect_url:
self._qr_redirect_url = redirect_url
self._qr_api_confirmed = True
logger.success(f"[douyin] 登录确认redirect_url={redirect_url[:120]}")
except Exception as e:
logger.debug(f"[douyin][qr-poll] error: {e}")
page.on("response", _on_douyin_qr_response)
for url in urls_to_try:
logger.info(f"[{self.platform}] 打开登录页: {url}")
wait_until = "domcontentloaded" if self.platform == "weixin" else "networkidle"
await page.goto(url, wait_until=wait_until)
# 等待页面加载
await asyncio.sleep(1 if self.platform == "weixin" else 2)
# 提取二维码 (并行策略)
qr_image = await self._extract_qr_code(page, config["qr_selectors"])
if qr_image:
break
if not qr_image:
await self._cleanup()
return {"success": False, "message": "未找到二维码"}
logger.info(f"[{self.platform}] 二维码已获取,等待扫码...")
# 启动后台监控任务 (浏览器保持开启)
asyncio.create_task(
self._monitor_login_status(page, config["success_indicator"])
)
return {
"success": True,
"qr_code": qr_image,
"message": "请扫码登录"
}
except Exception as e:
logger.exception(f"[{self.platform}] 启动登录失败: {e}")
await self._cleanup()
return {"success": False, "message": f"启动失败: {str(e)}"}
async def _extract_qr_code(self, page: Page, selectors: List[str]) -> Optional[str]:
"""
提取二维码图片 (优化策略顺序)
抖音CSS 优先Text 策略每次超时 15 秒)
B站Text 优先
其他CSS -> Text
"""
qr_element = None
if self.platform == "douyin":
# 抖音CSS 优先Text 备用CSS 成功率高且快)
for attempt in range(2):
if attempt > 0:
logger.info(f"[{self.platform}] 等待页面加载后重试...")
await asyncio.sleep(2)
# 策略1: CSS (快速)
try:
combined_selector = ", ".join(selectors)
logger.debug(f"[{self.platform}] 策略CSS: 开始等待...")
el = await page.wait_for_selector(combined_selector, state="visible", timeout=5000)
if el:
logger.info(f"[{self.platform}] 策略CSS: 匹配成功")
screenshot = await el.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.warning(f"[{self.platform}] 策略CSS 失败: {e}")
# 策略2: Text (备用)
qr_element = await self._try_text_strategy(page)
if qr_element:
try:
screenshot = await qr_element.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.warning(f"[{self.platform}] Text策略截图失败: {e}")
elif self.platform == "bilibili":
# B站Text 优先
for attempt in range(2):
if attempt > 0:
logger.info(f"[{self.platform}] 等待页面加载后重试...")
await asyncio.sleep(2)
qr_element = await self._try_text_strategy(page)
if qr_element:
try:
screenshot = await qr_element.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.warning(f"[{self.platform}] Text策略截图失败: {e}")
qr_element = None
if not qr_element:
try:
combined_selector = ", ".join(selectors)
logger.debug(f"[{self.platform}] 策略CSS: 开始等待...")
el = await page.wait_for_selector(combined_selector, state="visible", timeout=5000)
if el:
logger.info(f"[{self.platform}] 策略CSS: 匹配成功")
screenshot = await el.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.warning(f"[{self.platform}] 策略CSS 失败: {e}")
else:
# 其他平台 (小红书/微信等):保持原顺序 CSS -> Text
# 策略1: CSS 选择器
try:
combined_selector = ", ".join(selectors)
logger.debug(f"[{self.platform}] 策略1(CSS): 开始等待...")
if self.platform == "weixin":
min_side = 120
start_time = time.monotonic()
while time.monotonic() - start_time < 12:
qr_element = await self._scan_qr_candidates(page, selectors, min_side=min_side)
if qr_element:
logger.info(f"[{self.platform}] 策略1(CSS): 匹配成功")
break
await asyncio.sleep(0.5)
else:
await page.wait_for_selector(combined_selector, state="visible", timeout=5000)
locator = page.locator(combined_selector)
qr_element = await self._pick_best_candidate(locator, min_side=100)
if qr_element:
logger.info(f"[{self.platform}] 策略1(CSS): 匹配成功")
except Exception as e:
logger.warning(f"[{self.platform}] 策略1(CSS) 失败: {e}")
# 策略2: Text
if not qr_element:
qr_element = await self._try_text_strategy(page)
if not qr_element and self.platform == "weixin":
qr_element = await self._try_text_strategy_in_frames(page)
# 如果找到元素,截图返回
if qr_element:
try:
screenshot = await qr_element.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.error(f"[{self.platform}] 截图失败: {e}")
# 所有策略失败
logger.error(f"[{self.platform}] 所有QR码提取策略失败")
return None
async def _try_text_strategy(self, page: Union[Page, Frame]) -> Optional[Any]:
"""基于文本查找二维码图片"""
try:
logger.debug(f"[{self.platform}] 策略Text: 开始搜索...")
keywords = [
"扫码登录",
"二维码",
"打开抖音",
"抖音APP",
"使用APP扫码",
"微信扫码",
"请使用微信扫码",
"视频号"
]
for kw in keywords:
try:
text_el = page.get_by_text(kw, exact=False).first
await text_el.wait_for(state="visible", timeout=2000)
# 向上查找图片
parent = text_el
for _ in range(5):
parent = parent.locator("..")
candidates = parent.locator("img, canvas")
min_side = 120 if self.platform == "weixin" else 100
best = await self._pick_best_candidate(candidates, min_side=min_side)
if best:
logger.info(f"[{self.platform}] 策略Text: 成功")
return best
except Exception:
continue
except Exception as e:
logger.warning(f"[{self.platform}] 策略Text 失败: {e}")
return None
async def _monitor_login_status(self, page: Page, success_url: str):
"""监控登录状态 — 简洁版
策略:
1. 监听页面 URL 变化和 session cookie 出现(通用,适用所有平台)
2. 抖音特殊:如果 API 拦截到 redirect_url直接导航过去拿 cookie
3. 抖音特殊如果需要APP验证且JS轮询停了等用户验证完后
用 page.goto 重新访问首页,让服务器分配 session
"""
try:
logger.info(f"[{self.platform}] 开始监控登录状态...")
key_cookies = {
"bilibili": ["SESSDATA"],
"douyin": ["sessionid", "sessionid_ss", "sid_guard", "sid_tt", "uid_tt"],
"xiaohongshu": ["web_session"],
"weixin": ["wxuin", "wxsid", "pass_ticket", "uin", "skey",
"p_uin", "p_skey", "pac_uid"],
}
target_cookies = key_cookies.get(self.platform, [])
initial_url = page.url
_verify_detected_at: Optional[int] = None # 检测到需要验证的时间点(循环计数)
for i in range(self.LOGIN_TIMEOUT):
await asyncio.sleep(1)
if not self.context:
break
try:
# ── 检查 session cookie ──
cookies = [dict(c) for c in await self.context.cookies()]
cookie_names = [c.get("name") for c in cookies]
has_session = any(n in cookie_names for n in target_cookies) if target_cookies else False
current_url = page.url
# 每10秒打一次日志
if i % 10 == 0:
logger.info(
f"[{self.platform}] 等待登录... i={i} "
f"URL={current_url[:80]} session={has_session} "
f"cookies={len(cookies)}"
)
# ── 成功条件:有 session cookie ──
if has_session:
logger.success(f"[{self.platform}] 登录成功检测到session cookie")
self.login_success = True
await asyncio.sleep(2)
final = [dict(c) for c in await self.context.cookies()]
await self._save_cookies(final)
break
# ── 成功条件URL 跳转到目标页 ──
if success_url in current_url:
logger.success(f"[{self.platform}] 登录成功URL={current_url[:80]}")
self.login_success = True
await asyncio.sleep(2)
final = [dict(c) for c in await self.context.cookies()]
await self._save_cookies(final)
break
# ── 抖音API 拦截到 redirect_url → 直接导航 ──
if self.platform == "douyin" and self._qr_api_confirmed and self._qr_redirect_url:
logger.info(f"[douyin] 导航到 redirect_url...")
try:
await page.goto(self._qr_redirect_url, wait_until="domcontentloaded", timeout=30000)
except Exception:
pass
await asyncio.sleep(3)
# 重置,下一轮循环会检查 cookie
self._qr_api_confirmed = False
self._qr_redirect_url = None
continue
# ── 抖音需要APP验证点击"手机刷脸验证"选项 ──
if self.platform == "douyin" and self._douyin_needs_verify:
if _verify_detected_at is None:
_verify_detected_at = i
logger.info("[douyin] 检测到身份验证弹窗,将点击手机刷脸验证...")
elapsed = i - _verify_detected_at
# 第一次:点击"手机刷脸验证"选项
if elapsed == 2:
try:
clicked = await page.evaluate("""() => {
// 查找身份验证弹窗中的选项
const allEls = document.querySelectorAll('div, span, p, a, li');
for (const el of allEls) {
const text = (el.textContent || '').trim();
// 点击"手机刷脸验证"
if (text.includes('刷脸验证') && text.length < 30) {
el.click();
return '刷脸验证';
}
}
return null;
}""")
if clicked:
logger.info(f"[douyin] 已点击验证选项: {clicked}")
else:
logger.warning("[douyin] 未找到验证选项")
except Exception as e:
logger.warning(f"[douyin] 点击验证选项异常: {e}")
# 点击后等待新二维码出现,提取弹窗内二维码截图
if elapsed == 5 and not self._face_verify_qr:
try:
# 用 JS 在"刷脸验证"弹窗内找最大的正方形 img即二维码跳过头像
qr_selector = await page.evaluate("""() => {
// 找到包含"刷脸验证"文字的弹窗
const allEls = document.querySelectorAll('div, h2, h3, span, p');
let modal = null;
for (const el of allEls) {
const text = (el.textContent || '').trim();
if (text.includes('刷脸验证') && text.length < 20) {
modal = el;
for (let i = 0; i < 8; i++) {
if (!modal.parentElement) break;
modal = modal.parentElement;
if (modal.offsetWidth > 250 && modal.offsetHeight > 250) break;
}
break;
}
}
if (!modal) return null;
// 用 offsetWidth/Height显示尺寸而非 naturalWidth源文件可能很大
const imgs = modal.querySelectorAll('img');
let best = null;
let bestArea = 0;
for (const img of imgs) {
const w = img.offsetWidth;
const h = img.offsetHeight;
if (w < 80 || h < 80) continue;
const ratio = Math.abs(w - h) / Math.max(w, h);
if (ratio > 0.3) continue;
const area = w * h;
if (area > bestArea) {
bestArea = area;
best = img;
}
}
if (best) {
best.setAttribute('data-face-qr', 'true');
return 'img[data-face-qr="true"]';
}
return null;
}""")
if qr_selector:
qr_el = page.locator(qr_selector).first
if await qr_el.is_visible():
screenshot = await qr_el.screenshot()
self._face_verify_qr = base64.b64encode(screenshot).decode()
logger.info("[douyin] 刷脸弹窗内二维码截图已捕获")
else:
logger.warning("[douyin] 二维码元素不可见")
if not self._face_verify_qr:
# 兜底:整页截图
logger.warning("[douyin] 未在弹窗内找到二维码,使用全页截图")
screenshot = await page.screenshot()
self._face_verify_qr = base64.b64encode(screenshot).decode()
except Exception as e:
logger.warning(f"[douyin] 截取刷脸二维码异常: {e}")
# 之后每10秒打一次日志
if elapsed > 0 and elapsed % 10 == 0:
logger.info(f"[douyin] 等待用户完成手机验证... ({elapsed}s)")
except Exception as e:
logger.warning(f"[{self.platform}] 监控异常: {e}")
if not self.login_success:
logger.warning(f"[{self.platform}] 登录超时")
except Exception as e:
logger.error(f"[{self.platform}] 监控异常: {e}")
finally:
await self._cleanup()
async def _cleanup(self) -> None:
"""清理资源"""
if self.context:
try:
await self.context.close()
except Exception:
pass
self.context = None
if self.browser:
try:
await self.browser.close()
except Exception:
pass
self.browser = None
if self.playwright:
try:
await self.playwright.stop()
except Exception:
pass
self.playwright = None
async def _save_cookies(self, cookies: Sequence[Mapping[str, Any]]) -> None:
"""保存Cookie到文件"""
try:
cookie_file = self.cookies_dir / f"{self.platform}_cookies.json"
if self.platform == "bilibili":
# Bilibili 使用简单格式 (biliup库需要)
cookie_dict = {c.get('name'): c.get('value') for c in cookies if c.get('name')}
required = ['SESSDATA', 'bili_jct', 'DedeUserID', 'DedeUserID__ckMd5']
cookie_dict = {k: v for k, v in cookie_dict.items() if k in required}
with open(cookie_file, 'w', encoding='utf-8') as f:
json.dump(cookie_dict, f, indent=2)
self.cookies_data = cookie_dict
else:
# Douyin/Xiaohongshu 使用 Playwright storage_state 完整格式
# 这样可以直接用 browser.new_context(storage_state=file)
storage_state = {
"cookies": cookies,
"origins": []
}
with open(cookie_file, 'w', encoding='utf-8') as f:
json.dump(storage_state, f, indent=2)
self.cookies_data = storage_state
logger.success(f"[{self.platform}] Cookie已保存")
except Exception as e:
logger.error(f"[{self.platform}] 保存Cookie失败: {e}")
def get_login_status(self) -> Dict[str, Any]:
"""获取登录状态"""
result: Dict[str, Any] = {
"success": self.login_success,
"cookies_saved": self.cookies_data is not None
}
# 刷脸验证:返回新二维码截图给前端展示
if self._face_verify_qr:
result["face_verify_qr"] = self._face_verify_qr
return result