""" QR码自动登录服务 后端Playwright无头模式获取二维码,前端扫码后自动保存Cookie """ import asyncio import time import base64 import json from pathlib import Path from typing import Optional, Dict, Any, List, Sequence, Mapping, Union from playwright.async_api import async_playwright, Page, Frame, BrowserContext, Browser, Playwright as PW from loguru import logger from app.core.config import settings class QRLoginService: """QR码登录服务""" # 登录监控超时 (秒) LOGIN_TIMEOUT = 180 def __init__(self, platform: str, cookies_dir: Path) -> None: self.platform = platform self.cookies_dir = cookies_dir self.qr_code_image: Optional[str] = None self.login_success: bool = False self.cookies_data: Optional[Dict[str, Any]] = None # Playwright 资源 (手动管理生命周期) self.playwright: Optional[PW] = None self.browser: Optional[Browser] = None self.context: Optional[BrowserContext] = None # 抖音 check_qrconnect API 响应拦截 self._qr_api_confirmed: bool = False self._qr_redirect_url: Optional[str] = None self._douyin_needs_verify: bool = False # 需要APP验证 # 刷脸验证二维码(点击刷脸后页面展示新二维码,需要前端再次展示给用户) self._face_verify_qr: Optional[str] = None # base64 截图 # 每个平台使用多个选择器 (使用逗号分隔,Playwright会同时等待它们) self.platform_configs = { "bilibili": { "url": "https://passport.bilibili.com/login", "qr_selectors": [ "div[class*='qrcode'] canvas", # 常见canvas二维码 "div[class*='qrcode'] img", # 常见图片二维码 ".qrcode-img img", # 旧版 ".login-scan-box img", # 扫码框 "div[class*='scan'] img" ], "success_indicator": "https://www.bilibili.com/" }, "douyin": { "url": "https://creator.douyin.com/", "qr_selectors": [ ".qrcode img", # 优先尝试 "img[alt='qrcode']", "canvas[class*='qr']", "img[src*='qr']" ], "success_indicator": "https://creator.douyin.com/creator-micro" }, "xiaohongshu": { "url": "https://creator.xiaohongshu.com/", "qr_selectors": [ ".qrcode img", "img[alt*='二维码']", "canvas.qr-code", "img[class*='qr']" ], "success_indicator": "https://creator.xiaohongshu.com/publish" }, "weixin": { "url": "https://channels.weixin.qq.com/platform/", "qr_selectors": [ "div[class*='qrcode'] img", "img[alt*='二维码']", "img[src*='qr']", "canvas", "svg", "img[class*='qr']" ], "success_indicator": "https://channels.weixin.qq.com/platform" } } def _resolve_headless_mode(self) -> str: # 抖音和微信使用 headful 模式(xvfb 虚拟显示),避免反爬检测 # 其他平台使用 headless-new if self.platform == "douyin": mode = (settings.DOUYIN_HEADLESS_MODE or "").strip().lower() return mode or "headful" if self.platform == "weixin": mode = (settings.WEIXIN_HEADLESS_MODE or "").strip().lower() return mode or "headful" return "headless-new" def _is_square_bbox(self, bbox: Optional[Dict[str, float]], min_side: int = 100) -> bool: if not bbox: return False width = bbox.get("width", 0) height = bbox.get("height", 0) if width < min_side or height < min_side: return False if height == 0: return False ratio = width / height return 0.75 <= ratio <= 1.33 async def _pick_best_candidate(self, locator, min_side: int = 100): best = None best_area = 0 try: count = await locator.count() except Exception: return None for i in range(count): try: candidate = locator.nth(i) if not await candidate.is_visible(): continue bbox = await candidate.bounding_box() if not self._is_square_bbox(bbox, min_side=min_side): continue area = bbox["width"] * bbox["height"] if area > best_area: best = candidate best_area = area except Exception: continue return best async def _find_qr_in_frames(self, page: Page, selectors: List[str], min_side: int): combined_selector = ", ".join(selectors) for frame in page.frames: if frame == page.main_frame: continue try: locator = frame.locator(combined_selector) candidate = await self._pick_best_candidate(locator, min_side=min_side) if candidate: return candidate except Exception: continue return None async def _scan_qr_candidates(self, page: Page, selectors: List[str], min_side: int): combined_selector = ", ".join(selectors) try: locator = page.locator(combined_selector) candidate = await self._pick_best_candidate(locator, min_side=min_side) if candidate: return candidate except Exception: pass return await self._find_qr_in_frames(page, selectors, min_side=min_side) async def _try_text_strategy_in_frames(self, page: Page): for frame in page.frames: if frame == page.main_frame: continue try: candidate = await self._try_text_strategy(frame) if candidate: return candidate except Exception: continue return None async def start_login(self) -> Dict[str, Any]: """ 启动登录流程 Returns: dict: 包含二维码base64和状态 """ if self.platform not in self.platform_configs: return {"success": False, "message": "不支持的平台"} config = self.platform_configs[self.platform] try: # 1. 启动 Playwright (不使用 async with,手动管理生命周期) self.playwright = await async_playwright().start() mode = self._resolve_headless_mode() headless = mode not in ("headful", "false", "0", "no") launch_args = [ '--disable-blink-features=AutomationControlled', '--no-sandbox', '--disable-dev-shm-usage', ] if headless and mode in ("new", "headless-new", "headless_new"): launch_args.append("--headless=new") if not headless: # headful 模式下 xvfb 没有 GPU,需要软件渲染 launch_args.extend([ '--use-gl=swiftshader', '--disable-gpu', ]) # Stealth模式启动浏览器 launch_options: Dict[str, Any] = { "headless": headless, "args": launch_args, } # 根据平台选择对应的浏览器配置 if self.platform == "douyin": chrome_path = (settings.DOUYIN_CHROME_PATH or "").strip() browser_channel = (settings.DOUYIN_BROWSER_CHANNEL or "").strip() user_agent = settings.DOUYIN_USER_AGENT locale = settings.DOUYIN_LOCALE timezone_id = settings.DOUYIN_TIMEZONE_ID elif self.platform == "weixin": chrome_path = (settings.WEIXIN_CHROME_PATH or "").strip() browser_channel = (settings.WEIXIN_BROWSER_CHANNEL or "").strip() user_agent = settings.WEIXIN_USER_AGENT locale = settings.WEIXIN_LOCALE timezone_id = settings.WEIXIN_TIMEZONE_ID else: # B站、小红书等:使用通用默认值 chrome_path = (settings.WEIXIN_CHROME_PATH or "").strip() browser_channel = "" user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" locale = "zh-CN" timezone_id = "Asia/Shanghai" if chrome_path and Path(chrome_path).exists(): launch_options["executable_path"] = chrome_path elif browser_channel: launch_options["channel"] = browser_channel self.browser = await self.playwright.chromium.launch(**launch_options) # 配置真实浏览器特征 self.context = await self.browser.new_context( viewport={'width': 1920, 'height': 1080}, user_agent=user_agent, locale=locale, timezone_id=timezone_id ) page = await self.context.new_page() # 注入stealth.js stealth_path = Path(__file__).parent / 'uploader' / 'stealth.min.js' if stealth_path.exists(): await page.add_init_script(path=str(stealth_path)) logger.debug(f"[{self.platform}] Stealth模式已启用") urls_to_try = [config["url"]] if self.platform == "weixin": urls_to_try = [ "https://channels.weixin.qq.com/platform/", "https://channels.weixin.qq.com/", ] qr_image = None # 抖音:拦截 QR 登录相关 API 响应,检测登录成功 if self.platform == "douyin": async def _on_douyin_qr_response(response): try: url = response.url or "" if "check_qrconnect" not in url.lower(): return body = None try: body = await response.json() except Exception: try: text = await response.text() import re as _re m = _re.search(r'\{.*\}', text, _re.DOTALL) if m: body = json.loads(m.group()) except Exception: pass if not body: return data = body.get("data", {}) redirect_url = data.get("redirect_url", "") status_val = data.get("status", "") desc = data.get("description", body.get("description", "")) logger.info( f"[douyin][qr-poll] status={status_val} " f"desc={desc[:60]} redirect={'yes' if redirect_url else 'no'}" ) # 检测需要APP验证 if "完成验证" in desc or "验证后" in desc: self._douyin_needs_verify = True logger.warning("[douyin] 需要APP验证") if self._qr_api_confirmed: return # 检测登录成功:出现 redirect_url if redirect_url: self._qr_redirect_url = redirect_url self._qr_api_confirmed = True logger.success(f"[douyin] 登录确认!redirect_url={redirect_url[:120]}") except Exception as e: logger.debug(f"[douyin][qr-poll] error: {e}") page.on("response", _on_douyin_qr_response) for url in urls_to_try: logger.info(f"[{self.platform}] 打开登录页: {url}") wait_until = "domcontentloaded" if self.platform == "weixin" else "networkidle" await page.goto(url, wait_until=wait_until) # 等待页面加载 await asyncio.sleep(1 if self.platform == "weixin" else 2) # 提取二维码 (并行策略) qr_image = await self._extract_qr_code(page, config["qr_selectors"]) if qr_image: break if not qr_image: await self._cleanup() return {"success": False, "message": "未找到二维码"} logger.info(f"[{self.platform}] 二维码已获取,等待扫码...") # 启动后台监控任务 (浏览器保持开启) asyncio.create_task( self._monitor_login_status(page, config["success_indicator"]) ) return { "success": True, "qr_code": qr_image, "message": "请扫码登录" } except Exception as e: logger.exception(f"[{self.platform}] 启动登录失败: {e}") await self._cleanup() return {"success": False, "message": f"启动失败: {str(e)}"} async def _extract_qr_code(self, page: Page, selectors: List[str]) -> Optional[str]: """ 提取二维码图片 (优化策略顺序) 抖音:CSS 优先(Text 策略每次超时 15 秒) B站:Text 优先 其他:CSS -> Text """ qr_element = None if self.platform == "douyin": # 抖音:CSS 优先,Text 备用(CSS 成功率高且快) for attempt in range(2): if attempt > 0: logger.info(f"[{self.platform}] 等待页面加载后重试...") await asyncio.sleep(2) # 策略1: CSS (快速) try: combined_selector = ", ".join(selectors) logger.debug(f"[{self.platform}] 策略CSS: 开始等待...") el = await page.wait_for_selector(combined_selector, state="visible", timeout=5000) if el: logger.info(f"[{self.platform}] 策略CSS: 匹配成功") screenshot = await el.screenshot() return base64.b64encode(screenshot).decode() except Exception as e: logger.warning(f"[{self.platform}] 策略CSS 失败: {e}") # 策略2: Text (备用) qr_element = await self._try_text_strategy(page) if qr_element: try: screenshot = await qr_element.screenshot() return base64.b64encode(screenshot).decode() except Exception as e: logger.warning(f"[{self.platform}] Text策略截图失败: {e}") elif self.platform == "bilibili": # B站:Text 优先 for attempt in range(2): if attempt > 0: logger.info(f"[{self.platform}] 等待页面加载后重试...") await asyncio.sleep(2) qr_element = await self._try_text_strategy(page) if qr_element: try: screenshot = await qr_element.screenshot() return base64.b64encode(screenshot).decode() except Exception as e: logger.warning(f"[{self.platform}] Text策略截图失败: {e}") qr_element = None if not qr_element: try: combined_selector = ", ".join(selectors) logger.debug(f"[{self.platform}] 策略CSS: 开始等待...") el = await page.wait_for_selector(combined_selector, state="visible", timeout=5000) if el: logger.info(f"[{self.platform}] 策略CSS: 匹配成功") screenshot = await el.screenshot() return base64.b64encode(screenshot).decode() except Exception as e: logger.warning(f"[{self.platform}] 策略CSS 失败: {e}") else: # 其他平台 (小红书/微信等):保持原顺序 CSS -> Text # 策略1: CSS 选择器 try: combined_selector = ", ".join(selectors) logger.debug(f"[{self.platform}] 策略1(CSS): 开始等待...") if self.platform == "weixin": min_side = 120 start_time = time.monotonic() while time.monotonic() - start_time < 12: qr_element = await self._scan_qr_candidates(page, selectors, min_side=min_side) if qr_element: logger.info(f"[{self.platform}] 策略1(CSS): 匹配成功") break await asyncio.sleep(0.5) else: await page.wait_for_selector(combined_selector, state="visible", timeout=5000) locator = page.locator(combined_selector) qr_element = await self._pick_best_candidate(locator, min_side=100) if qr_element: logger.info(f"[{self.platform}] 策略1(CSS): 匹配成功") except Exception as e: logger.warning(f"[{self.platform}] 策略1(CSS) 失败: {e}") # 策略2: Text if not qr_element: qr_element = await self._try_text_strategy(page) if not qr_element and self.platform == "weixin": qr_element = await self._try_text_strategy_in_frames(page) # 如果找到元素,截图返回 if qr_element: try: screenshot = await qr_element.screenshot() return base64.b64encode(screenshot).decode() except Exception as e: logger.error(f"[{self.platform}] 截图失败: {e}") # 所有策略失败 logger.error(f"[{self.platform}] 所有QR码提取策略失败") return None async def _try_text_strategy(self, page: Union[Page, Frame]) -> Optional[Any]: """基于文本查找二维码图片""" try: logger.debug(f"[{self.platform}] 策略Text: 开始搜索...") keywords = [ "扫码登录", "二维码", "打开抖音", "抖音APP", "使用APP扫码", "微信扫码", "请使用微信扫码", "视频号" ] for kw in keywords: try: text_el = page.get_by_text(kw, exact=False).first await text_el.wait_for(state="visible", timeout=2000) # 向上查找图片 parent = text_el for _ in range(5): parent = parent.locator("..") candidates = parent.locator("img, canvas") min_side = 120 if self.platform == "weixin" else 100 best = await self._pick_best_candidate(candidates, min_side=min_side) if best: logger.info(f"[{self.platform}] 策略Text: 成功") return best except Exception: continue except Exception as e: logger.warning(f"[{self.platform}] 策略Text 失败: {e}") return None async def _monitor_login_status(self, page: Page, success_url: str): """监控登录状态 — 简洁版 策略: 1. 监听页面 URL 变化和 session cookie 出现(通用,适用所有平台) 2. 抖音特殊:如果 API 拦截到 redirect_url,直接导航过去拿 cookie 3. 抖音特殊:如果需要APP验证且JS轮询停了,等用户验证完后 用 page.goto 重新访问首页,让服务器分配 session """ try: logger.info(f"[{self.platform}] 开始监控登录状态...") key_cookies = { "bilibili": ["SESSDATA"], "douyin": ["sessionid", "sessionid_ss", "sid_guard", "sid_tt", "uid_tt"], "xiaohongshu": ["web_session"], "weixin": ["wxuin", "wxsid", "pass_ticket", "uin", "skey", "p_uin", "p_skey", "pac_uid"], } target_cookies = key_cookies.get(self.platform, []) initial_url = page.url _verify_detected_at: Optional[int] = None # 检测到需要验证的时间点(循环计数) for i in range(self.LOGIN_TIMEOUT): await asyncio.sleep(1) if not self.context: break try: # ── 检查 session cookie ── cookies = [dict(c) for c in await self.context.cookies()] cookie_names = [c.get("name") for c in cookies] has_session = any(n in cookie_names for n in target_cookies) if target_cookies else False current_url = page.url # 每10秒打一次日志 if i % 10 == 0: logger.info( f"[{self.platform}] 等待登录... i={i} " f"URL={current_url[:80]} session={has_session} " f"cookies={len(cookies)}" ) # ── 成功条件:有 session cookie ── if has_session: logger.success(f"[{self.platform}] 登录成功!检测到session cookie") self.login_success = True await asyncio.sleep(2) final = [dict(c) for c in await self.context.cookies()] await self._save_cookies(final) break # ── 成功条件:URL 跳转到目标页 ── if success_url in current_url: logger.success(f"[{self.platform}] 登录成功!URL={current_url[:80]}") self.login_success = True await asyncio.sleep(2) final = [dict(c) for c in await self.context.cookies()] await self._save_cookies(final) break # ── 抖音:API 拦截到 redirect_url → 直接导航 ── if self.platform == "douyin" and self._qr_api_confirmed and self._qr_redirect_url: logger.info(f"[douyin] 导航到 redirect_url...") try: await page.goto(self._qr_redirect_url, wait_until="domcontentloaded", timeout=30000) except Exception: pass await asyncio.sleep(3) # 重置,下一轮循环会检查 cookie self._qr_api_confirmed = False self._qr_redirect_url = None continue # ── 抖音:需要APP验证,点击"手机刷脸验证"选项 ── if self.platform == "douyin" and self._douyin_needs_verify: if _verify_detected_at is None: _verify_detected_at = i logger.info("[douyin] 检测到身份验证弹窗,将点击手机刷脸验证...") elapsed = i - _verify_detected_at # 第一次:点击"手机刷脸验证"选项 if elapsed == 2: try: clicked = await page.evaluate("""() => { // 查找身份验证弹窗中的选项 const allEls = document.querySelectorAll('div, span, p, a, li'); for (const el of allEls) { const text = (el.textContent || '').trim(); // 点击"手机刷脸验证" if (text.includes('刷脸验证') && text.length < 30) { el.click(); return '刷脸验证'; } } return null; }""") if clicked: logger.info(f"[douyin] 已点击验证选项: {clicked}") else: logger.warning("[douyin] 未找到验证选项") except Exception as e: logger.warning(f"[douyin] 点击验证选项异常: {e}") # 点击后等待新二维码出现,提取弹窗内二维码截图 if elapsed == 5 and not self._face_verify_qr: try: # 用 JS 在"刷脸验证"弹窗内找最大的正方形 img(即二维码,跳过头像) qr_selector = await page.evaluate("""() => { // 找到包含"刷脸验证"文字的弹窗 const allEls = document.querySelectorAll('div, h2, h3, span, p'); let modal = null; for (const el of allEls) { const text = (el.textContent || '').trim(); if (text.includes('刷脸验证') && text.length < 20) { modal = el; for (let i = 0; i < 8; i++) { if (!modal.parentElement) break; modal = modal.parentElement; if (modal.offsetWidth > 250 && modal.offsetHeight > 250) break; } break; } } if (!modal) return null; // 用 offsetWidth/Height(显示尺寸)而非 naturalWidth(源文件可能很大) const imgs = modal.querySelectorAll('img'); let best = null; let bestArea = 0; for (const img of imgs) { const w = img.offsetWidth; const h = img.offsetHeight; if (w < 80 || h < 80) continue; const ratio = Math.abs(w - h) / Math.max(w, h); if (ratio > 0.3) continue; const area = w * h; if (area > bestArea) { bestArea = area; best = img; } } if (best) { best.setAttribute('data-face-qr', 'true'); return 'img[data-face-qr="true"]'; } return null; }""") if qr_selector: qr_el = page.locator(qr_selector).first if await qr_el.is_visible(): screenshot = await qr_el.screenshot() self._face_verify_qr = base64.b64encode(screenshot).decode() logger.info("[douyin] 刷脸弹窗内二维码截图已捕获") else: logger.warning("[douyin] 二维码元素不可见") if not self._face_verify_qr: # 兜底:整页截图 logger.warning("[douyin] 未在弹窗内找到二维码,使用全页截图") screenshot = await page.screenshot() self._face_verify_qr = base64.b64encode(screenshot).decode() except Exception as e: logger.warning(f"[douyin] 截取刷脸二维码异常: {e}") # 之后每10秒打一次日志 if elapsed > 0 and elapsed % 10 == 0: logger.info(f"[douyin] 等待用户完成手机验证... ({elapsed}s)") except Exception as e: logger.warning(f"[{self.platform}] 监控异常: {e}") if not self.login_success: logger.warning(f"[{self.platform}] 登录超时") except Exception as e: logger.error(f"[{self.platform}] 监控异常: {e}") finally: await self._cleanup() async def _cleanup(self) -> None: """清理资源""" if self.context: try: await self.context.close() except Exception: pass self.context = None if self.browser: try: await self.browser.close() except Exception: pass self.browser = None if self.playwright: try: await self.playwright.stop() except Exception: pass self.playwright = None async def _save_cookies(self, cookies: Sequence[Mapping[str, Any]]) -> None: """保存Cookie到文件""" try: cookie_file = self.cookies_dir / f"{self.platform}_cookies.json" if self.platform == "bilibili": # Bilibili 使用简单格式 (biliup库需要) cookie_dict = {c.get('name'): c.get('value') for c in cookies if c.get('name')} required = ['SESSDATA', 'bili_jct', 'DedeUserID', 'DedeUserID__ckMd5'] cookie_dict = {k: v for k, v in cookie_dict.items() if k in required} with open(cookie_file, 'w', encoding='utf-8') as f: json.dump(cookie_dict, f, indent=2) self.cookies_data = cookie_dict else: # Douyin/Xiaohongshu 使用 Playwright storage_state 完整格式 # 这样可以直接用 browser.new_context(storage_state=file) storage_state = { "cookies": cookies, "origins": [] } with open(cookie_file, 'w', encoding='utf-8') as f: json.dump(storage_state, f, indent=2) self.cookies_data = storage_state logger.success(f"[{self.platform}] Cookie已保存") except Exception as e: logger.error(f"[{self.platform}] 保存Cookie失败: {e}") def get_login_status(self) -> Dict[str, Any]: """获取登录状态""" result: Dict[str, Any] = { "success": self.login_success, "cookies_saved": self.cookies_data is not None } # 刷脸验证:返回新二维码截图给前端展示 if self._face_verify_qr: result["face_verify_qr"] = self._face_verify_qr return result