This commit is contained in:
Kevin Wong
2026-02-08 16:23:39 +08:00
parent 1a291a03b8
commit ee342cc40f
24 changed files with 1414 additions and 1082 deletions

View File

@@ -30,7 +30,7 @@ class Settings(BaseSettings):
# Douyin Playwright 配置
DOUYIN_HEADLESS_MODE: str = "headless-new"
DOUYIN_USER_AGENT: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
DOUYIN_USER_AGENT: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36"
DOUYIN_LOCALE: str = "zh-CN"
DOUYIN_TIMEZONE_ID: str = "Asia/Shanghai"
DOUYIN_CHROME_PATH: str = "/usr/bin/google-chrome"

View File

@@ -15,5 +15,7 @@ class GenerateRequest(BaseModel):
title_style_id: Optional[str] = None
subtitle_font_size: Optional[int] = None
title_font_size: Optional[int] = None
title_top_margin: Optional[int] = None
subtitle_bottom_margin: Optional[int] = None
bgm_id: Optional[str] = None
bgm_volume: Optional[float] = 0.2

View File

@@ -216,6 +216,16 @@ async def process_video_generation(task_id: str, req: GenerateRequest, user_id:
title_style = {}
title_style["font_size"] = int(req.title_font_size)
if req.title_top_margin is not None and req.title:
if title_style is None:
title_style = {}
title_style["top_margin"] = int(req.title_top_margin)
if req.subtitle_bottom_margin is not None and req.enable_subtitles:
if subtitle_style is None:
subtitle_style = {}
subtitle_style["bottom_margin"] = int(req.subtitle_bottom_margin)
if use_remotion:
subtitle_style = prepare_style_for_remotion(
subtitle_style,

View File

@@ -17,20 +17,20 @@ from app.services.storage import storage_service
# Import platform uploaders
from .uploader.bilibili_uploader import BilibiliUploader
from .uploader.douyin_uploader import DouyinUploader
from .uploader.xiaohongshu_uploader import XiaohongshuUploader
from .uploader.weixin_uploader import WeixinUploader
from .uploader.xiaohongshu_uploader import XiaohongshuUploader
from .uploader.weixin_uploader import WeixinUploader
class PublishService:
"""Social media publishing service (with user isolation)"""
# 支持的平台配置
PLATFORMS: Dict[str, Dict[str, Any]] = {
"douyin": {"name": "抖音", "url": "https://creator.douyin.com/", "enabled": True},
"weixin": {"name": "微信视频号", "url": "https://channels.weixin.qq.com/", "enabled": True},
"bilibili": {"name": "B站", "url": "https://member.bilibili.com/platform/upload/video/frame", "enabled": True},
"xiaohongshu": {"name": "小红书", "url": "https://creator.xiaohongshu.com/", "enabled": True},
}
PLATFORMS: Dict[str, Dict[str, Any]] = {
"douyin": {"name": "抖音", "url": "https://creator.douyin.com/", "enabled": True},
"weixin": {"name": "微信视频号", "url": "https://channels.weixin.qq.com/", "enabled": True},
"bilibili": {"name": "B站", "url": "https://member.bilibili.com/platform/upload/video/frame", "enabled": True},
"xiaohongshu": {"name": "小红书", "url": "https://creator.xiaohongshu.com/", "enabled": True},
}
def __init__(self) -> None:
# 存储活跃的登录会话,用于跟踪登录状态
@@ -175,36 +175,36 @@ class PublishService:
tid=kwargs.get('tid', 122),
copyright=kwargs.get('copyright', 1)
)
elif platform == "douyin":
uploader = DouyinUploader(
title=title,
file_path=local_video_path,
tags=tags,
publish_date=publish_time,
account_file=str(account_file),
description=description,
user_id=user_id,
)
elif platform == "xiaohongshu":
uploader = XiaohongshuUploader(
title=title,
file_path=local_video_path,
tags=tags,
publish_date=publish_time,
account_file=str(account_file),
description=description
)
elif platform == "weixin":
uploader = WeixinUploader(
title=title,
file_path=local_video_path,
tags=tags,
publish_date=publish_time,
account_file=str(account_file),
description=description,
user_id=user_id,
)
else:
elif platform == "douyin":
uploader = DouyinUploader(
title=title,
file_path=local_video_path,
tags=tags,
publish_date=publish_time,
account_file=str(account_file),
description=description,
user_id=user_id,
)
elif platform == "xiaohongshu":
uploader = XiaohongshuUploader(
title=title,
file_path=local_video_path,
tags=tags,
publish_date=publish_time,
account_file=str(account_file),
description=description
)
elif platform == "weixin":
uploader = WeixinUploader(
title=title,
file_path=local_video_path,
tags=tags,
publish_date=publish_time,
account_file=str(account_file),
description=description,
user_id=user_id,
)
else:
logger.warning(f"[发布] {platform} 上传功能尚未实现")
return {
"success": False,
@@ -236,30 +236,38 @@ class PublishService:
async def login(self, platform: str, user_id: Optional[str] = None) -> Dict[str, Any]:
"""
启动QR码登录流程
Args:
platform: 平台 ID
user_id: 用户 ID (用于 Cookie 隔离)
Returns:
dict: 包含二维码base64图片
"""
if platform not in self.PLATFORMS:
return {"success": False, "message": "不支持的平台"}
try:
from .qr_login_service import QRLoginService
# 获取用户专属的 Cookie 目录
cookies_dir = self._get_cookies_dir(user_id)
# 清理旧的活跃会话(避免残留会话干扰新登录)
session_key = self._get_session_key(platform, user_id)
if session_key in self.active_login_sessions:
old_service = self.active_login_sessions.pop(session_key)
try:
await old_service._cleanup()
except Exception:
pass
# 创建QR登录服务
qr_service = QRLoginService(platform, cookies_dir)
# 存储活跃会话 (带用户隔离)
session_key = self._get_session_key(platform, user_id)
self.active_login_sessions[session_key] = qr_service
# 启动登录并获取二维码
result = await qr_service.start_login()
@@ -273,27 +281,28 @@ class PublishService:
}
def get_login_session_status(self, platform: str, user_id: Optional[str] = None) -> Dict[str, Any]:
    """Report the state of the active QR-login session (used only for scan polling).

    Only the in-memory session is consulted. The cookie file on disk is
    deliberately ignored: checking it would make a "re-login" look as if it
    had already succeeded.

    Args:
        platform: Platform ID.
        user_id: User ID; combined with ``platform`` to build the session key.

    Returns:
        A dict with ``success`` and ``message``. While the scan is pending it
        also carries ``face_verify_qr`` when the session reports one.
    """
    session_key = self._get_session_key(platform, user_id)
    qr_service = self.active_login_sessions.get(session_key)

    # No active session: the frontend should not be polling in this state.
    if qr_service is None:
        return {"success": False, "message": "无活跃登录会话"}

    status = qr_service.get_login_status()

    # Login succeeded and the cookies were saved: the session is finished.
    if status["success"] and status["cookies_saved"]:
        del self.active_login_sessions[session_key]
        return {"success": True, "message": "登录成功"}

    # Still waiting. Forward the face-verification QR code so the frontend can
    # show the new code to the user.
    result: Dict[str, Any] = {"success": False, "message": "等待扫码..."}
    face_qr = status.get("face_verify_qr")
    if face_qr:
        result["face_verify_qr"] = face_qr
    return result
def logout(self, platform: str, user_id: Optional[str] = None) -> Dict[str, Any]:
"""

View File

@@ -1,59 +1,67 @@
"""
QR码自动登录服务
后端Playwright无头模式获取二维码前端扫码后自动保存Cookie
"""
"""
QR码自动登录服务
后端Playwright无头模式获取二维码前端扫码后自动保存Cookie
"""
import asyncio
import time
import base64
import json
from pathlib import Path
import base64
import json
from pathlib import Path
from typing import Optional, Dict, Any, List, Sequence, Mapping, Union
from playwright.async_api import async_playwright, Page, Frame, BrowserContext, Browser, Playwright as PW
from loguru import logger
from app.core.config import settings
class QRLoginService:
"""QR码登录服务"""
# 登录监控超时 (秒)
LOGIN_TIMEOUT = 120
"""QR码登录服务"""
# 登录监控超时 (秒)
LOGIN_TIMEOUT = 180
def __init__(self, platform: str, cookies_dir: Path) -> None:
self.platform = platform
self.cookies_dir = cookies_dir
self.qr_code_image: Optional[str] = None
self.login_success: bool = False
self.cookies_data: Optional[Dict[str, Any]] = None
# Playwright 资源 (手动管理生命周期)
self.playwright: Optional[PW] = None
self.browser: Optional[Browser] = None
self.context: Optional[BrowserContext] = None
# 每个平台使用多个选择器 (使用逗号分隔Playwright会同时等待它们)
self.cookies_dir = cookies_dir
self.qr_code_image: Optional[str] = None
self.login_success: bool = False
self.cookies_data: Optional[Dict[str, Any]] = None
# Playwright 资源 (手动管理生命周期)
self.playwright: Optional[PW] = None
self.browser: Optional[Browser] = None
self.context: Optional[BrowserContext] = None
# 抖音 check_qrconnect API 响应拦截
self._qr_api_confirmed: bool = False
self._qr_redirect_url: Optional[str] = None
self._douyin_needs_verify: bool = False # 需要APP验证
# 刷脸验证二维码(点击刷脸后页面展示新二维码,需要前端再次展示给用户)
self._face_verify_qr: Optional[str] = None # base64 截图
# 每个平台使用多个选择器 (使用逗号分隔Playwright会同时等待它们)
self.platform_configs = {
"bilibili": {
"url": "https://passport.bilibili.com/login",
"qr_selectors": [
"div[class*='qrcode'] canvas", # 常见canvas二维码
"div[class*='qrcode'] img", # 常见图片二维码
".qrcode-img img", # 旧版
".login-scan-box img", # 扫码框
"div[class*='scan'] img"
],
"success_indicator": "https://www.bilibili.com/"
},
"douyin": {
"url": "https://creator.douyin.com/",
"qr_selectors": [
".qrcode img", # 优先尝试
"img[alt='qrcode']",
"canvas[class*='qr']",
"img[src*='qr']"
],
"success_indicator": "https://creator.douyin.com/creator-micro"
},
"bilibili": {
"url": "https://passport.bilibili.com/login",
"qr_selectors": [
"div[class*='qrcode'] canvas", # 常见canvas二维码
"div[class*='qrcode'] img", # 常见图片二维码
".qrcode-img img", # 旧版
".login-scan-box img", # 扫码框
"div[class*='scan'] img"
],
"success_indicator": "https://www.bilibili.com/"
},
"douyin": {
"url": "https://creator.douyin.com/",
"qr_selectors": [
".qrcode img", # 优先尝试
"img[alt='qrcode']",
"canvas[class*='qr']",
"img[src*='qr']"
],
"success_indicator": "https://creator.douyin.com/creator-micro"
},
"xiaohongshu": {
"url": "https://creator.xiaohongshu.com/",
"qr_selectors": [
@@ -79,10 +87,15 @@ class QRLoginService:
}
def _resolve_headless_mode(self) -> str:
if self.platform != "weixin":
return "headless"
mode = (settings.WEIXIN_HEADLESS_MODE or "").strip().lower()
return mode or "headful"
# 抖音和微信使用 headful 模式xvfb 虚拟显示),避免反爬检测
# 其他平台使用 headless-new
if self.platform == "douyin":
mode = (settings.DOUYIN_HEADLESS_MODE or "").strip().lower()
return mode or "headful"
if self.platform == "weixin":
mode = (settings.WEIXIN_HEADLESS_MODE or "").strip().lower()
return mode or "headful"
return "headless-new"
def _is_square_bbox(self, bbox: Optional[Dict[str, float]], min_side: int = 100) -> bool:
if not bbox:
@@ -158,20 +171,20 @@ class QRLoginService:
except Exception:
continue
return None
async def start_login(self) -> Dict[str, Any]:
"""
启动登录流程
Returns:
dict: 包含二维码base64和状态
"""
if self.platform not in self.platform_configs:
return {"success": False, "message": "不支持的平台"}
config = self.platform_configs[self.platform]
try:
async def start_login(self) -> Dict[str, Any]:
"""
启动登录流程
Returns:
dict: 包含二维码base64和状态
"""
if self.platform not in self.platform_configs:
return {"success": False, "message": "不支持的平台"}
config = self.platform_configs[self.platform]
try:
# 1. 启动 Playwright (不使用 async with手动管理生命周期)
self.playwright = await async_playwright().start()
@@ -180,46 +193,66 @@ class QRLoginService:
launch_args = [
'--disable-blink-features=AutomationControlled',
'--no-sandbox',
'--disable-dev-shm-usage'
'--disable-dev-shm-usage',
]
if headless and mode in ("new", "headless-new", "headless_new"):
launch_args.append("--headless=new")
if not headless:
# headful 模式下 xvfb 没有 GPU需要软件渲染
launch_args.extend([
'--use-gl=swiftshader',
'--disable-gpu',
])
# Stealth模式启动浏览器
launch_options: Dict[str, Any] = {
"headless": headless,
"args": launch_args,
}
if self.platform == "weixin":
# 根据平台选择对应的浏览器配置
if self.platform == "douyin":
chrome_path = (settings.DOUYIN_CHROME_PATH or "").strip()
browser_channel = (settings.DOUYIN_BROWSER_CHANNEL or "").strip()
user_agent = settings.DOUYIN_USER_AGENT
locale = settings.DOUYIN_LOCALE
timezone_id = settings.DOUYIN_TIMEZONE_ID
elif self.platform == "weixin":
chrome_path = (settings.WEIXIN_CHROME_PATH or "").strip()
if chrome_path:
if Path(chrome_path).exists():
launch_options["executable_path"] = chrome_path
else:
logger.warning(f"[weixin] WEIXIN_CHROME_PATH not found: {chrome_path}")
else:
channel = (settings.WEIXIN_BROWSER_CHANNEL or "").strip()
if channel:
launch_options["channel"] = channel
browser_channel = (settings.WEIXIN_BROWSER_CHANNEL or "").strip()
user_agent = settings.WEIXIN_USER_AGENT
locale = settings.WEIXIN_LOCALE
timezone_id = settings.WEIXIN_TIMEZONE_ID
else:
# B站、小红书等使用通用默认值
chrome_path = (settings.WEIXIN_CHROME_PATH or "").strip()
browser_channel = ""
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
locale = "zh-CN"
timezone_id = "Asia/Shanghai"
if chrome_path and Path(chrome_path).exists():
launch_options["executable_path"] = chrome_path
elif browser_channel:
launch_options["channel"] = browser_channel
self.browser = await self.playwright.chromium.launch(**launch_options)
# 配置真实浏览器特征
self.context = await self.browser.new_context(
viewport={'width': 1920, 'height': 1080},
user_agent=settings.WEIXIN_USER_AGENT,
locale=settings.WEIXIN_LOCALE,
timezone_id=settings.WEIXIN_TIMEZONE_ID
user_agent=user_agent,
locale=locale,
timezone_id=timezone_id
)
page = await self.context.new_page()
# 注入stealth.js
stealth_path = Path(__file__).parent / 'uploader' / 'stealth.min.js'
if stealth_path.exists():
await page.add_init_script(path=str(stealth_path))
logger.debug(f"[{self.platform}] Stealth模式已启用")
page = await self.context.new_page()
# 注入stealth.js
stealth_path = Path(__file__).parent / 'uploader' / 'stealth.min.js'
if stealth_path.exists():
await page.add_init_script(path=str(stealth_path))
logger.debug(f"[{self.platform}] Stealth模式已启用")
urls_to_try = [config["url"]]
if self.platform == "weixin":
urls_to_try = [
@@ -228,6 +261,60 @@ class QRLoginService:
]
qr_image = None
# 抖音:拦截 QR 登录相关 API 响应,检测登录成功
if self.platform == "douyin":
async def _on_douyin_qr_response(response):
    """Watch Douyin ``check_qrconnect`` responses for login confirmation.

    Sets ``self._douyin_needs_verify`` when the description text asks the
    user to complete verification, and records ``redirect_url`` (setting
    ``self._qr_api_confirmed``) the first time one appears. Failures are
    only logged at debug level; nothing is raised to the caller.
    """
    try:
        url = response.url or ""
        if "check_qrconnect" not in url.lower():
            return

        body = None
        try:
            body = await response.json()
        except Exception:
            # The body did not parse as JSON; fall back to the outermost
            # {...} span of the raw text.
            try:
                import re as _re
                text = await response.text()
                m = _re.search(r'\{.*\}', text, _re.DOTALL)
                if m:
                    body = json.loads(m.group())
            except Exception as parse_err:
                logger.debug(f"[douyin][qr-poll] unparseable body: {parse_err}")

        # Only a non-empty JSON object carries fields worth inspecting.
        if not body or not isinstance(body, dict):
            return

        # "data" may be missing or null; treat both as an empty object.
        data = body.get("data")
        if not isinstance(data, dict):
            data = {}
        redirect_url = data.get("redirect_url") or ""
        status_val = data.get("status", "")
        # Coerce to str so the slicing and substring checks below cannot fail
        # on a null description.
        desc = str(data.get("description", body.get("description", "")) or "")

        logger.info(
            f"[douyin][qr-poll] status={status_val} "
            f"desc={desc[:60]} redirect={'yes' if redirect_url else 'no'}"
        )

        # The description asks the user to finish verification in the app.
        if "完成验证" in desc or "验证后" in desc:
            self._douyin_needs_verify = True
            logger.warning("[douyin] 需要APP验证")

        # A redirect was already captured; keep the first one.
        if self._qr_api_confirmed:
            return

        # Login confirmed: the response carries a redirect_url.
        if redirect_url:
            self._qr_redirect_url = redirect_url
            self._qr_api_confirmed = True
            logger.success(f"[douyin] 登录确认redirect_url={redirect_url[:120]}")
    except Exception as e:
        logger.debug(f"[douyin][qr-poll] error: {e}")
page.on("response", _on_douyin_qr_response)
for url in urls_to_try:
logger.info(f"[{self.platform}] 打开登录页: {url}")
wait_until = "domcontentloaded" if self.platform == "weixin" else "networkidle"
@@ -240,72 +327,94 @@ class QRLoginService:
qr_image = await self._extract_qr_code(page, config["qr_selectors"])
if qr_image:
break
if not qr_image:
await self._cleanup()
return {"success": False, "message": "未找到二维码"}
logger.info(f"[{self.platform}] 二维码已获取,等待扫码...")
# 启动后台监控任务 (浏览器保持开启)
asyncio.create_task(
self._monitor_login_status(page, config["success_indicator"])
)
return {
"success": True,
"qr_code": qr_image,
"message": "请扫码登录"
}
except Exception as e:
logger.exception(f"[{self.platform}] 启动登录失败: {e}")
await self._cleanup()
return {"success": False, "message": f"启动失败: {str(e)}"}
async def _extract_qr_code(self, page: Page, selectors: List[str]) -> Optional[str]:
"""
提取二维码图片 (优化策略顺序)
根据日志分析抖音和B站使用 Text 策略成功率最高
"""
qr_element = None
# 针对抖音和B站优先使用 Text 策略 (成功率最高,速度最快)
if self.platform in ("douyin", "bilibili"):
# 尝试最多2次 (首次 + 1次重试)
for attempt in range(2):
if attempt > 0:
logger.info(f"[{self.platform}] 等待页面加载后重试...")
await asyncio.sleep(2)
# 策略1: Text (优先,成功率最高)
qr_element = await self._try_text_strategy(page)
if qr_element:
try:
screenshot = await qr_element.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.warning(f"[{self.platform}] Text策略截图失败: {e}")
qr_element = None
# 策略2: CSS (备用)
if not qr_element:
try:
combined_selector = ", ".join(selectors)
logger.debug(f"[{self.platform}] 策略2(CSS): 开始等待...")
# 增加超时到5秒抖音页面加载较慢
el = await page.wait_for_selector(combined_selector, state="visible", timeout=5000)
if el:
logger.info(f"[{self.platform}] 策略2(CSS): 匹配成功")
screenshot = await el.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.warning(f"[{self.platform}] 策略2(CSS) 失败: {e}")
# 如果已成功,退出循环
if qr_element:
break
else:
if not qr_image:
await self._cleanup()
return {"success": False, "message": "未找到二维码"}
logger.info(f"[{self.platform}] 二维码已获取,等待扫码...")
# 启动后台监控任务 (浏览器保持开启)
asyncio.create_task(
self._monitor_login_status(page, config["success_indicator"])
)
return {
"success": True,
"qr_code": qr_image,
"message": "请扫码登录"
}
except Exception as e:
logger.exception(f"[{self.platform}] 启动登录失败: {e}")
await self._cleanup()
return {"success": False, "message": f"启动失败: {str(e)}"}
async def _extract_qr_code(self, page: Page, selectors: List[str]) -> Optional[str]:
"""
提取二维码图片 (优化策略顺序)
抖音CSS 优先Text 策略每次超时 15 秒)
B站Text 优先
其他CSS -> Text
"""
qr_element = None
if self.platform == "douyin":
# 抖音CSS 优先Text 备用CSS 成功率高且快)
for attempt in range(2):
if attempt > 0:
logger.info(f"[{self.platform}] 等待页面加载后重试...")
await asyncio.sleep(2)
# 策略1: CSS (快速)
try:
combined_selector = ", ".join(selectors)
logger.debug(f"[{self.platform}] 策略CSS: 开始等待...")
el = await page.wait_for_selector(combined_selector, state="visible", timeout=5000)
if el:
logger.info(f"[{self.platform}] 策略CSS: 匹配成功")
screenshot = await el.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.warning(f"[{self.platform}] 策略CSS 失败: {e}")
# 策略2: Text (备用)
qr_element = await self._try_text_strategy(page)
if qr_element:
try:
screenshot = await qr_element.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.warning(f"[{self.platform}] Text策略截图失败: {e}")
elif self.platform == "bilibili":
# B站Text 优先
for attempt in range(2):
if attempt > 0:
logger.info(f"[{self.platform}] 等待页面加载后重试...")
await asyncio.sleep(2)
qr_element = await self._try_text_strategy(page)
if qr_element:
try:
screenshot = await qr_element.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.warning(f"[{self.platform}] Text策略截图失败: {e}")
qr_element = None
if not qr_element:
try:
combined_selector = ", ".join(selectors)
logger.debug(f"[{self.platform}] 策略CSS: 开始等待...")
el = await page.wait_for_selector(combined_selector, state="visible", timeout=5000)
if el:
logger.info(f"[{self.platform}] 策略CSS: 匹配成功")
screenshot = await el.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.warning(f"[{self.platform}] 策略CSS 失败: {e}")
else:
# 其他平台 (小红书/微信等):保持原顺序 CSS -> Text
# 策略1: CSS 选择器
try:
@@ -328,36 +437,31 @@ class QRLoginService:
logger.info(f"[{self.platform}] 策略1(CSS): 匹配成功")
except Exception as e:
logger.warning(f"[{self.platform}] 策略1(CSS) 失败: {e}")
# 策略2: Text
# 策略2: Text
if not qr_element:
qr_element = await self._try_text_strategy(page)
if not qr_element and self.platform == "weixin":
qr_element = await self._try_text_strategy_in_frames(page)
# 如果找到元素,截图返回
if qr_element:
try:
screenshot = await qr_element.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.error(f"[{self.platform}] 截图失败: {e}")
# 所有策略失败
logger.error(f"[{self.platform}] 所有QR码提取策略失败")
# 保存调试截图
debug_dir = Path(__file__).parent.parent.parent / 'debug_screenshots'
debug_dir.mkdir(exist_ok=True)
await page.screenshot(path=str(debug_dir / f"{self.platform}_debug.png"))
return None
# 如果找到元素,截图返回
if qr_element:
try:
screenshot = await qr_element.screenshot()
return base64.b64encode(screenshot).decode()
except Exception as e:
logger.error(f"[{self.platform}] 截图失败: {e}")
# 所有策略失败
logger.error(f"[{self.platform}] 所有QR码提取策略失败")
return None
async def _try_text_strategy(self, page: Union[Page, Frame]) -> Optional[Any]:
"""基于文本查找二维码图片"""
try:
logger.debug(f"[{self.platform}] 策略Text: 开始搜索...")
"""基于文本查找二维码图片"""
try:
logger.debug(f"[{self.platform}] 策略Text: 开始搜索...")
keywords = [
"扫码登录",
"二维码",
@@ -368,138 +472,265 @@ class QRLoginService:
"请使用微信扫码",
"视频号"
]
for kw in keywords:
try:
text_el = page.get_by_text(kw, exact=False).first
await text_el.wait_for(state="visible", timeout=2000)
# 向上查找图片
parent = text_el
for _ in range(5):
parent = parent.locator("..")
for kw in keywords:
try:
text_el = page.get_by_text(kw, exact=False).first
await text_el.wait_for(state="visible", timeout=2000)
# 向上查找图片
parent = text_el
for _ in range(5):
parent = parent.locator("..")
candidates = parent.locator("img, canvas")
min_side = 120 if self.platform == "weixin" else 100
best = await self._pick_best_candidate(candidates, min_side=min_side)
if best:
logger.info(f"[{self.platform}] 策略Text: 成功")
return best
except Exception:
continue
except Exception as e:
logger.warning(f"[{self.platform}] 策略Text 失败: {e}")
return None
async def _monitor_login_status(self, page: Page, success_url: str):
"""监控登录状态"""
try:
logger.info(f"[{self.platform}] 开始监控登录状态...")
except Exception:
continue
except Exception as e:
logger.warning(f"[{self.platform}] 策略Text 失败: {e}")
return None
async def _monitor_login_status(self, page: Page, success_url: str):
    """Poll once per second until login succeeds or ``LOGIN_TIMEOUT`` polls elapse.

    Strategy:
      1. All platforms: login is considered successful when one of the
         platform's session cookies exists in the context, or when the page
         URL contains ``success_url``. The context cookies are then saved.
      2. Douyin: when the API listener captured a ``redirect_url``, navigate
         to it and re-check cookies on the next poll.
      3. Douyin: when app verification is required, click the
         face-verification option 2 polls after detection and capture the
         resulting QR code 5 polls after detection, storing it in
         ``self._face_verify_qr``.

    Playwright resources are always released via ``_cleanup`` on exit.

    Args:
        page: The page showing the login QR code.
        success_url: URL fragment that indicates a logged-in page.
    """
    try:
        logger.info(f"[{self.platform}] 开始监控登录状态...")

        key_cookies = {
            "bilibili": ["SESSDATA"],
            "douyin": ["sessionid", "sessionid_ss", "sid_guard", "sid_tt", "uid_tt"],
            "xiaohongshu": ["web_session"],
            "weixin": ["wxuin", "wxsid", "pass_ticket", "uin", "skey",
                       "p_uin", "p_skey", "pac_uid"],
        }
        target_cookies = key_cookies.get(self.platform, [])

        # Poll index at which the verification requirement was first seen.
        _verify_detected_at: Optional[int] = None

        for i in range(self.LOGIN_TIMEOUT):
            await asyncio.sleep(1)

            # The context is cleared by _cleanup; stop once it is gone.
            if not self.context:
                break

            try:
                cookies = [dict(c) for c in await self.context.cookies()]
                cookie_names = {c.get("name") for c in cookies}
                has_session = any(n in cookie_names for n in target_cookies)
                current_url = page.url

                # Progress log every 10 polls.
                if i % 10 == 0:
                    logger.info(
                        f"[{self.platform}] 等待登录... i={i} "
                        f"URL={current_url[:80]} session={has_session} "
                        f"cookies={len(cookies)}"
                    )

                # Success: a session cookie exists, or the URL reached the target page.
                if has_session:
                    success_msg = f"[{self.platform}] 登录成功检测到session cookie"
                elif success_url in current_url:
                    success_msg = f"[{self.platform}] 登录成功URL={current_url[:80]}"
                else:
                    success_msg = None

                if success_msg:
                    logger.success(success_msg)
                    self.login_success = True
                    # Short buffer before taking the final cookie snapshot.
                    await asyncio.sleep(2)
                    final = [dict(c) for c in await self.context.cookies()]
                    await self._save_cookies(final)
                    break

                # Douyin: the API listener captured a redirect_url -> navigate to it.
                if self.platform == "douyin" and self._qr_api_confirmed and self._qr_redirect_url:
                    logger.info("[douyin] 导航到 redirect_url...")
                    try:
                        await page.goto(self._qr_redirect_url, wait_until="domcontentloaded", timeout=30000)
                    except Exception as nav_err:
                        # Best effort: the cookie check on the next poll decides the outcome.
                        logger.debug(f"[douyin] redirect navigation failed: {nav_err}")
                    await asyncio.sleep(3)
                    # Reset so the redirect is followed only once; the next
                    # poll re-checks the cookies.
                    self._qr_api_confirmed = False
                    self._qr_redirect_url = None
                    continue

                # Douyin: app verification required -> drive the face-verification dialog.
                if self.platform == "douyin" and self._douyin_needs_verify:
                    if _verify_detected_at is None:
                        _verify_detected_at = i
                        logger.info("[douyin] 检测到身份验证弹窗,将点击手机刷脸验证...")

                    elapsed = i - _verify_detected_at

                    if elapsed == 2:
                        await self._click_douyin_face_verify(page)

                    if elapsed == 5 and not self._face_verify_qr:
                        await self._capture_douyin_face_verify_qr(page)

                    # Afterwards, log every 10 polls while waiting for the user.
                    if elapsed > 0 and elapsed % 10 == 0:
                        logger.info(f"[douyin] 等待用户完成手机验证... ({elapsed}s)")

            except Exception as e:
                logger.warning(f"[{self.platform}] 监控异常: {e}")

        if not self.login_success:
            logger.warning(f"[{self.platform}] 登录超时")

    except Exception as e:
        logger.error(f"[{self.platform}] 监控异常: {e}")
    finally:
        await self._cleanup()

async def _click_douyin_face_verify(self, page: Page) -> None:
    """Click the face-verification option in the page, logging the outcome."""
    try:
        clicked = await page.evaluate("""() => {
            // 查找身份验证弹窗中的选项
            const allEls = document.querySelectorAll('div, span, p, a, li');
            for (const el of allEls) {
                const text = (el.textContent || '').trim();
                // 点击"手机刷脸验证"
                if (text.includes('刷脸验证') && text.length < 30) {
                    el.click();
                    return '刷脸验证';
                }
            }
            return null;
        }""")
        if clicked:
            logger.info(f"[douyin] 已点击验证选项: {clicked}")
        else:
            logger.warning("[douyin] 未找到验证选项")
    except Exception as e:
        logger.warning(f"[douyin] 点击验证选项异常: {e}")

async def _capture_douyin_face_verify_qr(self, page: Page) -> None:
    """Screenshot the face-verification QR code into ``self._face_verify_qr``.

    The in-page script tags the largest roughly square image (at least 80px
    per side) inside the dialog. When no such image is captured, a full-page
    screenshot is stored instead.
    """
    try:
        qr_selector = await page.evaluate("""() => {
            // 找到包含"刷脸验证"文字的弹窗
            const allEls = document.querySelectorAll('div, h2, h3, span, p');
            let modal = null;
            for (const el of allEls) {
                const text = (el.textContent || '').trim();
                if (text.includes('刷脸验证') && text.length < 20) {
                    modal = el;
                    for (let i = 0; i < 8; i++) {
                        if (!modal.parentElement) break;
                        modal = modal.parentElement;
                        if (modal.offsetWidth > 250 && modal.offsetHeight > 250) break;
                    }
                    break;
                }
            }
            if (!modal) return null;
            // 用 offsetWidth/Height显示尺寸而非 naturalWidth源文件可能很大
            const imgs = modal.querySelectorAll('img');
            let best = null;
            let bestArea = 0;
            for (const img of imgs) {
                const w = img.offsetWidth;
                const h = img.offsetHeight;
                if (w < 80 || h < 80) continue;
                const ratio = Math.abs(w - h) / Math.max(w, h);
                if (ratio > 0.3) continue;
                const area = w * h;
                if (area > bestArea) {
                    bestArea = area;
                    best = img;
                }
            }
            if (best) {
                best.setAttribute('data-face-qr', 'true');
                return 'img[data-face-qr="true"]';
            }
            return null;
        }""")
        if qr_selector:
            qr_el = page.locator(qr_selector).first
            if await qr_el.is_visible():
                screenshot = await qr_el.screenshot()
                self._face_verify_qr = base64.b64encode(screenshot).decode()
                logger.info("[douyin] 刷脸弹窗内二维码截图已捕获")
            else:
                logger.warning("[douyin] 二维码元素不可见")

        if not self._face_verify_qr:
            # Fallback: full-page screenshot.
            logger.warning("[douyin] 未在弹窗内找到二维码,使用全页截图")
            screenshot = await page.screenshot()
            self._face_verify_qr = base64.b64encode(screenshot).decode()
    except Exception as e:
        logger.warning(f"[douyin] 截取刷脸二维码异常: {e}")
async def _cleanup(self) -> None:
"""清理资源"""
if self.context:
try:
await self.context.close()
except Exception:
pass
self.context = None
if self.browser:
try:
await self.browser.close()
except Exception:
pass
self.browser = None
if self.playwright:
try:
await self.playwright.stop()
except Exception:
pass
self.playwright = None
async def _save_cookies(self, cookies: Sequence[Mapping[str, Any]]) -> None:
    """Write the login cookies to ``<cookies_dir>/<platform>_cookies.json``.

    Bilibili is stored as a flat name -> value dict limited to four cookies
    (the simple format the biliup library needs). Every other platform is
    stored in Playwright ``storage_state`` layout, so the file can be passed
    to ``browser.new_context(storage_state=file)``.

    The saved payload is also kept in ``self.cookies_data``. Failures are
    logged and not raised; ``self.cookies_data`` is then left unchanged.

    Args:
        cookies: Cookie mappings, each expected to carry ``name`` and ``value``.
    """
    try:
        # Make sure the target directory exists before writing.
        self.cookies_dir.mkdir(parents=True, exist_ok=True)
        cookie_file = self.cookies_dir / f"{self.platform}_cookies.json"

        if self.platform == "bilibili":
            required = {'SESSDATA', 'bili_jct', 'DedeUserID', 'DedeUserID__ckMd5'}
            payload: Any = {
                c.get('name'): c.get('value')
                for c in cookies
                if c.get('name') in required
            }
        else:
            # list() so any Sequence serialises, not only list/tuple.
            payload = {
                "cookies": list(cookies),
                "origins": []
            }

        with open(cookie_file, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2)

        self.cookies_data = payload
        logger.success(f"[{self.platform}] Cookie已保存")

    except Exception as e:
        logger.error(f"[{self.platform}] 保存Cookie失败: {e}")
def get_login_status(self) -> Dict[str, Any]:
    """Return a snapshot of the login progress.

    Returns:
        A dict with ``success`` (login detected) and ``cookies_saved``
        (cookie payload stored on this instance). ``face_verify_qr`` is added
        when a face-verification QR screenshot (base64) has been captured, so
        the frontend can display it.
    """
    result: Dict[str, Any] = {
        "success": self.login_success,
        "cookies_saved": self.cookies_data is not None,
    }
    if self._face_verify_qr:
        result["face_verify_qr"] = self._face_verify_qr
    return result

View File

@@ -127,6 +127,22 @@ class WeixinUploader(BaseUploader):
return False
def _attach_debug_listeners(self, page) -> None:
# post_create 响应监听始终注册(不依赖 debug 开关)
def log_post_create(response):
    """Record the outcome of the ``/post/post_create`` request on the uploader.

    A status below 400 sets ``self._post_create_submitted``; any other status
    stores an error message in ``self._publish_api_error``. Responses for
    other URLs are ignored. Errors are logged at debug level, never raised.
    """
    try:
        url = response.url or ""
        if "/post/post_create" not in url:
            return
        if response.status < 400:
            self._post_create_submitted = True
            logger.info("[weixin][publish] post_create API ok")
        else:
            self._publish_api_error = f"发布请求失败HTTP {response.status}"
            logger.warning(f"[weixin][publish] post_create_failed status={response.status}")
    except Exception as e:
        logger.debug(f"[weixin][publish] post_create listener error: {e}")
page.on("response", log_post_create)
if not self._debug_artifacts_enabled():
return
@@ -1210,15 +1226,7 @@ class WeixinUploader(BaseUploader):
return False
async def _wait_for_publish_result(self, page):
success_texts = [
"\u53d1\u5e03\u6210\u529f",
"\u53d1\u5e03\u5b8c\u6210",
"\u5df2\u53d1\u5e03",
"\u5ba1\u6838\u4e2d",
"\u5f85\u5ba1\u6838",
"\u63d0\u4ea4\u6210\u529f",
"\u5df2\u63d0\u4ea4",
]
"""点击发表后等待结果:页面离开创建页即视为成功"""
failure_texts = [
"\u53d1\u5e03\u5931\u8d25",
"\u53d1\u5e03\u5f02\u5e38",
@@ -1229,38 +1237,33 @@ class WeixinUploader(BaseUploader):
"\u7f51\u7edc\u5f02\u5e38",
]
# 记录点击发表时的 URL用于判断是否跳转
create_url = page.url
start_time = time.time()
last_capture = -1
while time.time() - start_time < self.PUBLISH_TIMEOUT:
current_url = page.url
# API 层面报错 → 直接失败
if self._publish_api_error:
return False, self._publish_api_error, False
if self._post_create_submitted and (
"/post/list" in current_url
or "/platform/post/list" in current_url
):
return True, "发布成功:已进入内容列表", False
# 核心判定URL 离开了创建页(跳转到列表页或其他页面)→ 发布成功
if current_url != create_url and "/post/create" not in current_url:
logger.info(f"[weixin] page navigated away from create page: {current_url}")
return True, "发布成功:页面已跳转", False
if "channels.weixin.qq.com/platform" in current_url:
for text in success_texts:
if await self._is_text_visible(page, text, exact=False):
return True, f"发布成功:{text}", False
# post_create API 已确认成功 → 也视为成功
if self._post_create_submitted:
logger.info("[weixin] post_create API confirmed success")
return True, "发布成功:API 已确认", False
# 检查页面上的失败文案
for text in failure_texts:
if await self._is_text_visible(page, text, exact=False):
return False, f"发布失败:{text}", False
for text in success_texts:
if await self._is_text_visible(page, text, exact=False):
return True, f"发布成功:{text}", False
logger.info("[weixin] waiting for publish result...")
elapsed = int(time.time() - start_time)
if elapsed % 20 == 0 and elapsed != last_capture:
last_capture = elapsed
await self._save_debug_screenshot(page, "publish_waiting")
await asyncio.sleep(self.POLL_INTERVAL)
return False, "发布超时", True