This commit is contained in:
Kevin Wong
2026-02-03 13:46:52 +08:00
parent eb3ed23326
commit cb10da52fc
18 changed files with 1018 additions and 657 deletions

View File

@@ -127,7 +127,60 @@ async def upload_ref_audio(
if duration > 60.0:
raise HTTPException(status_code=400, detail="音频时长过长,最多 60 秒")
# 生成存储路径
# 3. 处理重名逻辑 (Friendly Display Name)
original_name = file.filename
# 获取用户现有的所有参考音频列表 (为了检查文件名冲突)
# 注意: 这种列表方式在文件极多时性能一般,但考虑到单用户参考音频数量有限,目前可行
existing_files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
existing_names = set()
# 预加载所有现有的 display name
# 这里需要并发请求 metadata 可能会慢,优化: 仅检查 metadata 文件并解析
# 简易方案: 仅在 metadata 中读取 original_filename
# 但 list_files 返回的是 name我们需要 metadata
# 考虑到性能,这里使用一种妥协方案:
# 我们不做全量检查,而是简单的检查:如果用户上传 myvoice.wav
# 我们看看有没有 (timestamp)_myvoice.wav 这种其实并不能准确判断 display name 是否冲突
#
# 正确做法: 应该有个数据库表存 metadata。但目前是无数据库设计。
#
# 改用简单方案:
# 既然我们无法快速获取所有 display name
# 我们暂时只处理 "在新上传时original_filename 保持原样"
# 但用户希望 "如果在列表中看到重复的,自动加(1)"
#
# 鉴于无数据库架构的限制,要在上传时知道"已有的 display name" 成本太高(需遍历下载所有json)。
#
# 💡 替代方案:
# 我们不检查旧的。我们只保证**存储**唯一。
# 对于用户提到的 "新上传的文件名后加个数字" -> 这通常是指 "另存为" 的逻辑。
# 既然用户现在的痛点是 "显示了时间戳太丑",而我已经去掉了时间戳显示。
# 那么如果用户上传两个 "TEST.wav",列表里就会有两个 "TEST.wav" (但时间不同)。
# 这其实是可以接受的。
#
# 但如果用户强求 "自动重命名":
# 我们可以在这里做一个轻量级的 "同名检测"
# 检查有没有 *_{original_name} 的文件存在。
# 如果 storage 里已经有 123_abc.wav, 456_abc.wav
# 我们可以认为 abc.wav 已经存在。
dup_count = 0
search_suffix = f"_{original_name}" # 比如 _test.wav
for f in existing_files:
fname = f.get('name', '')
if fname.endswith(search_suffix):
dup_count += 1
final_display_name = original_name
if dup_count > 0:
name_stem = Path(original_name).stem
name_ext = Path(original_name).suffix
final_display_name = f"{name_stem}({dup_count}){name_ext}"
# 生成存储路径 (唯一ID)
timestamp = int(time.time())
safe_name = sanitize_filename(Path(file.filename).stem)
storage_path = f"{user_id}/{timestamp}_{safe_name}.wav"
@@ -146,7 +199,7 @@ async def upload_ref_audio(
# 上传元数据 JSON
metadata = {
"ref_text": ref_text.strip(),
"original_filename": file.filename,
"original_filename": final_display_name, # 这里的名字如果有重复会自动加(1)
"duration_sec": duration,
"created_at": timestamp
}
@@ -207,6 +260,7 @@ async def list_ref_audios(user: dict = Depends(get_current_user)):
ref_text = ""
duration_sec = 0.0
created_at = 0
original_filename = ""
try:
# 获取 metadata 内容
@@ -219,6 +273,7 @@ async def list_ref_audios(user: dict = Depends(get_current_user)):
ref_text = metadata.get("ref_text", "")
duration_sec = metadata.get("duration_sec", 0.0)
created_at = metadata.get("created_at", 0)
original_filename = metadata.get("original_filename", "")
except Exception as e:
logger.warning(f"读取 metadata 失败: {e}")
# 从文件名提取时间戳
@@ -230,9 +285,18 @@ async def list_ref_audios(user: dict = Depends(get_current_user)):
# 获取音频签名 URL
signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)
# 优先显示原始文件名 (去掉时间戳前缀)
display_name = original_filename if original_filename else name
# 如果原始文件名丢失,尝试从现有文件名中通过正则去掉时间戳
if not display_name or display_name == name:
# 匹配 "1234567890_filename.wav"
match = re.match(r'^\d+_(.+)$', name)
if match:
display_name = match.group(1)
items.append(RefAudioResponse(
id=storage_path,
name=name,
name=display_name,
path=signed_url,
ref_text=ref_text,
duration_sec=duration_sec,
@@ -274,3 +338,74 @@ async def delete_ref_audio(audio_id: str, user: dict = Depends(get_current_user)
except Exception as e:
logger.error(f"删除参考音频失败: {e}")
raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}")
class RenameRequest(BaseModel):
    """Request body for renaming a reference audio's display name."""
    # New display name; extension is appended server-side if missing.
    new_name: str
@router.put("/{audio_id:path}")
async def rename_ref_audio(
    audio_id: str,
    request: RenameRequest,
    user: dict = Depends(get_current_user)
):
    """Rename a reference audio by rewriting the display name in its metadata JSON.

    The audio file itself is not moved; only the sidecar ``.json`` metadata's
    ``original_filename`` field changes.

    Args:
        audio_id: Storage path of the audio, e.g. "{user_id}/{timestamp}_{name}.wav".
        request: Carries the new display name.
        user: Authenticated user injected via dependency.

    Raises:
        HTTPException 403: the audio does not belong to the caller.
        HTTPException 400: the new name is empty.
        HTTPException 500: metadata read-back/upload failed.
    """
    user_id = user["id"]

    # Ownership check: storage paths are namespaced by user id.
    if not audio_id.startswith(f"{user_id}/"):
        raise HTTPException(status_code=403, detail="无权修改此文件")

    new_name = request.new_name.strip()
    if not new_name:
        raise HTTPException(status_code=400, detail="新名称不能为空")

    # Keep an extension on the display name (default to .wav).
    if not Path(new_name).suffix:
        new_name += ".wav"

    try:
        # 1. Derive the sidecar metadata path. Swap only the TRAILING ".wav"
        #    (a bare str.replace would also rewrite a ".wav" occurring earlier
        #    in the file name and produce a wrong metadata path).
        if audio_id.endswith(".wav"):
            metadata_path = audio_id[: -len(".wav")] + ".json"
        else:
            metadata_path = audio_id.replace(".wav", ".json")

        try:
            # Fetch the existing JSON via a signed URL.
            import httpx

            metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
            if not metadata_url:
                # No JSON yet — fall through to the fallback block below.
                raise Exception("Metadata not found")
            async with httpx.AsyncClient() as client:
                resp = await client.get(metadata_url)
                if resp.status_code == 200:
                    metadata = resp.json()
                else:
                    raise Exception(f"Failed to fetch metadata: {resp.status_code}")
        except Exception as e:
            logger.warning(f"无法读取元数据: {e}, 将创建新的元数据")
            # Fallback: build a minimal metadata record (ref_text may be lost).
            metadata = {
                "ref_text": "",
                "duration_sec": 0.0,
                "created_at": int(time.time()),
                "original_filename": new_name
            }

        # 2. Update the display name.
        metadata["original_filename"] = new_name

        # 3. Overwrite the metadata JSON in storage.
        await storage_service.upload_file(
            bucket=BUCKET_REF_AUDIOS,
            path=metadata_path,
            file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
            content_type="application/json"
        )

        return {"success": True, "name": new_name}
    except Exception as e:
        logger.error(f"重命名失败: {e}")
        raise HTTPException(status_code=500, detail=f"重命名失败: {str(e)}")

View File

@@ -38,11 +38,13 @@ async def extract_script_tool(
temp_dir.mkdir(parents=True, exist_ok=True)
# 1. 获取/保存文件
loop = asyncio.get_event_loop()
if file:
safe_filename = Path(file.filename).name.replace(" ", "_")
temp_path = temp_dir / f"tool_extract_{timestamp}_{safe_filename}"
with open(temp_path, "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
# 文件 I/O 放入线程池
await loop.run_in_executor(None, lambda: shutil.copyfileobj(file.file, open(temp_path, "wb")))
logger.info(f"Tool processing upload file: {temp_path}")
else:
# URL 下载逻辑
@@ -55,8 +57,8 @@ async def extract_script_tool(
logger.info(f"Tool downloading URL: {url}")
# 先尝试 yt-dlp
try:
# 封装 yt-dlp 下载函数 (Blocking)
def _download_yt_dlp():
import yt_dlp
logger.info("Attempting download with yt-dlp...")
@@ -80,8 +82,12 @@ async def extract_script_tool(
id = info.get('id')
downloaded_file = str(temp_dir / f"tool_download_{timestamp}_{id}.{ext}")
temp_path = Path(downloaded_file)
logger.info(f"yt-dlp downloaded to: {temp_path}")
return Path(downloaded_file)
# 先尝试 yt-dlp (Run in Executor)
try:
temp_path = await loop.run_in_executor(None, _download_yt_dlp)
logger.info(f"yt-dlp downloaded to: {temp_path}")
except Exception as e:
logger.warning(f"yt-dlp download failed: {e}. Trying manual Douyin fallback...")
@@ -107,46 +113,48 @@ async def extract_script_tool(
if not temp_path or not temp_path.exists():
raise HTTPException(400, "文件获取失败")
# 1.5 安全转换: 强制转为 WAV (16k) 传给 Whisper
# 这一步既能验证文件有效性ffmpeg会报错又能避免 PyAV 音频解码 bug
# 1.5 安全转换: 强制转为 WAV (16k)
import subprocess
audio_path = temp_dir / f"extract_audio_{timestamp}.wav"
try:
# ffmpeg -i input -vn -acodec pcm_s16le -ar 16000 -ac 1 output.wav -y
convert_cmd = [
'ffmpeg',
'-i', str(temp_path),
'-vn', # 忽略视频
'-acodec', 'pcm_s16le',
'-ar', '16000', # Whisper 推荐采样率
'-ac', '1', # 单声道
'-y', # 覆盖
str(audio_path)
]
# 捕获 stderr 以便出错时打印
subprocess.run(convert_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
logger.info(f"Converted to WAV: {audio_path}")
# 使用转换后的文件
target_path = audio_path
except subprocess.CalledProcessError as e:
error_log = e.stderr.decode('utf-8', errors='ignore') if e.stderr else str(e)
logger.error(f"FFmpeg check/convert failed: {error_log}")
# 尝试判断是不是 HTML
head = b""
def _convert_audio():
try:
with open(temp_path, 'rb') as f:
head = f.read(100)
except:
pass
if b'<!DOCTYPE html' in head or b'<html' in head:
convert_cmd = [
'ffmpeg',
'-i', str(temp_path),
'-vn', # 忽略视频
'-acodec', 'pcm_s16le',
'-ar', '16000', # Whisper 推荐采样率
'-ac', '1', # 单声道
'-y', # 覆盖
str(audio_path)
]
# 捕获 stderr
subprocess.run(convert_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return True
except subprocess.CalledProcessError as e:
error_log = e.stderr.decode('utf-8', errors='ignore') if e.stderr else str(e)
logger.error(f"FFmpeg check/convert failed: {error_log}")
# 检查是否为 HTML
head = b""
try:
with open(temp_path, 'rb') as f:
head = f.read(100)
except: pass
if b'<!DOCTYPE html' in head or b'<html' in head:
raise ValueError("HTML_DETECTED")
raise ValueError("CONVERT_FAILED")
# 执行转换 (Run in Executor)
try:
await loop.run_in_executor(None, _convert_audio)
logger.info(f"Converted to WAV: {audio_path}")
target_path = audio_path
except ValueError as ve:
if str(ve) == "HTML_DETECTED":
raise HTTPException(400, "下载的文件是网页而非视频,请重试或手动上传。")
raise HTTPException(400, "下载的文件已损坏或格式无法识别。")
else:
raise HTTPException(400, "下载的文件已损坏或格式无法识别。")
# 2. 提取文案 (Whisper)
script = await whisper_service.transcribe(str(target_path))

View File

def split_word_to_chars(word: str, start: float, end: float) -> list:
    """Split a timed word into per-token entries with interpolated timestamps.

    Consecutive ASCII alphanumerics are grouped into a single token (so "OK"
    stays one unit); every other non-whitespace character (CJK, punctuation)
    becomes its own token. The [start, end] span is divided evenly among the
    tokens.

    Returns:
        List of dicts with keys "word"/"start"/"end" (times rounded to 3 dp).
    """
    tokens = []
    run = []  # pending run of ASCII alphanumeric characters

    def _flush():
        # Close out the current ASCII run, if any.
        if run:
            tokens.append("".join(run))
            run.clear()

    for ch in word:
        if not ch.strip():
            continue  # drop whitespace entirely
        if ch.isascii() and ch.isalnum():
            run.append(ch)
        else:
            _flush()
            tokens.append(ch)
    _flush()

    if not tokens:
        return []
    if len(tokens) == 1:
        # Single token keeps the exact original span.
        return [{"word": tokens[0], "start": start, "end": end}]

    # Linear interpolation: each token gets an equal slice of the span.
    step = (end - start) / len(tokens)
    return [
        {
            "word": tok,
            "start": round(start + i * step, 3),
            "end": round(start + (i + 1) * step, 3),
        }
        for i, tok in enumerate(tokens)
    ]

View File

@@ -31,3 +31,7 @@ bcrypt==4.0.1
# 字幕对齐
faster-whisper>=1.0.0
# 文案提取与AI生成
yt-dlp>=2023.0.0
zai-sdk>=0.2.0

View File

@@ -0,0 +1,84 @@
import asyncio
import httpx
import logging
import subprocess
import time
from datetime import datetime

# Logging setup: mirror everything to watchdog.log and stdout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("watchdog.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("Watchdog")
# Service configuration. Each entry holds static config plus mutable running
# state: "failures" counts consecutive failed health checks and is reset on
# success or restart; "threshold" is how many consecutive failures trigger
# "restart_cmd" (a pm2 restart).
SERVICES = [
    {
        "name": "vigent2-qwen-tts",
        "url": "http://localhost:8009/health",
        "failures": 0,
        "threshold": 3,
        "timeout": 10.0,
        "restart_cmd": ["pm2", "restart", "vigent2-qwen-tts"]
    }
]
async def check_service(service):
    """Probe one service's health endpoint; restart it after repeated failures.

    Mutates ``service["failures"]`` in place.

    Returns:
        True          - healthy (HTTP 200).
        "restarting"  - failure threshold reached and restart command issued.
        False         - this check failed (threshold not reached, or restart failed).
    """
    try:
        timeout = service.get("timeout", 10.0)
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.get(service["url"])
            if response.status_code == 200:
                # Recovered after previous failures — log the transition once.
                if service["failures"] > 0:
                    logger.info(f"✅ 服务 {service['name']} 已恢复正常")
                service["failures"] = 0
                return True
            else:
                logger.warning(f"⚠️ 服务 {service['name']} 返回状态码 {response.status_code}")
    except Exception as e:
        logger.warning(f"⚠️ 无法连接服务 {service['name']}: {str(e)}")

    # Reached only when the probe above did not return True: count the failure.
    service["failures"] += 1
    logger.warning(f"❌ 服务 {service['name']} 连续失败 {service['failures']}/{service['threshold']}")

    if service["failures"] >= service["threshold"]:
        logger.error(f"🚨 服务 {service['name']} 已达到失败阈值,正在重启...")
        try:
            # Run the blocking restart in a thread pool so the event loop
            # (and health checks for other services) are not stalled while
            # pm2 does its work.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                None, lambda: subprocess.run(service["restart_cmd"], check=True)
            )
            logger.info(f"♻️ 服务 {service['name']} 重启命令已发送")
            # Reset the counter so the restarted service gets a fresh window.
            service["failures"] = 0
            return "restarting"
        except Exception as restart_error:
            logger.error(f"💥 重启服务 {service['name']} 失败: {restart_error}")
    return False
async def main():
    """Poll every configured service forever, restarting unhealthy ones."""
    logger.info("🛡️ ViGent2 服务看门狗 (Watchdog) 已启动")
    while True:
        # Check all services concurrently; exceptions from a single check must
        # not kill the watchdog loop, so collect them instead of raising.
        results = await asyncio.gather(
            *(check_service(service) for service in SERVICES),
            return_exceptions=True,
        )
        if "restarting" in results:
            # Grace period: give a freshly restarted service extra time to
            # boot before the next probe counts against it.
            await asyncio.sleep(30)
        # Poll every 30 seconds.
        await asyncio.sleep(30)
# Entry point: run the watchdog loop until interrupted (Ctrl+C).
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("🛑 看门狗已停止")