This commit is contained in:
Kevin Wong
2026-02-03 13:46:52 +08:00
parent eb3ed23326
commit cb10da52fc
18 changed files with 1018 additions and 657 deletions

View File

@@ -127,7 +127,60 @@ async def upload_ref_audio(
if duration > 60.0:
raise HTTPException(status_code=400, detail="音频时长过长,最多 60 秒")
# 生成存储路径
# 3. 处理重名逻辑 (Friendly Display Name)
original_name = file.filename
# 获取用户现有的所有参考音频列表 (为了检查文件名冲突)
# 注意: 这种列表方式在文件极多时性能一般,但考虑到单用户参考音频数量有限,目前可行
existing_files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
existing_names = set()
# 预加载所有现有的 display name
# 这里需要并发请求 metadata 可能会慢,优化: 仅检查 metadata 文件并解析
# 简易方案: 仅在 metadata 中读取 original_filename
# 但 list_files 返回的是 name我们需要 metadata
# 考虑到性能,这里使用一种妥协方案:
# 我们不做全量检查,而是简单的检查:如果用户上传 myvoice.wav
# 我们看看有没有 (timestamp)_myvoice.wav 这种其实并不能准确判断 display name 是否冲突
#
# 正确做法: 应该有个数据库表存 metadata。但目前是无数据库设计。
#
# 改用简单方案:
# 既然我们无法快速获取所有 display name
# 我们暂时只处理 "在新上传时original_filename 保持原样"
# 但用户希望 "如果在列表中看到重复的,自动加(1)"
#
# 鉴于无数据库架构的限制,要在上传时知道"已有的 display name" 成本太高(需遍历下载所有json)。
#
# 💡 替代方案:
# 我们不检查旧的。我们只保证**存储**唯一。
# 对于用户提到的 "新上传的文件名后加个数字" -> 这通常是指 "另存为" 的逻辑。
# 既然用户现在的痛点是 "显示了时间戳太丑",而我已经去掉了时间戳显示。
# 那么如果用户上传两个 "TEST.wav",列表里就会有两个 "TEST.wav" (但时间不同)。
# 这其实是可以接受的。
#
# 但如果用户强求 "自动重命名":
# 我们可以在这里做一个轻量级的 "同名检测"
# 检查有没有 *_{original_name} 的文件存在。
# 如果 storage 里已经有 123_abc.wav, 456_abc.wav
# 我们可以认为 abc.wav 已经存在。
dup_count = 0
search_suffix = f"_{original_name}" # 比如 _test.wav
for f in existing_files:
fname = f.get('name', '')
if fname.endswith(search_suffix):
dup_count += 1
final_display_name = original_name
if dup_count > 0:
name_stem = Path(original_name).stem
name_ext = Path(original_name).suffix
final_display_name = f"{name_stem}({dup_count}){name_ext}"
# 生成存储路径 (唯一ID)
timestamp = int(time.time())
safe_name = sanitize_filename(Path(file.filename).stem)
storage_path = f"{user_id}/{timestamp}_{safe_name}.wav"
@@ -146,7 +199,7 @@ async def upload_ref_audio(
# 上传元数据 JSON
metadata = {
"ref_text": ref_text.strip(),
"original_filename": file.filename,
"original_filename": final_display_name, # 这里的名字如果有重复会自动加(1)
"duration_sec": duration,
"created_at": timestamp
}
@@ -207,6 +260,7 @@ async def list_ref_audios(user: dict = Depends(get_current_user)):
ref_text = ""
duration_sec = 0.0
created_at = 0
original_filename = ""
try:
# 获取 metadata 内容
@@ -219,6 +273,7 @@ async def list_ref_audios(user: dict = Depends(get_current_user)):
ref_text = metadata.get("ref_text", "")
duration_sec = metadata.get("duration_sec", 0.0)
created_at = metadata.get("created_at", 0)
original_filename = metadata.get("original_filename", "")
except Exception as e:
logger.warning(f"读取 metadata 失败: {e}")
# 从文件名提取时间戳
@@ -230,9 +285,18 @@ async def list_ref_audios(user: dict = Depends(get_current_user)):
# 获取音频签名 URL
signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)
# 优先显示原始文件名 (去掉时间戳前缀)
display_name = original_filename if original_filename else name
# 如果原始文件名丢失,尝试从现有文件名中通过正则去掉时间戳
if not display_name or display_name == name:
# 匹配 "1234567890_filename.wav"
match = re.match(r'^\d+_(.+)$', name)
if match:
display_name = match.group(1)
items.append(RefAudioResponse(
id=storage_path,
name=name,
name=display_name,
path=signed_url,
ref_text=ref_text,
duration_sec=duration_sec,
@@ -274,3 +338,74 @@ async def delete_ref_audio(audio_id: str, user: dict = Depends(get_current_user)
except Exception as e:
logger.error(f"删除参考音频失败: {e}")
raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}")
class RenameRequest(BaseModel):
    """Request body for renaming a reference audio's display name."""
    # New display name; extension is appended server-side if missing.
    new_name: str
@router.put("/{audio_id:path}")
async def rename_ref_audio(
    audio_id: str,
    request: RenameRequest,
    user: dict = Depends(get_current_user)
):
    """Rename a reference audio by rewriting the display name in its metadata JSON.

    The audio file itself is not moved; only the sidecar ``.json`` metadata's
    ``original_filename`` field changes.

    Args:
        audio_id: Storage path of the audio, e.g. "{user_id}/{timestamp}_{name}.wav".
        request: Carries the new display name.
        user: Authenticated user injected via dependency.

    Raises:
        HTTPException 403: the audio does not belong to the caller.
        HTTPException 400: the new name is empty.
        HTTPException 500: metadata read-back/upload failed.
    """
    user_id = user["id"]

    # Ownership check: storage paths are namespaced by user id.
    if not audio_id.startswith(f"{user_id}/"):
        raise HTTPException(status_code=403, detail="无权修改此文件")

    new_name = request.new_name.strip()
    if not new_name:
        raise HTTPException(status_code=400, detail="新名称不能为空")

    # Keep an extension on the display name (default to .wav).
    if not Path(new_name).suffix:
        new_name += ".wav"

    try:
        # 1. Derive the sidecar metadata path. Swap only the TRAILING ".wav"
        #    (a bare str.replace would also rewrite a ".wav" occurring earlier
        #    in the file name and produce a wrong metadata path).
        if audio_id.endswith(".wav"):
            metadata_path = audio_id[: -len(".wav")] + ".json"
        else:
            metadata_path = audio_id.replace(".wav", ".json")

        try:
            # Fetch the existing JSON via a signed URL.
            import httpx

            metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
            if not metadata_url:
                # No JSON yet — fall through to the fallback block below.
                raise Exception("Metadata not found")
            async with httpx.AsyncClient() as client:
                resp = await client.get(metadata_url)
                if resp.status_code == 200:
                    metadata = resp.json()
                else:
                    raise Exception(f"Failed to fetch metadata: {resp.status_code}")
        except Exception as e:
            logger.warning(f"无法读取元数据: {e}, 将创建新的元数据")
            # Fallback: build a minimal metadata record (ref_text may be lost).
            metadata = {
                "ref_text": "",
                "duration_sec": 0.0,
                "created_at": int(time.time()),
                "original_filename": new_name
            }

        # 2. Update the display name.
        metadata["original_filename"] = new_name

        # 3. Overwrite the metadata JSON in storage.
        await storage_service.upload_file(
            bucket=BUCKET_REF_AUDIOS,
            path=metadata_path,
            file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
            content_type="application/json"
        )

        return {"success": True, "name": new_name}
    except Exception as e:
        logger.error(f"重命名失败: {e}")
        raise HTTPException(status_code=500, detail=f"重命名失败: {str(e)}")

View File

@@ -38,11 +38,13 @@ async def extract_script_tool(
temp_dir.mkdir(parents=True, exist_ok=True)
# 1. 获取/保存文件
loop = asyncio.get_event_loop()
if file:
safe_filename = Path(file.filename).name.replace(" ", "_")
temp_path = temp_dir / f"tool_extract_{timestamp}_{safe_filename}"
with open(temp_path, "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
# 文件 I/O 放入线程池
await loop.run_in_executor(None, lambda: shutil.copyfileobj(file.file, open(temp_path, "wb")))
logger.info(f"Tool processing upload file: {temp_path}")
else:
# URL 下载逻辑
@@ -55,8 +57,8 @@ async def extract_script_tool(
logger.info(f"Tool downloading URL: {url}")
# 先尝试 yt-dlp
try:
# 封装 yt-dlp 下载函数 (Blocking)
def _download_yt_dlp():
import yt_dlp
logger.info("Attempting download with yt-dlp...")
@@ -80,8 +82,12 @@ async def extract_script_tool(
id = info.get('id')
downloaded_file = str(temp_dir / f"tool_download_{timestamp}_{id}.{ext}")
temp_path = Path(downloaded_file)
logger.info(f"yt-dlp downloaded to: {temp_path}")
return Path(downloaded_file)
# 先尝试 yt-dlp (Run in Executor)
try:
temp_path = await loop.run_in_executor(None, _download_yt_dlp)
logger.info(f"yt-dlp downloaded to: {temp_path}")
except Exception as e:
logger.warning(f"yt-dlp download failed: {e}. Trying manual Douyin fallback...")
@@ -107,46 +113,48 @@ async def extract_script_tool(
if not temp_path or not temp_path.exists():
raise HTTPException(400, "文件获取失败")
# 1.5 安全转换: 强制转为 WAV (16k) 传给 Whisper
# 这一步既能验证文件有效性ffmpeg会报错又能避免 PyAV 音频解码 bug
# 1.5 安全转换: 强制转为 WAV (16k)
import subprocess
audio_path = temp_dir / f"extract_audio_{timestamp}.wav"
try:
# ffmpeg -i input -vn -acodec pcm_s16le -ar 16000 -ac 1 output.wav -y
convert_cmd = [
'ffmpeg',
'-i', str(temp_path),
'-vn', # 忽略视频
'-acodec', 'pcm_s16le',
'-ar', '16000', # Whisper 推荐采样率
'-ac', '1', # 单声道
'-y', # 覆盖
str(audio_path)
]
# 捕获 stderr 以便出错时打印
subprocess.run(convert_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
logger.info(f"Converted to WAV: {audio_path}")
# 使用转换后的文件
target_path = audio_path
except subprocess.CalledProcessError as e:
error_log = e.stderr.decode('utf-8', errors='ignore') if e.stderr else str(e)
logger.error(f"FFmpeg check/convert failed: {error_log}")
# 尝试判断是不是 HTML
head = b""
def _convert_audio():
try:
with open(temp_path, 'rb') as f:
head = f.read(100)
except:
pass
if b'<!DOCTYPE html' in head or b'<html' in head:
convert_cmd = [
'ffmpeg',
'-i', str(temp_path),
'-vn', # 忽略视频
'-acodec', 'pcm_s16le',
'-ar', '16000', # Whisper 推荐采样率
'-ac', '1', # 单声道
'-y', # 覆盖
str(audio_path)
]
# 捕获 stderr
subprocess.run(convert_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return True
except subprocess.CalledProcessError as e:
error_log = e.stderr.decode('utf-8', errors='ignore') if e.stderr else str(e)
logger.error(f"FFmpeg check/convert failed: {error_log}")
# 检查是否为 HTML
head = b""
try:
with open(temp_path, 'rb') as f:
head = f.read(100)
except: pass
if b'<!DOCTYPE html' in head or b'<html' in head:
raise ValueError("HTML_DETECTED")
raise ValueError("CONVERT_FAILED")
# 执行转换 (Run in Executor)
try:
await loop.run_in_executor(None, _convert_audio)
logger.info(f"Converted to WAV: {audio_path}")
target_path = audio_path
except ValueError as ve:
if str(ve) == "HTML_DETECTED":
raise HTTPException(400, "下载的文件是网页而非视频,请重试或手动上传。")
raise HTTPException(400, "下载的文件已损坏或格式无法识别。")
else:
raise HTTPException(400, "下载的文件已损坏或格式无法识别。")
# 2. 提取文案 (Whisper)
script = await whisper_service.transcribe(str(target_path))

View File

def split_word_to_chars(word: str, start: float, end: float) -> list:
    """Split a timed word into per-token entries with interpolated timestamps.

    Consecutive ASCII alphanumerics are grouped into a single token (so "OK"
    stays one unit); every other non-whitespace character (CJK, punctuation)
    becomes its own token. The [start, end] span is divided evenly among the
    tokens.

    Returns:
        List of dicts with keys "word"/"start"/"end" (times rounded to 3 dp).
    """
    tokens = []
    run = []  # pending run of ASCII alphanumeric characters

    def _flush():
        # Close out the current ASCII run, if any.
        if run:
            tokens.append("".join(run))
            run.clear()

    for ch in word:
        if not ch.strip():
            continue  # drop whitespace entirely
        if ch.isascii() and ch.isalnum():
            run.append(ch)
        else:
            _flush()
            tokens.append(ch)
    _flush()

    if not tokens:
        return []
    if len(tokens) == 1:
        # Single token keeps the exact original span.
        return [{"word": tokens[0], "start": start, "end": end}]

    # Linear interpolation: each token gets an equal slice of the span.
    step = (end - start) / len(tokens)
    return [
        {
            "word": tok,
            "start": round(start + i * step, 3),
            "end": round(start + (i + 1) * step, 3),
        }
        for i, tok in enumerate(tokens)
    ]

View File

@@ -31,3 +31,7 @@ bcrypt==4.0.1
# 字幕对齐
faster-whisper>=1.0.0
# 文案提取与AI生成
yt-dlp>=2023.0.0
zai-sdk>=0.2.0

View File

@@ -0,0 +1,84 @@
import asyncio
import httpx
import logging
import subprocess
import time
from datetime import datetime

# Logging setup: mirror everything to watchdog.log and stdout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("watchdog.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("Watchdog")
# Service configuration. Each entry holds static config plus mutable running
# state: "failures" counts consecutive failed health checks and is reset on
# success or restart; "threshold" is how many consecutive failures trigger
# "restart_cmd" (a pm2 restart).
SERVICES = [
    {
        "name": "vigent2-qwen-tts",
        "url": "http://localhost:8009/health",
        "failures": 0,
        "threshold": 3,
        "timeout": 10.0,
        "restart_cmd": ["pm2", "restart", "vigent2-qwen-tts"]
    }
]
async def check_service(service):
    """Probe one service's health endpoint; restart it after repeated failures.

    Mutates ``service["failures"]`` in place.

    Returns:
        True          - healthy (HTTP 200).
        "restarting"  - failure threshold reached and restart command issued.
        False         - this check failed (threshold not reached, or restart failed).
    """
    try:
        timeout = service.get("timeout", 10.0)
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.get(service["url"])
            if response.status_code == 200:
                # Recovered after previous failures — log the transition once.
                if service["failures"] > 0:
                    logger.info(f"✅ 服务 {service['name']} 已恢复正常")
                service["failures"] = 0
                return True
            else:
                logger.warning(f"⚠️ 服务 {service['name']} 返回状态码 {response.status_code}")
    except Exception as e:
        logger.warning(f"⚠️ 无法连接服务 {service['name']}: {str(e)}")

    # Reached only when the probe above did not return True: count the failure.
    service["failures"] += 1
    logger.warning(f"❌ 服务 {service['name']} 连续失败 {service['failures']}/{service['threshold']}")

    if service["failures"] >= service["threshold"]:
        logger.error(f"🚨 服务 {service['name']} 已达到失败阈值,正在重启...")
        try:
            # Run the blocking restart in a thread pool so the event loop
            # (and health checks for other services) are not stalled while
            # pm2 does its work.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                None, lambda: subprocess.run(service["restart_cmd"], check=True)
            )
            logger.info(f"♻️ 服务 {service['name']} 重启命令已发送")
            # Reset the counter so the restarted service gets a fresh window.
            service["failures"] = 0
            return "restarting"
        except Exception as restart_error:
            logger.error(f"💥 重启服务 {service['name']} 失败: {restart_error}")
    return False
async def main():
    """Poll every configured service forever, restarting unhealthy ones."""
    logger.info("🛡️ ViGent2 服务看门狗 (Watchdog) 已启动")
    while True:
        # Check all services concurrently; exceptions from a single check must
        # not kill the watchdog loop, so collect them instead of raising.
        results = await asyncio.gather(
            *(check_service(service) for service in SERVICES),
            return_exceptions=True,
        )
        if "restarting" in results:
            # Grace period: give a freshly restarted service extra time to
            # boot before the next probe counts against it.
            await asyncio.sleep(30)
        # Poll every 30 seconds.
        await asyncio.sleep(30)
# Entry point: run the watchdog loop until interrupted (Ctrl+C).
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("🛑 看门狗已停止")