"""Reference-audio management API.

Supports uploading, listing and deleting reference audio clips used for
Qwen3-TTS voice cloning.
"""
import json
import os
import re
import subprocess
import tempfile
import time
from pathlib import Path
from typing import List, Optional

from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends
from fastapi.concurrency import run_in_threadpool
from loguru import logger
from pydantic import BaseModel

from app.core.deps import get_current_user
from app.services.storage import storage_service

router = APIRouter()

# Accepted upload formats, matched against the lower-cased file extension.
ALLOWED_AUDIO_EXTENSIONS = {'.wav', '.mp3', '.m4a', '.webm', '.ogg', '.flac', '.aac'}

# Storage bucket holding the reference audio files and their metadata JSON.
BUCKET_REF_AUDIOS = "ref-audios"

# Suffixes of the two objects stored per reference audio.
_WAV_SUFFIX = ".wav"
_METADATA_SUFFIX = ".json"


class RefAudioResponse(BaseModel):
    """One reference audio entry as returned by the upload and list endpoints."""

    id: str  # storage path of the WAV object ("<user_id>/<timestamp>_<name>.wav")
    name: str
    path: str  # signed URL for playback
    ref_text: str
    duration_sec: float
    created_at: int  # Unix timestamp in seconds


class RefAudioListResponse(BaseModel):
    """Response body of the list endpoint."""

    items: List[RefAudioResponse]


def sanitize_filename(filename: str, max_length: int = 50) -> str:
    """Replace unsafe characters in *filename* and cap its length.

    Each of ``< > : " / \\ | ? *`` and every whitespace character is
    replaced by an underscore. Names longer than *max_length* are truncated
    while keeping the suffix (as reported by ``Path.suffix``) when it fits.

    Args:
        filename: The raw name to clean.
        max_length: Maximum length of the returned name.

    Returns:
        The cleaned, length-limited name.
    """
    safe_name = re.sub(r'[<>:"/\\|?*\s]', '_', filename)
    if len(safe_name) > max_length:
        ext = Path(safe_name).suffix
        if len(ext) < max_length:
            safe_name = safe_name[:max_length - len(ext)] + ext
        else:
            # A suffix as long as the limit would make the slice bound
            # zero or negative; fall back to plain truncation.
            safe_name = safe_name[:max_length]
    return safe_name


def get_audio_duration(file_path: str) -> float:
    """Return the duration of an audio file in seconds, using ``ffprobe``.

    This is best-effort: any failure (ffprobe missing, timeout, unparsable
    output, ...) is logged as a warning and reported as ``0.0``.
    """
    try:
        result = subprocess.run(
            [
                'ffprobe', '-v', 'quiet',
                '-show_entries', 'format=duration',
                '-of', 'csv=p=0',
                file_path,
            ],
            capture_output=True, text=True, timeout=10
        )
        return float(result.stdout.strip())
    except Exception as e:
        logger.warning(f"获取音频时长失败: {e}")
        return 0.0


def convert_to_wav(input_path: str, output_path: str) -> bool:
    """Convert an audio file to 16 kHz mono 16-bit PCM WAV, using ``ffmpeg``.

    Returns:
        ``True`` on success; ``False`` (after logging the error) when ffmpeg
        fails, times out or cannot be started.
    """
    try:
        subprocess.run([
            'ffmpeg', '-y', '-i', input_path,
            '-ar', '16000',          # 16 kHz sample rate
            '-ac', '1',              # mono
            '-acodec', 'pcm_s16le',  # 16-bit PCM
            output_path
        ], capture_output=True, timeout=60, check=True)
        return True
    except Exception as e:
        logger.error(f"音频转换失败: {e}")
        return False


def _remove_temp_file(tmp_path: Optional[str]) -> None:
    """Delete a temporary file if it exists; failures are only logged."""
    if not tmp_path:
        return
    try:
        os.unlink(tmp_path)
    except FileNotFoundError:
        pass  # never created, or already removed
    except OSError as e:
        logger.warning(f"清理临时文件失败: {e}")


def _metadata_path_for(wav_path: str) -> str:
    """Return the metadata JSON path that belongs to a ``.wav`` storage path.

    Only the trailing suffix is swapped, so a ``.wav`` occurring inside the
    name itself is left untouched.
    """
    return wav_path[:-len(_WAV_SUFFIX)] + _METADATA_SUFFIX


def _timestamp_from_name(name: str) -> int:
    """Extract the leading ``<timestamp>_`` part of a stored name, or 0."""
    try:
        return int(name.split("_", 1)[0])
    except ValueError:
        return 0


def _coerce(value, caster, default):
    """Cast *value* with *caster*, returning *default* for None or bad input."""
    if value is None:
        return default
    try:
        return caster(value)
    except (TypeError, ValueError):
        return default


async def _load_metadata(client, metadata_path: str) -> dict:
    """Fetch a metadata JSON object through a signed URL (best-effort).

    Returns an empty dict when the object cannot be fetched, the response
    status is not 200, or the payload is not a JSON object.
    """
    try:
        metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
        resp = await client.get(metadata_url)
        if resp.status_code == 200:
            data = resp.json()
            if isinstance(data, dict):
                return data
    except Exception as e:
        logger.warning(f"读取 metadata 失败: {e}")
    return {}


@router.post("", response_model=RefAudioResponse)
async def upload_ref_audio(
    file: UploadFile = File(...),
    ref_text: str = Form(...),
    user: dict = Depends(get_current_user)
):
    """
    Upload a reference audio clip.

    - file: audio file (wav, mp3, m4a, webm, ...)
    - ref_text: transcript of the reference audio (required)

    The upload is normalized to 16 kHz mono WAV, must last between 1 and
    60 seconds, and is stored together with a metadata JSON next to it.

    Raises:
        HTTPException: 400 for an unsupported extension, a missing
            transcript or an out-of-range duration; 500 when conversion or
            storage fails.
    """
    user_id = user["id"]

    # The client may omit the filename; treat that as "no extension" so the
    # request is rejected with a 400 instead of failing on Path(None).
    original_filename = file.filename or ""

    # Validate the file extension.
    ext = Path(original_filename).suffix.lower()
    if ext not in ALLOWED_AUDIO_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的音频格式: {ext}。支持的格式: {', '.join(sorted(ALLOWED_AUDIO_EXTENSIONS))}"
        )

    # Validate the transcript.
    if not ref_text or len(ref_text.strip()) < 2:
        raise HTTPException(status_code=400, detail="参考文字不能为空")
    cleaned_text = ref_text.strip()

    tmp_input_path = None
    tmp_wav_path = None
    try:
        # Spool the upload to a temporary file so ffmpeg can read it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp_input:
            # Record the path first so cleanup still happens if reading fails.
            tmp_input_path = tmp_input.name
            tmp_input.write(await file.read())

        # Always convert, even for .wav input, to normalize the format.
        # ffmpeg/ffprobe are blocking, so run them off the event loop.
        tmp_wav_path = tmp_input_path + ".wav"
        converted = await run_in_threadpool(convert_to_wav, tmp_input_path, tmp_wav_path)
        if not converted:
            raise HTTPException(status_code=500, detail="音频格式转换失败")

        # Check the duration of the normalized audio.
        duration = await run_in_threadpool(get_audio_duration, tmp_wav_path)
        if duration < 1.0:
            raise HTTPException(status_code=400, detail="音频时长过短,至少需要 1 秒")
        if duration > 60.0:
            raise HTTPException(status_code=400, detail="音频时长过长,最多 60 秒")

        # Build the storage path.
        timestamp = int(time.time())
        safe_name = sanitize_filename(Path(original_filename).stem)
        storage_path = f"{user_id}/{timestamp}_{safe_name}.wav"

        # Upload the WAV file.
        with open(tmp_wav_path, 'rb') as f:
            wav_data = f.read()

        await storage_service.upload_file(
            bucket=BUCKET_REF_AUDIOS,
            path=storage_path,
            file_data=wav_data,
            content_type="audio/wav"
        )

        # Upload the metadata JSON next to the audio.
        metadata = {
            "ref_text": cleaned_text,
            "original_filename": original_filename,
            "duration_sec": duration,
            "created_at": timestamp
        }
        metadata_path = f"{user_id}/{timestamp}_{safe_name}.json"
        await storage_service.upload_file(
            bucket=BUCKET_REF_AUDIOS,
            path=metadata_path,
            file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
            content_type="application/json"
        )

        # Signed URL for playback.
        signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)

        return RefAudioResponse(
            id=storage_path,
            name=original_filename,
            path=signed_url,
            ref_text=cleaned_text,
            duration_sec=duration,
            created_at=timestamp
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"上传参考音频失败: {e}")
        raise HTTPException(status_code=500, detail=f"上传失败: {str(e)}") from e
    finally:
        # Remove the temporary files on every exit path, not only on success.
        _remove_temp_file(tmp_input_path)
        _remove_temp_file(tmp_wav_path)


@router.get("", response_model=RefAudioListResponse)
async def list_ref_audios(user: dict = Depends(get_current_user)):
    """List all reference audio clips of the current user, newest first.

    Metadata is read best-effort: when the metadata JSON of an entry cannot
    be loaded, the entry is still listed with empty text, zero duration and
    a creation time taken from the timestamp prefix of its file name.

    Raises:
        HTTPException: 500 when the listing itself fails.
    """
    user_id = user["id"]

    try:
        # Imported lazily so that importing this module does not require httpx.
        import httpx

        # List the objects in the user's directory.
        files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)

        items = []
        # One HTTP client is shared by all metadata downloads of this request.
        async with httpx.AsyncClient() as client:
            for f in files:
                name = f.get("name") or ""
                if not name.endswith(_WAV_SUFFIX):
                    continue

                storage_path = f"{user_id}/{name}"
                metadata = await _load_metadata(client, _metadata_path_for(storage_path))

                # Coerce defensively so one malformed metadata file cannot
                # fail response validation for the whole list.
                ref_text = _coerce(metadata.get("ref_text"), str, "")
                duration_sec = _coerce(metadata.get("duration_sec"), float, 0.0)
                created_at = _coerce(metadata.get("created_at"), int, 0)
                if not created_at:
                    # No usable metadata: fall back to the file-name timestamp.
                    created_at = _timestamp_from_name(name)

                # Signed URL for playback.
                signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)

                items.append(RefAudioResponse(
                    id=storage_path,
                    name=name,
                    path=signed_url,
                    ref_text=ref_text,
                    duration_sec=duration_sec,
                    created_at=created_at
                ))

        # Newest first.
        items.sort(key=lambda x: x.created_at, reverse=True)

        return RefAudioListResponse(items=items)

    except Exception as e:
        logger.error(f"列出参考音频失败: {e}")
        raise HTTPException(status_code=500, detail=f"获取列表失败: {str(e)}") from e


@router.delete("/{audio_id:path}")
async def delete_ref_audio(audio_id: str, user: dict = Depends(get_current_user)):
    """Delete a reference audio clip and, if present, its metadata JSON.

    Args:
        audio_id: Storage path of the audio, as returned in ``id`` by the
            upload and list endpoints.

    Raises:
        HTTPException: 403 when the path is outside the caller's own
            directory; 500 when deleting the audio object fails.
    """
    user_id = user["id"]

    # Security check: callers may only delete files under their own prefix.
    # ".." segments are rejected as well, because a bare prefix test would
    # accept "<user_id>/../<other>/x.wav".
    # NOTE(review): whether the storage backend resolves ".." is not visible
    # here; this is a defensive guard.
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise HTTPException(status_code=403, detail="无权删除此文件")

    try:
        # Delete the WAV file.
        await storage_service.delete_file(BUCKET_REF_AUDIOS, audio_id)

        # Delete the metadata JSON that sits next to it.
        if audio_id.endswith(_WAV_SUFFIX):
            try:
                await storage_service.delete_file(BUCKET_REF_AUDIOS, _metadata_path_for(audio_id))
            except Exception as e:
                # The metadata may legitimately be missing; do not fail the request.
                logger.debug(f"删除 metadata 失败 (可能不存在): {e}")

        return {"success": True, "message": "删除成功"}

    except Exception as e:
        logger.error(f"删除参考音频失败: {e}")
        raise HTTPException(status_code=500, detail=f"删除失败: {str(e)}") from e