# ViGent2/backend/app/modules/ref_audios/service.py

import re
import os
import time
import json
import asyncio
import subprocess
import tempfile
from pathlib import Path
from typing import Optional

import httpx
from loguru import logger

from app.services.storage import storage_service
from app.modules.ref_audios.schemas import RefAudioResponse, RefAudioListResponse

# File extensions (lower-case, with the leading dot) accepted for reference-audio
# uploads; the upload path compares the lower-cased suffix of the incoming
# filename against this set.
ALLOWED_AUDIO_EXTENSIONS = {'.wav', '.mp3', '.m4a', '.webm', '.ogg', '.flac', '.aac'}
# Storage bucket that holds both the converted WAV files and their metadata JSON.
BUCKET_REF_AUDIOS = "ref-audios"


def sanitize_filename(filename: str, max_length: int = 50) -> str:
    """Return *filename* with unsafe characters replaced and its length capped.

    Characters that are problematic in paths or URLs (``< > : " / \\ | ? *``
    and any whitespace) are each replaced with an underscore. If the result
    is longer than ``max_length`` it is truncated, keeping the extension
    when the extension itself fits within the limit.

    Args:
        filename: The raw name to clean.
        max_length: Maximum length of the returned name (default 50).
            Expected to be a positive integer.

    Returns:
        The sanitized name, never longer than ``max_length`` characters.
    """
    safe_name = re.sub(r'[<>:"/\\|?*\s]', '_', filename)
    if len(safe_name) <= max_length:
        return safe_name

    ext = Path(safe_name).suffix
    if len(ext) > max_length:
        # The "extension" alone is over the limit; subtracting its length
        # would give a negative slice bound and a result longer than
        # max_length, so fall back to a plain hard truncation.
        return safe_name[:max_length]
    return safe_name[:max_length - len(ext)] + ext


def _get_audio_duration(file_path: str) -> float:
    """Return the duration of an audio file in seconds.

    Runs ``ffprobe`` (10 second timeout) and parses its output as a float.
    Any failure (ffprobe missing, timeout, unparsable output) is logged as
    a warning and reported as ``0.0`` instead of raising.
    """
    probe_cmd = [
        'ffprobe', '-v', 'quiet',
        '-show_entries', 'format=duration',
        '-of', 'csv=p=0',
        file_path,
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=10)
        raw_duration = probe.stdout.strip()
        seconds = float(raw_duration)
    except Exception as e:
        logger.warning(f"获取音频时长失败: {e}")
        seconds = 0.0
    return seconds


def _convert_to_wav(input_path: str, output_path: str) -> bool:
    """Transcode an audio file to 16 kHz mono 16-bit PCM WAV using ffmpeg.

    Overwrites ``output_path`` if it exists (``-y``). Returns ``True`` on
    success; on any failure (non-zero exit, timeout after 60 seconds,
    ffmpeg missing) the error is logged and ``False`` is returned.
    """
    ffmpeg_cmd = [
        'ffmpeg', '-y', '-i', input_path,
        '-ar', '16000',           # sample rate
        '-ac', '1',               # mono
        '-acodec', 'pcm_s16le',   # 16-bit little-endian PCM
        output_path,
    ]
    try:
        subprocess.run(ffmpeg_cmd, capture_output=True, timeout=60, check=True)
    except Exception as e:
        logger.error(f"音频转换失败: {e}")
        return False
    else:
        return True


async def upload_ref_audio(file, ref_text: str, user_id: str) -> dict:
    """Upload a reference audio clip: transcode it, validate it, and store it.

    The upload is converted to 16 kHz mono WAV, its duration is checked
    (1-60 seconds), and the WAV plus a JSON metadata sidecar are written to
    the reference-audio bucket under ``{user_id}/{timestamp}_{safe_name}``.

    Args:
        file: Uploaded file object exposing ``filename`` and an awaitable
            ``read()`` (presumably a FastAPI ``UploadFile`` -- confirm
            against the router).
        ref_text: Transcript of the audio; at least 2 non-blank characters.
        user_id: Owner of the file; used as the storage path prefix.

    Returns:
        A ``RefAudioResponse`` dumped to a dict. ``name`` is the display
        name stored in the metadata (with a ``(n)`` suffix when earlier
        uploads share the same stored name).

    Raises:
        ValueError: Missing filename, unsupported extension, reference text
            too short, or duration outside the 1-60 second range.
        RuntimeError: The ffmpeg conversion failed.
    """
    if not file.filename:
        raise ValueError("文件名无效")
    filename = file.filename

    ext = Path(filename).suffix.lower()
    if ext not in ALLOWED_AUDIO_EXTENSIONS:
        # sorted() keeps the message stable; set iteration order is arbitrary.
        raise ValueError(f"不支持的音频格式: {ext}。支持的格式: {', '.join(sorted(ALLOWED_AUDIO_EXTENSIONS))}")

    if not ref_text or len(ref_text.strip()) < 2:
        raise ValueError("参考文字不能为空")
    clean_ref_text = ref_text.strip()

    # Read the upload before creating any temp file so that a failed read
    # cannot leave a stray file behind.
    content = await file.read()

    with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp_input:
        tmp_input_path = tmp_input.name
    tmp_wav_path = tmp_input_path + ".wav"

    # ffmpeg/ffprobe and the file reads/writes are blocking; run them in the
    # default executor so the event loop keeps serving other requests.
    loop = asyncio.get_running_loop()
    try:
        await loop.run_in_executor(None, Path(tmp_input_path).write_bytes, content)

        # Convert to WAV.
        converted = await loop.run_in_executor(
            None, _convert_to_wav, tmp_input_path, tmp_wav_path
        )
        if not converted:
            raise RuntimeError("音频格式转换失败")

        # Validate the duration.
        duration = await loop.run_in_executor(None, _get_audio_duration, tmp_wav_path)
        if duration < 1.0:
            raise ValueError("音频时长过短，至少需要 1 秒")
        if duration > 60.0:
            raise ValueError("音频时长过长，最多 60 秒")

        # Build the storage path.
        timestamp = int(time.time())
        safe_name = sanitize_filename(Path(filename).stem)
        storage_path = f"{user_id}/{timestamp}_{safe_name}.wav"

        # Detect earlier uploads of the same name. Stored objects are named
        # "{timestamp}_{safe_name}.wav", so match against that form rather
        # than the raw upload filename (which differs for non-wav uploads
        # and for names containing sanitized characters).
        existing_files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
        stored_name = re.compile(rf"\d+_{re.escape(safe_name)}\.wav")
        dup_count = sum(
            1 for entry in existing_files
            if stored_name.fullmatch(entry.get('name') or '')
        )

        final_display_name = filename
        if dup_count > 0:
            name_stem = Path(filename).stem
            name_ext = Path(filename).suffix
            final_display_name = f"{name_stem}({dup_count}){name_ext}"

        # Upload the WAV file.
        wav_data = await loop.run_in_executor(None, Path(tmp_wav_path).read_bytes)
        await storage_service.upload_file(
            bucket=BUCKET_REF_AUDIOS,
            path=storage_path,
            file_data=wav_data,
            content_type="audio/wav"
        )

        # Upload the metadata JSON sidecar.
        metadata = {
            "ref_text": clean_ref_text,
            "original_filename": final_display_name,
            "duration_sec": duration,
            "created_at": timestamp
        }
        metadata_path = f"{user_id}/{timestamp}_{safe_name}.json"
        await storage_service.upload_file(
            bucket=BUCKET_REF_AUDIOS,
            path=metadata_path,
            file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
            content_type="application/json"
        )

        # Signed URL for immediate playback/download.
        signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)

        return RefAudioResponse(
            id=storage_path,
            # Return the same display name that was written to the metadata
            # so the upload response agrees with what listing will show.
            name=final_display_name,
            path=signed_url,
            ref_text=clean_ref_text,
            duration_sec=duration,
            created_at=timestamp
        ).model_dump()

    finally:
        # Best-effort cleanup; a missing temp file must not mask the real error.
        for tmp_path in (tmp_input_path, tmp_wav_path):
            try:
                os.unlink(tmp_path)
            except FileNotFoundError:
                pass


async def list_ref_audios(user_id: str) -> dict:
    """List all reference audios of a user, newest first.

    Every ``.wav`` object under the user's prefix becomes one item. Its
    ``.json`` sidecar is fetched through a signed URL to fill in the
    reference text, duration, creation time and display name; when the
    sidecar is missing or unreadable the creation time and display name
    are derived from the ``{timestamp}_{name}.wav`` object name instead.

    Args:
        user_id: Owner whose files are listed.

    Returns:
        A ``RefAudioListResponse`` dumped to a dict, items sorted by
        ``created_at`` descending.
    """
    files = await storage_service.list_files(BUCKET_REF_AUDIOS, user_id)
    wav_files = [f for f in files if (f.get("name") or "").endswith(".wav")]

    if not wav_files:
        return RefAudioListResponse(items=[]).model_dump()

    async def fetch_audio_info(client, f):
        name = f.get("name", "")
        storage_path = f"{user_id}/{name}"
        # Swap only the trailing ".wav"; str.replace would also rewrite a
        # ".wav" occurring in the middle of the name.
        metadata_path = f"{user_id}/{name[:-len('.wav')]}.json"

        metadata = {}
        try:
            metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
            resp = await client.get(metadata_url)
            if resp.status_code == 200:
                payload = resp.json()
                if isinstance(payload, dict):
                    metadata = payload
        except Exception as e:
            # Metadata is optional; fall back to values derived from the name.
            logger.debug(f"读取 metadata 失败: {e}")

        # "or" guards against explicit nulls in the stored JSON.
        ref_text = metadata.get("ref_text") or ""
        duration_sec = metadata.get("duration_sec") or 0.0
        created_at = metadata.get("created_at") or 0
        original_filename = metadata.get("original_filename") or ""

        if not created_at:
            # No usable metadata (error OR non-200 response): recover the
            # timestamp from the "{timestamp}_{name}.wav" object name.
            try:
                created_at = int(name.split("_")[0])
            except ValueError:
                created_at = 0

        signed_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, storage_path)

        display_name = original_filename if original_filename else name
        if not display_name or display_name == name:
            # Strip the leading timestamp from the raw object name.
            match = re.match(r'^\d+_(.+)$', name)
            if match:
                display_name = match.group(1)

        return RefAudioResponse(
            id=storage_path,
            name=display_name,
            path=signed_url,
            ref_text=ref_text,
            duration_sec=duration_sec,
            created_at=created_at
        )

    # One shared client (and connection pool) for all metadata fetches
    # instead of opening a new client per file.
    async with httpx.AsyncClient(timeout=5.0) as client:
        items = await asyncio.gather(*(fetch_audio_info(client, f) for f in wav_files))
    items = sorted(items, key=lambda x: x.created_at, reverse=True)

    return RefAudioListResponse(items=items).model_dump()


async def delete_ref_audio(audio_id: str, user_id: str) -> None:
    """Delete a reference audio and, best-effort, its metadata sidecar.

    Args:
        audio_id: Storage path of the audio (``{user_id}/...wav``).
        user_id: The requesting user; must own ``audio_id``.

    Raises:
        PermissionError: ``audio_id`` is not under the user's own prefix,
            or contains a ``..`` segment that could escape it.
    """
    # A plain prefix check is bypassable with "user/../other/file.wav",
    # so parent-directory segments are rejected as well.
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权删除此文件")

    await storage_service.delete_file(BUCKET_REF_AUDIOS, audio_id)

    if not audio_id.endswith(".wav"):
        # No sidecar path can be derived from a non-wav id.
        return

    # Swap only the trailing ".wav"; str.replace would also rewrite a
    # ".wav" occurring earlier in the path.
    metadata_path = audio_id[:-len(".wav")] + ".json"
    try:
        await storage_service.delete_file(BUCKET_REF_AUDIOS, metadata_path)
    except Exception as e:
        # Best-effort: the audio itself is already gone. Catching Exception
        # (not a bare except) lets task cancellation propagate.
        logger.warning(f"删除元数据失败: {e}")


async def rename_ref_audio(audio_id: str, new_name: str, user_id: str) -> dict:
    """Rename a reference audio by updating the display name in its metadata.

    The stored object is not moved; only ``original_filename`` in the JSON
    sidecar changes. If the sidecar cannot be read, a fresh one is created.

    Args:
        audio_id: Storage path of the audio (``{user_id}/...wav``).
        new_name: New display name; ``.wav`` is appended when it has no
            extension.
        user_id: The requesting user; must own ``audio_id``.

    Returns:
        ``{"name": <final display name>}``.

    Raises:
        PermissionError: ``audio_id`` is not under the user's own prefix,
            or contains a ``..`` segment that could escape it.
        ValueError: ``new_name`` is blank, or ``audio_id`` is not a
            ``.wav`` path.
    """
    # A plain prefix check is bypassable with "user/../other/file.wav",
    # so parent-directory segments are rejected as well.
    if not audio_id.startswith(f"{user_id}/") or ".." in audio_id.split("/"):
        raise PermissionError("无权修改此文件")

    new_name = new_name.strip()
    if not new_name:
        raise ValueError("新名称不能为空")

    if not Path(new_name).suffix:
        new_name += ".wav"

    # Derive the sidecar path from the trailing ".wav" only. With
    # str.replace a non-wav id would map onto itself and the object would
    # be overwritten with JSON below.
    if not audio_id.endswith(".wav"):
        raise ValueError("无效的音频 ID")
    metadata_path = audio_id[:-len(".wav")] + ".json"

    # Download the existing metadata.
    try:
        metadata_url = await storage_service.get_signed_url(BUCKET_REF_AUDIOS, metadata_path)
        async with httpx.AsyncClient() as client:
            resp = await client.get(metadata_url)
        if resp.status_code != 200:
            raise RuntimeError(f"Failed to fetch metadata: {resp.status_code}")
        metadata = resp.json()
        if not isinstance(metadata, dict):
            raise ValueError("metadata is not a JSON object")
    except Exception as e:
        logger.warning(f"无法读取元数据: {e}, 将创建新的元数据")
        # Prefer the upload timestamp embedded in "{timestamp}_{name}.wav"
        # so the item keeps its position in listings; fall back to now.
        stamp = re.match(r'^(\d+)_', audio_id.rsplit("/", 1)[-1])
        metadata = {
            "ref_text": "",
            "duration_sec": 0.0,
            "created_at": int(stamp.group(1)) if stamp else int(time.time()),
            "original_filename": new_name
        }

    # Update and overwrite the sidecar.
    metadata["original_filename"] = new_name
    await storage_service.upload_file(
        bucket=BUCKET_REF_AUDIOS,
        path=metadata_path,
        file_data=json.dumps(metadata, ensure_ascii=False).encode('utf-8'),
        content_type="application/json"
    )

    return {"name": new_name}