更新

2026-01-27 16:52:40 +08:00
parent c6c4b2313f
commit ee8cb9cfd2
14 changed files with 1307 additions and 217 deletions
--- a/backend/app/api/materials.py
+++ b/backend/app/api/materials.py
@@ -1,100 +1,331 @@
-from fastapi import APIRouter, UploadFile, File, HTTPException
+from fastapi import APIRouter, UploadFile, File, HTTPException, Request, BackgroundTasks, Depends
 from app.core.config import settings
-import shutil
+from app.core.deps import get_current_user
+from app.services.storage import storage_service
 import re
 import time
+import traceback
+import os
+import aiofiles
 from pathlib import Path
+from loguru import logger

 router = APIRouter()

-
 def sanitize_filename(filename: str) -> str:
-    """清理文件名，移除不安全字符"""
-    # 移除路径分隔符和特殊字符
    safe_name = re.sub(r'[<>:"/\\|?*]', '_', filename)
-    # 限制长度
    if len(safe_name) > 100:
        ext = Path(safe_name).suffix
        safe_name = safe_name[:100 - len(ext)] + ext
    return safe_name

+async def process_and_upload(temp_file_path: str, original_filename: str, content_type: str, user_id: str):
+    """Background task to strip multipart headers and upload to Supabase"""
+    try:
+        logger.info(f"Processing raw upload: {temp_file_path} for user {user_id}")
+
+        # 1. Analyze file to find actual video content (strip multipart boundaries)
+        # This is a simplified manual parser for a SINGLE file upload.
+        # Structure:
+        # --boundary
+        # Content-Disposition: form-data; name="file"; filename="..."
+        # Content-Type: video/mp4
+        # \r\n\r\n
+        # [DATA]
+        # \r\n--boundary--
+
+        # We need to read the first few KB to find the header end
+        start_offset = 0
+        end_offset = 0
+        boundary = b""
+
+        file_size = os.path.getsize(temp_file_path)
+
+        with open(temp_file_path, 'rb') as f:
+            # Read first 4KB to find header
+            head = f.read(4096)
+
+            # Find boundary
+            first_line_end = head.find(b'\r\n')
+            if first_line_end == -1:
+                raise Exception("Could not find boundary in multipart body")
+
+            boundary = head[:first_line_end] # e.g. --boundary123
+            logger.info(f"Detected boundary: {boundary}")
+
+            # Find end of headers (\r\n\r\n)
+            header_end = head.find(b'\r\n\r\n')
+            if header_end == -1:
+                raise Exception("Could not find end of multipart headers")
+
+            start_offset = header_end + 4
+            logger.info(f"Video data starts at offset: {start_offset}")
+
+            # Find end boundary (read from end of file)
+            # It should be \r\n + boundary + -- + \r\n
+            # We seek to end-200 bytes
+            f.seek(max(0, file_size - 200))
+            tail = f.read()
+
+            # The closing boundary is usually --boundary--
+            # We look for the last occurrence of the boundary
+            last_boundary_pos = tail.rfind(boundary)
+            if last_boundary_pos != -1:
+                # The data ends before \r\n + boundary
+                # The tail buffer relative position needs to be converted to absolute
+                end_pos_in_tail = last_boundary_pos
+                # We also need to check for the preceding \r\n
+                if end_pos_in_tail >= 2 and tail[end_pos_in_tail-2:end_pos_in_tail] == b'\r\n':
+                     end_pos_in_tail -= 2
+
+                # Absolute end offset
+                end_offset = (file_size - 200) + last_boundary_pos
+                # Correction for CRLF before boundary
+                # Actually, simply: read until (file_size - len(tail) + last_boundary_pos) - 2
+                end_offset = (max(0, file_size - 200) + last_boundary_pos) - 2
+            else:
+                logger.warning("Could not find closing boundary, assuming EOF")
+                end_offset = file_size
+
+            logger.info(f"Video data ends at offset: {end_offset}. Total video size: {end_offset - start_offset}")
+
+        # 2. Extract and Upload to Supabase
+        # Since we have the file on disk, we can just pass the file object (seeked) to upload_file?
+        # Or if upload_file expects bytes/path, checking storage.py...
+        # It takes `file_data` (bytes) or file-like?
+        # supabase-py's `upload` method handles parsing if we pass a file object.
+        # But we need to pass ONLY the video slice.
+        # So we create a generator or a sliced file object?
+        # Simpler: Read the slice into memory if < 1GB? Or copy to new temp file?
+        # Copying to new temp file is safer for memory.
+
+        video_path = temp_file_path + "_video.mp4"
+        with open(temp_file_path, 'rb') as src, open(video_path, 'wb') as dst:
+            src.seek(start_offset)
+            # Copy in chunks
+            bytes_to_copy = end_offset - start_offset
+            copied = 0
+            while copied < bytes_to_copy:
+                chunk_size = min(1024*1024*10, bytes_to_copy - copied) # 10MB chunks
+                chunk = src.read(chunk_size)
+                if not chunk:
+                    break
+                dst.write(chunk)
+                copied += len(chunk)
+
+        logger.info(f"Extracted video content to {video_path}")
+
+        # 3. Upload to Supabase with user isolation
+        timestamp = int(time.time())
+        safe_name = re.sub(r'[^a-zA-Z0-9._-]', '', original_filename)
+        # 使用 user_id 作为目录前缀实现隔离
+        storage_path = f"{user_id}/{timestamp}_{safe_name}"
+
+        # Use storage service (this calls Supabase which might do its own http request)
+        # We read the cleaned video file
+        with open(video_path, 'rb') as f:
+            file_content = f.read() # Still reading into memory for simple upload call, but server has 32GB RAM so ok for 500MB
+            await storage_service.upload_file(
+                bucket=storage_service.BUCKET_MATERIALS,
+                path=storage_path,
+                file_data=file_content,
+                content_type=content_type
+            )
+
+        logger.info(f"Upload to Supabase complete: {storage_path}")
+
+        # Cleanup
+        os.remove(temp_file_path)
+        os.remove(video_path)
+
+        return storage_path
+
+    except Exception as e:
+        logger.error(f"Background upload processing failed: {e}\n{traceback.format_exc()}")
+        raise
+

@router.post("")
-async def upload_material(file: UploadFile = File(...)):
-    if not file.filename.lower().endswith(('.mp4', '.mov', '.avi')):
-        raise HTTPException(400, "Invalid format")
+async def upload_material(
+    request: Request,
+    background_tasks: BackgroundTasks,
+    current_user: dict = Depends(get_current_user)
+):
+    user_id = current_user["id"]
+    logger.info(f"ENTERED upload_material (Streaming Mode) for user {user_id}. Headers: {request.headers}")
    
-    # 使用时间戳+原始文件名（保留原始名称，避免冲突）
+    filename = "unknown_video.mp4" # Fallback
+    content_type = "video/mp4"
+    
+    # Try to parse filename from header if possible (unreliable in raw stream)
+    # We will rely on post-processing or client hint
+    # Frontend sends standard multipart.
+    
+    # Create temp file
    timestamp = int(time.time())
-    safe_name = sanitize_filename(file.filename)
-    save_path = settings.UPLOAD_DIR / "materials" / f"{timestamp}_{safe_name}"
-    
-    # Save file
-    with open(save_path, "wb") as buffer:
-        shutil.copyfileobj(file.file, buffer)
-    
-    # Calculate size
-    size_mb = save_path.stat().st_size / (1024 * 1024)
-    
-    # 提取显示名称（去掉时间戳前缀）
-    display_name = safe_name
+    temp_filename = f"upload_{timestamp}.raw"
+    temp_path = os.path.join("/tmp", temp_filename) # Use /tmp on Linux
+    # Ensure /tmp exists (it does) but verify paths
+    if os.name == 'nt': # Local dev
+        temp_path = f"d:/tmp/{temp_filename}"
+        os.makedirs("d:/tmp", exist_ok=True)
+
+    try:
+        total_size = 0
+        last_log = 0
        
-    return {
-        "id": save_path.stem,
-        "name": display_name, 
-        "path": f"uploads/materials/{save_path.name}",
-        "size_mb": size_mb,
-        "type": "video"
-    }
+        async with aiofiles.open(temp_path, 'wb') as f:
+            async for chunk in request.stream():
+                await f.write(chunk)
+                total_size += len(chunk)
+                
+                # Log progress every 20MB
+                if total_size - last_log > 20 * 1024 * 1024:
+                    logger.info(f"Receiving stream... Processed {total_size / (1024*1024):.2f} MB")
+                    last_log = total_size
+                    
+        logger.info(f"Stream reception complete. Total size: {total_size} bytes. Saved to {temp_path}")
+        
+        if total_size == 0:
+            raise HTTPException(400, "Received empty body")
+
+        # Attempt to extract filename from the saved file's first bytes?
+        # Or just accept it as "uploaded_video.mp4" for now to prove it works.
+        # We can try to regex the header in the file content we just wrote.
+        # Implemented in background task to return success immediately.
+        
+        # Wait, if we return immediately, the user's UI might not show the file yet?
+        # The prompt says "Wait for upload". 
+        # But to avoid User Waiting Timeout, maybe returning early is better?
+        # NO, user expects the file to be in the list.
+        # So we Must await the processing.
+        # But "Processing" (Strip + Upload to Supabase) takes time.
+        # Receiving took time.
+        # If we await Supabase upload, does it timeout?
+        # Supabase upload is outgoing. Usually faster/stable.
+        
+        # Let's await the processing to ensure "List Materials" shows it.
+        # We need to extract the filename for the list.
+        
+        # Quick extract filename from first 4kb
+        with open(temp_path, 'rb') as f:
+            head = f.read(4096).decode('utf-8', errors='ignore')
+            match = re.search(r'filename="([^"]+)"', head)
+            if match:
+                filename = match.group(1)
+                logger.info(f"Extracted filename from body: {filename}")
+            
+        # Run processing sync (in await)
+        storage_path = await process_and_upload(temp_path, filename, content_type, user_id)
+
+        # Get signed URL (it exists now)
+        signed_url = await storage_service.get_signed_url(
+            bucket=storage_service.BUCKET_MATERIALS,
+            path=storage_path
+        )
+        
+        size_mb = total_size / (1024 * 1024) # Approximate (includes headers)
+
+        # 从 storage_path 提取显示名
+        display_name = storage_path.split('/')[-1]  # 去掉 user_id 前缀
+        if '_' in display_name:
+            parts = display_name.split('_', 1)
+            if parts[0].isdigit():
+                display_name = parts[1]
+
+        return {
+            "id": storage_path,
+            "name": display_name,
+            "path": signed_url,
+            "size_mb": size_mb,
+            "type": "video"
+        }
+        
+    except Exception as e:
+        error_msg = f"Streaming upload failed: {str(e)}"
+        detail_msg = f"Exception: {repr(e)}\nArgs: {e.args}\n{traceback.format_exc()}"
+        logger.error(error_msg + "\n" + detail_msg)
+        
+        # Write to debug file
+        try:
+             with open("debug_upload.log", "a") as logf:
+                 logf.write(f"\n--- Error at {time.ctime()} ---\n")
+                 logf.write(detail_msg)
+                 logf.write("\n-----------------------------\n")
+        except:
+             pass
+
+        if os.path.exists(temp_path):
+            try:
+                os.remove(temp_path)
+            except:
+                pass
+        raise HTTPException(500, f"Upload failed. Check server logs. Error: {str(e)}")
+

@router.get("")
-async def list_materials():
-    materials_dir = settings.UPLOAD_DIR / "materials"
-    files = []
-    if materials_dir.exists():
-        for f in materials_dir.glob("*"):
-            try:
-                stat = f.stat()
-                # 提取显示名称：去掉时间戳前缀 (格式: {timestamp}_{原始文件名})
-                display_name = f.name
-                if '_' in f.name:
-                    parts = f.name.split('_', 1)
-                    if parts[0].isdigit():
-                        display_name = parts[1]  # 原始文件名
-                
-                files.append({
-                    "id": f.stem,
-                    "name": display_name,
-                    "path": f"uploads/materials/{f.name}",
-                    "size_mb": stat.st_size / (1024 * 1024),
-                    "type": "video",
-                    "created_at": stat.st_ctime
-                })
-            except Exception:
-                continue
-    # Sort by creation time desc
-    files.sort(key=lambda x: x.get("created_at", 0), reverse=True)
-    return {"materials": files}
-
-
-@router.delete("/{material_id}")
-async def delete_material(material_id: str):
-    """删除素材文件"""
-    materials_dir = settings.UPLOAD_DIR / "materials"
-    
-    # 查找匹配的文件（ID 是文件名不含扩展名）
-    found = None
-    for f in materials_dir.glob("*"):
-        if f.stem == material_id:
-            found = f
-            break
-    
-    if not found:
-        raise HTTPException(404, "Material not found")
-    
+async def list_materials(current_user: dict = Depends(get_current_user)):
+    user_id = current_user["id"]
    try:
-        found.unlink()
+        # 只列出当前用户目录下的文件
+        files_obj = await storage_service.list_files(
+            bucket=storage_service.BUCKET_MATERIALS,
+            path=user_id
+        )
+        materials = []
+        for f in files_obj:
+            name = f.get('name')
+            if not name or name == '.emptyFolderPlaceholder':
+                continue
+            display_name = name
+            if '_' in name:
+                parts = name.split('_', 1)
+                if parts[0].isdigit():
+                    display_name = parts[1]
+            # 完整路径包含 user_id
+            full_path = f"{user_id}/{name}"
+            signed_url = await storage_service.get_signed_url(
+                bucket=storage_service.BUCKET_MATERIALS,
+                path=full_path
+            )
+            metadata = f.get('metadata', {})
+            size = metadata.get('size', 0)
+            # created_at 在顶层，是 ISO 字符串
+            created_at_str = f.get('created_at', '')
+            created_at = 0
+            if created_at_str:
+                from datetime import datetime
+                try:
+                    dt = datetime.fromisoformat(created_at_str.replace('Z', '+00:00'))
+                    created_at = int(dt.timestamp())
+                except:
+                    pass
+            materials.append({
+                "id": full_path,  # ID 使用完整路径
+                "name": display_name,
+                "path": signed_url,
+                "size_mb": size / (1024 * 1024),
+                "type": "video",
+                "created_at": created_at
+            })
+        materials.sort(key=lambda x: x['id'], reverse=True)
+        return {"materials": materials}
+    except Exception as e:
+        logger.error(f"List materials failed: {e}")
+        return {"materials": []}
+
+
+@router.delete("/{material_id:path}")
+async def delete_material(material_id: str, current_user: dict = Depends(get_current_user)):
+    user_id = current_user["id"]
+    # 验证 material_id 属于当前用户
+    if not material_id.startswith(f"{user_id}/"):
+        raise HTTPException(403, "无权删除此素材")
+    try:
+        await storage_service.delete_file(
+            bucket=storage_service.BUCKET_MATERIALS,
+            path=material_id
+        )
        return {"success": True, "message": "素材已删除"}
    except Exception as e:
        raise HTTPException(500, f"删除失败: {str(e)}")
-