""" Video Router Handles all video download operations: - Video info extraction (yt-dlp and gallery-dl platforms) - Video/gallery downloads with progress tracking - Download status, cancel, history - Preview list management - Thumbnail serving and caching """ import asyncio import hashlib import json import time import uuid from datetime import datetime from pathlib import Path from typing import Dict, List, Literal, Optional import re from urllib.parse import urlparse from fastapi import APIRouter, BackgroundTasks, Body, Depends, HTTPException, Query, Request from fastapi.responses import Response from pydantic import BaseModel, field_validator from slowapi import Limiter from slowapi.util import get_remote_address from ..core.dependencies import get_current_user, get_current_user_media, require_admin, get_app_state from ..core.config import settings from ..core.exceptions import ( handle_exceptions, RecordNotFoundError, ValidationError, DownloadError ) from ..core.responses import now_iso8601 from ..core.http_client import http_client from modules.universal_logger import get_logger logger = get_logger('API') router = APIRouter(prefix="/api/video", tags=["Video Downloads"]) limiter = Limiter(key_func=get_remote_address) # In-memory tracking for active downloads video_downloads: Dict[str, Dict] = {} video_downloads_lock = asyncio.Lock() # For async code import threading video_downloads_thread_lock = threading.Lock() # For sync callbacks from background threads # Cleanup threshold DOWNLOAD_CLEANUP_AGE_SECONDS = 3600 # 1 hour # Cache for downloaded temp files (for HLS proxy streaming) # Key: cache_key (platform_videoid_quality), Value: {"path": str, "expires": float, "downloading": bool} proxy_file_cache: Dict[str, Dict] = {} proxy_file_cache_lock = asyncio.Lock() PROXY_FILE_CACHE_DURATION = settings.PROXY_FILE_CACHE_DURATION # ============================================================================ # PYDANTIC MODELS # ============================================================================ class VideoDownloadRequest(BaseModel): url: str platform: Optional[str] = None tracking_video_id: Optional[str] = None class VideoPreviewAddRequest(BaseModel): platform: str video_info: Dict # ============================================================================ # HELPER FUNCTIONS # ============================================================================ async def cleanup_old_video_downloads(): """Remove old completed/failed downloads from tracking dict.""" current_time = time.time() async with video_downloads_lock: keys_to_remove = [] for key, status in video_downloads.items(): if status.get('status') in ['completed', 'failed']: if current_time - status.get('timestamp', 0) > DOWNLOAD_CLEANUP_AGE_SECONDS: keys_to_remove.append(key) for key in keys_to_remove: del video_downloads[key] async def download_and_cache_thumbnail(thumbnail_url: str) -> Optional[bytes]: """Download a thumbnail and return the binary data for caching. Always returns JPEG format. Converts webp to jpg if needed. For YouTube, prefers jpg URL over webp. """ if not thumbnail_url: return None # For YouTube, try maxresdefault first (1280x720, no black bars), fallback to hqdefault url_to_fetch = thumbnail_url if 'ytimg.com' in thumbnail_url: # Extract video_id from URL pattern: /vi/{video_id}/ or /vi_webp/{video_id}/ match = re.search(r'/vi(?:_webp)?/([^/]+)/', thumbnail_url) if match: video_id = match.group(1) # Try maxresdefault first, then hqdefault try: import httpx async with httpx.AsyncClient(timeout=10.0) as client: for quality in ['maxresdefault', 'hqdefault']: url_to_fetch = f"https://i.ytimg.com/vi/{video_id}/{quality}.jpg" response = await client.get( url_to_fetch, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'} ) if response.status_code == 200 and len(response.content) > 1000: return response.content except Exception as e: logger.warning(f"Failed to cache YouTube thumbnail: {e}", module="VideoDownloader") return None try: import httpx async with httpx.AsyncClient(timeout=10.0) as client: response = await client.get( url_to_fetch, headers={ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' } ) if response.status_code == 200: content = response.content content_type = response.headers.get('content-type', '') # Convert webp to jpg if needed if 'webp' in content_type or url_to_fetch.endswith('.webp'): try: from PIL import Image import io img = Image.open(io.BytesIO(content)) if img.mode in ('RGBA', 'P'): img = img.convert('RGB') output = io.BytesIO() img.save(output, format='JPEG', quality=85) content = output.getvalue() except Exception as e: logger.warning(f"Failed to convert webp to jpg: {e}", module="VideoDownloader") return content except Exception as e: logger.warning(f"Failed to cache thumbnail: {e}", module="VideoDownloader") return None # ============================================================================ # VIDEO INFO ENDPOINTS # ============================================================================ @router.post("/info") @limiter.limit("20/minute") @handle_exceptions async def get_video_info( request: Request, body: VideoDownloadRequest, current_user: Dict = Depends(get_current_user) ): """ Get video information without downloading. Supports all platforms including yt-dlp sites and gallery-dl sites. Auto-detects platform if not provided. """ from modules.universal_video_downloader import UniversalVideoDownloader, PLATFORMS, GALLERY_DL_SITES app_state = get_app_state() # Auto-detect platform if not provided platform = body.platform if not platform: # Check gallery-dl sites first gallery_site = UniversalVideoDownloader.detect_gallery_dl_site(body.url) if gallery_site: platform = gallery_site else: # Check yt-dlp platforms for p, config in PLATFORMS.items(): downloader_temp = UniversalVideoDownloader(platform=p, unified_db=app_state.db) if downloader_temp.extract_video_id(body.url): platform = p break if not platform: platform = 'youtube' # Default fallback downloader = UniversalVideoDownloader(platform=platform, unified_db=app_state.db) # Check if this is a gallery-dl site if platform in GALLERY_DL_SITES: info = downloader.get_gallery_info(body.url) if info: return { "success": True, "platform": platform, "is_gallery": True, "info": { **info, "platform": platform, "already_downloaded": False } } else: raise ValidationError("Failed to fetch gallery info", {"url": body.url}) # Check if URL is a playlist is_playlist = any(pattern in body.url.lower() for pattern in [ '/playlist', 'list=', '/channel/', '/user/', '@', '/c/' ]) if is_playlist: playlist_info = downloader.get_playlist_info(body.url) if playlist_info and playlist_info.get('playlist_videos'): for video in playlist_info['playlist_videos']: video['platform'] = platform video['already_downloaded'] = downloader._is_already_downloaded( video.get('video_id') ) if video.get('video_id') else False return { "success": True, "platform": platform, "info": playlist_info } # Single video info info = downloader.get_video_info(body.url) if not info: raise ValidationError("Failed to fetch video info", {"url": body.url}) video_id = downloader.extract_video_id(body.url) already_downloaded = downloader._is_already_downloaded(video_id) if video_id else False return { "success": True, "platform": platform, "info": { **info, "platform": platform, "upload_date": info['upload_date'].isoformat() if info.get('upload_date') else None, "already_downloaded": already_downloaded } } # ============================================================================ # DOWNLOAD ENDPOINTS # ============================================================================ @router.post("/download") @limiter.limit("10/minute") @handle_exceptions async def start_video_download( request: Request, background_tasks: BackgroundTasks, body: VideoDownloadRequest, current_user: Dict = Depends(get_current_user) ): """ Start downloading a video from any supported platform. Supports yt-dlp platforms and gallery-dl sites. Returns immediately with download ID for status polling. """ from modules.universal_video_downloader import UniversalVideoDownloader, PLATFORMS, GALLERY_DL_SITES from cache_manager import invalidate_download_cache app_state = get_app_state() # Auto-detect platform if not provided platform = body.platform is_gallery = False if not platform: gallery_site = UniversalVideoDownloader.detect_gallery_dl_site(body.url) if gallery_site: platform = gallery_site is_gallery = True else: for p, config in PLATFORMS.items(): downloader_temp = UniversalVideoDownloader(platform=p, unified_db=app_state.db) if downloader_temp.extract_video_id(body.url): platform = p break if not platform: platform = 'youtube' else: is_gallery = platform in GALLERY_DL_SITES downloader = UniversalVideoDownloader(platform=platform, unified_db=app_state.db) platform_name = GALLERY_DL_SITES.get(platform, {}).get('name') or PLATFORMS.get(platform, {}).get('name', platform) # Extract video/gallery ID if is_gallery: video_id = hashlib.sha256(body.url.encode()).hexdigest()[:12] else: video_id = downloader.extract_video_id(body.url) if not video_id: raise ValidationError(f"Invalid {platform_name} URL", {"url": body.url}) tracking_video_id = body.tracking_video_id or video_id tracking_key = f"{platform}:{tracking_video_id}" # Check if already downloading - use atomic check-and-set to prevent race condition async with video_downloads_lock: if tracking_key in video_downloads: return { "success": True, "platform": platform, "video_id": tracking_video_id, "status": video_downloads[tracking_key].copy() } # Reserve the slot immediately to prevent race condition # This will be updated with full info after video info is fetched video_downloads[tracking_key] = { "id": str(uuid.uuid4()), "platform": platform, "video_id": tracking_video_id, "url": body.url, "status": "initializing", "progress": 0, "message": "Fetching video info...", "started_at": now_iso8601() } # Check if already downloaded if not is_gallery and downloader._is_already_downloaded(video_id): # Clean up the reservation async with video_downloads_lock: video_downloads.pop(tracking_key, None) raise ValidationError("Video already downloaded", {"video_id": video_id}) # Fetch video/gallery info if is_gallery: video_info = downloader.get_gallery_info(body.url) else: video_info = downloader.get_video_info(body.url) if not video_info: # Clean up the reservation on failure async with video_downloads_lock: video_downloads.pop(tracking_key, None) raise ValidationError( f"Failed to fetch {'gallery' if is_gallery else 'video'} info", {"url": body.url} ) # Convert datetime for JSON serialization if video_info.get('upload_date'): video_info['upload_date'] = video_info['upload_date'].isoformat() if hasattr( video_info['upload_date'], 'isoformat' ) else video_info['upload_date'] # Update download status with full info (reservation was made earlier to prevent race condition) async with video_downloads_lock: download_id = video_downloads[tracking_key]["id"] # Use existing ID from reservation video_downloads[tracking_key].update({ "status": "pending", "message": "Initializing download...", "video_info": video_info, "file_path": None, "error": None, "timestamp": time.time(), "is_gallery": is_gallery }) # Progress callback (thread-safe) def progress_callback(message, percentage, speed=None, eta=None): with video_downloads_thread_lock: if tracking_key in video_downloads: video_downloads[tracking_key]["progress"] = percentage video_downloads[tracking_key]["message"] = message video_downloads[tracking_key]["speed"] = speed video_downloads[tracking_key]["eta"] = eta if percentage >= 100: video_downloads[tracking_key]["status"] = "completed" elif percentage > 0: video_downloads[tracking_key]["status"] = "downloading" # Background download task (thread-safe) def download_task(): try: with video_downloads_thread_lock: video_downloads[tracking_key]["status"] = "downloading" if is_gallery: success, file_path, metadata = downloader.download_gallery( body.url, progress_callback=progress_callback, gallery_info=video_info ) else: success, file_path, metadata = downloader.download_video( body.url, progress_callback=progress_callback ) with video_downloads_thread_lock: if success: video_downloads[tracking_key]["status"] = "completed" video_downloads[tracking_key]["progress"] = 100 video_downloads[tracking_key]["message"] = "Download complete!" video_downloads[tracking_key]["file_path"] = file_path video_downloads[tracking_key]["video_info"] = metadata video_downloads[tracking_key]["timestamp"] = time.time() else: video_downloads[tracking_key]["status"] = "failed" video_downloads[tracking_key]["message"] = "Download failed" video_downloads[tracking_key]["timestamp"] = time.time() video_downloads[tracking_key]["error"] = metadata.get('error', 'Unknown error') if metadata else 'Unknown error' if success: invalidate_download_cache() logger.info(f"{platform_name} download completed: {file_path}", module="VideoDownloader") else: logger.error(f"{platform_name} download failed: {metadata.get('error') if metadata else 'Unknown error'}", module="VideoDownloader") except Exception as e: with video_downloads_thread_lock: video_downloads[tracking_key]["status"] = "failed" video_downloads[tracking_key]["message"] = "Error during download" video_downloads[tracking_key]["error"] = str(e) logger.error(f"{platform_name} download error: {e}", module="VideoDownloader") background_tasks.add_task(download_task) return { "success": True, "platform": platform, "video_id": tracking_video_id, "download_id": download_id, "message": "Download started", "is_gallery": is_gallery } @router.get("/status/{platform}/{video_id}") @limiter.limit("60/minute") @handle_exceptions async def get_video_download_status( request: Request, platform: str, video_id: str, current_user: Dict = Depends(get_current_user) ): """Get the status of a video download.""" import random if random.random() < 0.1: await cleanup_old_video_downloads() tracking_key = f"{platform}:{video_id}" async with video_downloads_lock: if tracking_key not in video_downloads: raise RecordNotFoundError("Download not found", {"platform": platform, "video_id": video_id}) status = video_downloads[tracking_key].copy() return { "success": True, "status": status } @router.delete("/cancel/{platform}/{video_id}") @limiter.limit("20/minute") @handle_exceptions async def cancel_video_download( request: Request, platform: str, video_id: str, current_user: Dict = Depends(get_current_user) ): """Cancel a video download (removes from tracking).""" tracking_key = f"{platform}:{video_id}" async with video_downloads_lock: if tracking_key in video_downloads: del video_downloads[tracking_key] return {"success": True, "message": "Download cancelled"} raise RecordNotFoundError("Download not found", {"platform": platform, "video_id": video_id}) # ============================================================================ # HISTORY AND MANAGEMENT ENDPOINTS # ============================================================================ @router.get("/history") @limiter.limit("300/minute") @handle_exceptions async def get_video_history( request: Request, platform: Optional[str] = None, search: Optional[str] = None, limit: int = Query(50, ge=1, le=500, description="Max items to return (1-500)"), offset: int = Query(0, ge=0, description="Number of items to skip"), current_user: Dict = Depends(get_current_user) ): """Get video download history with pagination and search.""" app_state = get_app_state() with app_state.db.get_connection() as conn: cursor = conn.cursor() # Build WHERE clause conditions = [] params = [] if platform: conditions.append("v.platform = ?") params.append(platform) if search: search_term = f"%{search}%" conditions.append("(v.title LIKE ? OR v.uploader LIKE ? OR v.video_id LIKE ?)") params.extend([search_term, search_term, search_term]) where_clause = " AND ".join(conditions) if conditions else "1=1" # Get total count cursor.execute(f'SELECT COUNT(*) FROM video_downloads v WHERE {where_clause}', params) total = cursor.fetchone()[0] # Get paginated results cursor.execute(f''' SELECT v.*, fi.width, fi.height FROM video_downloads v LEFT JOIN file_inventory fi ON v.file_path = fi.file_path WHERE {where_clause} ORDER BY v.download_date DESC LIMIT ? OFFSET ? ''', params + [limit, offset]) history = [] for row in cursor.fetchall(): metadata = json.loads(row['metadata']) if row['metadata'] else {} history.append({ 'id': row['id'], 'platform': row['platform'], 'video_id': row['video_id'], 'url': row['url'], 'title': row['title'], 'uploader': row['uploader'], 'upload_date': row['upload_date'], 'duration': row['duration'], 'file_path': row['file_path'], 'file_size': row['file_size'], 'download_date': row['download_date'], 'status': row['status'], 'thumbnail': metadata.get('thumbnail'), 'view_count': metadata.get('view_count'), 'like_count': metadata.get('like_count'), 'width': row['width'], 'height': row['height'], }) # Get platform stats for filter dropdown (count and size per platform) cursor.execute(''' SELECT platform, COUNT(*) as count, COALESCE(SUM(file_size), 0) as size FROM video_downloads GROUP BY platform ORDER BY platform ''') platform_stats = {row['platform']: {'count': row['count'], 'size': row['size']} for row in cursor.fetchall()} return { "success": True, "total": total, "history": history, "platforms": list(platform_stats.keys()), "platform_stats": platform_stats, "limit": limit, "offset": offset } @router.delete("/download/{platform}/{video_id}") @limiter.limit("10/minute") @handle_exceptions async def delete_video_download( request: Request, platform: str, video_id: str, current_user: Dict = Depends(require_admin) ): """Delete a video download (moves to recycle bin) - admin only.""" app_state = get_app_state() with app_state.db.get_connection() as conn: cursor = conn.cursor() cursor.execute(''' SELECT file_path, metadata FROM video_downloads WHERE platform = ? AND video_id = ? ''', (platform, video_id)) row = cursor.fetchone() if not row: raise RecordNotFoundError("Download not found", {"platform": platform, "video_id": video_id}) file_path = Path(row['file_path']) metadata = json.loads(row['metadata']) if row['metadata'] else {} is_gallery = metadata.get('is_gallery', False) deleted_files = [] errors = [] media_base = Path("/opt/immich/md") if is_gallery and file_path.is_dir(): # Gallery: delete all files in directory for f in file_path.iterdir(): if f.is_file() and not f.name.endswith('.json'): resolved = f.resolve() # Use relative_to() for safe path validation (prevents symlink bypass) try: resolved.relative_to(media_base.resolve()) except ValueError: continue # Path validation passed - move to recycle bin try: recycle_id = app_state.db.move_to_recycle_bin( file_path=str(resolved), deleted_from='video_downloads', deleted_by=current_user.get('sub'), metadata={'platform': platform, 'video_id': video_id} ) if recycle_id: deleted_files.append(str(f)) else: errors.append(str(f)) except Exception as e: logger.error(f"Error deleting {f}: {e}", module="VideoDownloader") errors.append(str(f)) # Remove empty directory and json files for f in file_path.iterdir(): if f.is_file(): try: f.unlink() except OSError: pass try: file_path.rmdir() except OSError: pass elif file_path.is_file(): resolved = file_path.resolve() # Use relative_to() for safe path validation (prevents symlink bypass) try: resolved.relative_to(media_base.resolve()) except ValueError: raise ValidationError("Access denied: file outside media directory") # Path validation passed - move to recycle bin recycle_id = app_state.db.move_to_recycle_bin( file_path=str(resolved), deleted_from='video_downloads', deleted_by=current_user.get('sub'), metadata={'platform': platform, 'video_id': video_id} ) if recycle_id: deleted_files.append(str(file_path)) else: errors.append(str(file_path)) # Clean up database records with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute('DELETE FROM video_downloads WHERE platform = ? AND video_id = ?', (platform, video_id)) if is_gallery: cursor.execute('DELETE FROM file_inventory WHERE file_path LIKE ?', (f"{row['file_path']}/%",)) else: cursor.execute('DELETE FROM file_inventory WHERE file_path = ?', (row['file_path'],)) cursor.execute('DELETE FROM downloads WHERE file_path = ?', (row['file_path'],)) conn.commit() logger.info(f"Deleted video download {platform}:{video_id} - {len(deleted_files)} files", module="VideoDownloader") return { "success": True, "deleted_files": deleted_files, "errors": errors, "deleted_count": len(deleted_files) } @router.get("/gallery-files/{platform}/{video_id}") @limiter.limit("60/minute") @handle_exceptions async def get_gallery_files( request: Request, platform: str, video_id: str, current_user: Dict = Depends(get_current_user) ): """Get all files from a gallery download for lightbox display.""" app_state = get_app_state() with app_state.db.get_connection() as conn: cursor = conn.cursor() cursor.execute(''' SELECT file_path, metadata FROM video_downloads WHERE platform = ? AND video_id = ? ''', (platform, video_id)) row = cursor.fetchone() if not row: raise RecordNotFoundError("Download not found", {"platform": platform, "video_id": video_id}) file_path = Path(row['file_path']) metadata = json.loads(row['metadata']) if row['metadata'] else {} is_gallery = metadata.get('is_gallery', False) files = [] video_exts = {'.mp4', '.webm', '.mov', '.avi', '.mkv', '.m4v'} image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'} # Helper to get width/height from file_inventory def get_file_dimensions(fp: str) -> tuple: with app_state.db.get_connection() as conn: cursor = conn.cursor() cursor.execute('SELECT width, height FROM file_inventory WHERE file_path = ?', (fp,)) row = cursor.fetchone() if row: return row['width'], row['height'] return None, None if is_gallery and file_path.is_dir(): for f in sorted(file_path.iterdir()): if f.is_file(): ext = f.suffix.lower() if ext in video_exts or ext in image_exts: media_type = 'video' if ext in video_exts else 'image' width, height = get_file_dimensions(str(f)) files.append({ 'file_path': str(f), 'filename': f.name, 'file_size': f.stat().st_size, 'media_type': media_type, 'width': width, 'height': height }) elif file_path.is_file(): ext = file_path.suffix.lower() media_type = 'video' if ext in video_exts else 'image' width, height = get_file_dimensions(str(file_path)) files.append({ 'file_path': str(file_path), 'filename': file_path.name, 'file_size': file_path.stat().st_size, 'media_type': media_type, 'width': width, 'height': height }) return { "success": True, "files": files, "is_gallery": is_gallery, "video_id": video_id, "platform": platform, "title": metadata.get('title', ''), "uploader": metadata.get('uploader', '') } # ============================================================================ # PREVIEW LIST ENDPOINTS # ============================================================================ @router.get("/preview-list") @limiter.limit("300/minute") @handle_exceptions async def get_video_preview_list( request: Request, platform: Optional[str] = None, limit: int = Query(100, ge=1, le=500, description="Max items to return (1-500)"), offset: int = Query(0, ge=0, description="Number of items to skip"), current_user: Dict = Depends(get_current_user) ): """Get video preview list with pagination.""" app_state = get_app_state() with app_state.db.get_connection() as conn: cursor = conn.cursor() if platform: cursor.execute('SELECT COUNT(*) FROM video_preview_list WHERE platform = ?', (platform,)) else: cursor.execute('SELECT COUNT(*) FROM video_preview_list') total = cursor.fetchone()[0] if platform: cursor.execute(''' SELECT * FROM video_preview_list WHERE platform = ? ORDER BY added_date DESC LIMIT ? OFFSET ? ''', (platform, limit, offset)) else: cursor.execute(''' SELECT * FROM video_preview_list ORDER BY added_date DESC LIMIT ? OFFSET ? ''', (limit, offset)) preview_list = [] for row in cursor.fetchall(): preview_list.append({ 'platform': row['platform'], 'video_id': row['video_id'], 'url': row['url'], 'title': row['title'], 'uploader': row['uploader'], 'upload_date': row['upload_date'], 'duration': row['duration'], 'description': row['description'], 'thumbnail': row['thumbnail'], 'view_count': row['view_count'], 'like_count': row['like_count'], 'already_downloaded': bool(row['already_downloaded']), }) return { "success": True, "preview_list": preview_list, "total": total, "limit": limit, "offset": offset } @router.post("/preview-list/add") @limiter.limit("50/minute") @handle_exceptions async def add_to_video_preview_list( request: Request, body: VideoPreviewAddRequest, current_user: Dict = Depends(get_current_user) ): """Add a video to the preview list.""" app_state = get_app_state() info = body.video_info platform = body.platform # Download and cache thumbnail thumbnail_url = info.get('thumbnail') thumbnail_data = await download_and_cache_thumbnail(thumbnail_url) with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute(''' INSERT OR REPLACE INTO video_preview_list (platform, video_id, url, title, uploader, upload_date, duration, description, thumbnail, thumbnail_data, view_count, like_count, already_downloaded, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ''', ( platform, info.get('video_id'), info.get('url', body.video_info.get('url', '')), info.get('title'), info.get('uploader'), info.get('upload_date'), info.get('duration'), info.get('description'), thumbnail_url, thumbnail_data, info.get('view_count'), info.get('like_count'), 1 if info.get('already_downloaded') else 0, json.dumps(info) )) conn.commit() return {"success": True, "thumbnail_cached": thumbnail_data is not None} @router.delete("/preview-list/{platform}/{video_id}") @limiter.limit("50/minute") @handle_exceptions async def remove_from_video_preview_list( request: Request, platform: str, video_id: str, current_user: Dict = Depends(get_current_user) ): """Remove a video from the preview list.""" app_state = get_app_state() with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute('DELETE FROM video_preview_list WHERE platform = ? AND video_id = ?', (platform, video_id)) deleted = cursor.rowcount conn.commit() logger.debug(f"Removed {deleted} video(s) from preview list: {platform}:{video_id}", module="VideoDownloader") return {"success": True} # ============================================================================ # THUMBNAIL ENDPOINTS # ============================================================================ @router.get("/thumbnail/{platform}/{video_id}") @limiter.limit("500/minute") @handle_exceptions async def get_video_thumbnail( request: Request, platform: str, video_id: str, source: str = "preview", token: str = None, current_user: Dict = Depends(get_current_user_media) ): """ Serve cached video thumbnail from database. Falls back to fetching and caching if not available. """ app_state = get_app_state() with app_state.db.get_connection() as conn: cursor = conn.cursor() if source == "preview": cursor.execute(''' SELECT thumbnail_data, thumbnail FROM video_preview_list WHERE platform = ? AND video_id = ? ''', (platform, video_id)) elif source == "queue": cursor.execute(''' SELECT thumbnail_data, thumbnail FROM video_download_queue WHERE platform = ? AND video_id = ? ''', (platform, video_id)) else: cursor.execute(''' SELECT thumbnail_data, metadata FROM video_downloads WHERE platform = ? AND video_id = ? ''', (platform, video_id)) row = cursor.fetchone() if not row: raise RecordNotFoundError("Video not found", {"platform": platform, "video_id": video_id}) thumbnail_data = row['thumbnail_data'] # Extract thumbnail URL if source in ("preview", "queue"): thumbnail_url = row['thumbnail'] else: try: metadata = json.loads(row['metadata']) if row['metadata'] else {} thumbnail_url = metadata.get('thumbnail', '') except (json.JSONDecodeError, TypeError): thumbnail_url = '' # Serve cached data if thumbnail_data: return Response( content=thumbnail_data, media_type="image/jpeg", headers={ 'Cache-Control': 'public, max-age=604800', 'Access-Control-Allow-Origin': '*' } ) # Fallback: fetch and cache if thumbnail_url: cached_data = await download_and_cache_thumbnail(thumbnail_url) if cached_data: # Determine target table based on source if source == "preview": table = "video_preview_list" elif source == "queue": table = "video_download_queue" else: table = "video_downloads" with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() if table == "video_preview_list": cursor.execute(''' UPDATE video_preview_list SET thumbnail_data = ? WHERE platform = ? AND video_id = ? ''', (cached_data, platform, video_id)) elif table == "video_download_queue": cursor.execute(''' UPDATE video_download_queue SET thumbnail_data = ? WHERE platform = ? AND video_id = ? ''', (cached_data, platform, video_id)) else: cursor.execute(''' UPDATE video_downloads SET thumbnail_data = ? WHERE platform = ? AND video_id = ? ''', (cached_data, platform, video_id)) conn.commit() return Response( content=cached_data, media_type="image/jpeg", headers={ 'Cache-Control': 'public, max-age=604800', 'Access-Control-Allow-Origin': '*' } ) raise RecordNotFoundError("Thumbnail not available", {"platform": platform, "video_id": video_id}) @router.get("/thumbnail-proxy") @limiter.limit("100/minute") @handle_exceptions async def proxy_video_thumbnail( request: Request, url: str, current_user: Dict = Depends(get_current_user) ): """ Proxy video thumbnails to bypass referer restrictions. Only allows specific trusted domains. """ allowed_domains = [ 'hdslb.com', 'i0.hdslb.com', 'i1.hdslb.com', 'i2.hdslb.com', 'ytimg.com', 'vimeocdn.com', 'dmcdn.net' ] parsed = urlparse(url) # Security: Validate scheme is http or https only if parsed.scheme not in ('http', 'https'): raise ValidationError("Invalid URL scheme", {"scheme": parsed.scheme}) # Security: Proper domain validation - must match exactly or be a subdomain # Using endswith with '.' prefix prevents bypass like evil.hdslb.com.attacker.com netloc = parsed.netloc.lower() domain_valid = False for domain in allowed_domains: if netloc == domain or netloc.endswith('.' + domain): domain_valid = True break if not domain_valid: raise ValidationError("Domain not allowed", {"domain": parsed.netloc}) response = await http_client.get( url, headers={ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' }, timeout=10.0 ) if response.status_code != 200: raise DownloadError(f"Failed to fetch thumbnail: {response.status_code}") content_type = response.headers.get('content-type', 'image/jpeg') return Response( content=response.content, media_type=content_type, headers={ 'Cache-Control': 'public, max-age=86400', 'Access-Control-Allow-Origin': '*' } ) # ============================================================================ # VIDEO STREAMING (yt-dlp direct URL extraction) # ============================================================================ # Cache for stream URLs (video_id -> {url, audio_url, expires, quality}) stream_url_cache: Dict[str, Dict] = {} DEFAULT_CACHE_DURATION_HOURS = 5 # Default: 5 hours (YouTube URLs typically valid for 6 hours) def get_stream_cache_duration() -> int: """Get cache duration from settings (in seconds).""" import sqlite3 try: app_state = get_app_state() with app_state.db.get_connection() as conn: cursor = conn.cursor() cursor.execute("SELECT value FROM settings WHERE key = 'video_streaming'") row = cursor.fetchone() if row: settings = json.loads(row[0]) hours = settings.get('cache_duration_hours', DEFAULT_CACHE_DURATION_HOURS) return int(hours) * 60 * 60 except (sqlite3.Error, json.JSONDecodeError, ValueError, TypeError): # Database, JSON parsing, or type conversion errors - use default pass return DEFAULT_CACHE_DURATION_HOURS * 60 * 60 # Valid quality values for video streaming VALID_QUALITIES = {"360", "480", "720", "1080", "best"} # Valid platforms for video streaming VALID_STREAM_PLATFORMS = {"youtube", "dailymotion", "bilibili", "twitter", "vimeo", "twitch"} # Regex for valid video IDs (alphanumeric, hyphens, underscores) VIDEO_ID_PATTERN = re.compile(r'^[a-zA-Z0-9_\-]{1,64}$') class StreamUrlRequest(BaseModel): video_id: str quality: Optional[str] = "720" platform: Optional[str] = "youtube" @field_validator('video_id') @classmethod def validate_video_id(cls, v: str) -> str: if not v or not VIDEO_ID_PATTERN.match(v): raise ValueError('Invalid video ID format (alphanumeric, hyphens, underscores only, max 64 chars)') return v @field_validator('quality') @classmethod def validate_quality(cls, v: Optional[str]) -> str: if v is None: return "720" if v not in VALID_QUALITIES: raise ValueError(f'Invalid quality. Must be one of: {", ".join(sorted(VALID_QUALITIES))}') return v @field_validator('platform') @classmethod def validate_platform(cls, v: Optional[str]) -> str: if v is None: return "youtube" if v not in VALID_STREAM_PLATFORMS: raise ValueError(f'Invalid platform. Must be one of: {", ".join(sorted(VALID_STREAM_PLATFORMS))}') return v @router.post("/stream-url") @limiter.limit("30/minute") @handle_exceptions async def get_stream_url( request: Request, body: StreamUrlRequest, current_user: Dict = Depends(get_current_user) ): """ Get direct streaming URL for a YouTube video using yt-dlp. Returns a direct video URL that can be used in an HTML5 video player. URLs are cached for 5 hours to reduce yt-dlp calls. """ import subprocess video_id = body.video_id quality = body.quality or "720" platform = body.platform or "youtube" cache_key = f"{platform}_{video_id}_{quality}" # Check cache first if cache_key in stream_url_cache: cached = stream_url_cache[cache_key] if time.time() < cached['expires']: logger.debug(f"Stream URL cache hit for {video_id}", module="VideoStream") audio_url = cached.get('audio_url') needs_proxy = audio_url is not None return { "success": True, "video_url": cached['video_url'], "audio_url": audio_url, "quality": cached['quality'], "cached": True, "expires_in": int(cached['expires'] - time.time()), "needs_proxy": needs_proxy, "proxy_url": f"/api/video/stream-proxy/{platform}/{video_id}?quality={quality}" if needs_proxy else None } # Build format selector based on quality # For streaming/preview, we use progressive formats (combined video+audio) which can be # directly proxied without IP restrictions on individual segments. # Progressive formats: 22 (720p), 18 (360p) - max quality is 720p # For higher quality, users should download the video or use the embedded player. if platform == "youtube": # YouTube progressive formats with combined audio if quality in ["best", "1080", "720"]: format_selector = "22/18/best[ext=mp4]/best" # 720p or 360p progressive elif quality == "480": format_selector = "18/22/best[ext=mp4]/best" # Prefer 360p else: # 360 or fallback format_selector = "18/best[ext=mp4]/best" # 360p progressive elif platform == "dailymotion": # Dailymotion - use http-480 or similar progressive if available if quality in ["best", "1080", "720"]: format_selector = "http-720/http-480/best[ext=mp4]/best" elif quality == "480": format_selector = "http-480/http-720/best[ext=mp4]/best" else: format_selector = "http-380/http-480/best[ext=mp4]/best" else: # Other platforms - try to get progressive mp4 if quality == "best": format_selector = "best[ext=mp4][acodec!=none]/best[ext=mp4]/best" elif quality == "1080": format_selector = "best[height<=1080][ext=mp4][acodec!=none]/best[height<=1080][ext=mp4]/best[height<=1080]" elif quality == "720": format_selector = "best[height<=720][ext=mp4][acodec!=none]/best[height<=720][ext=mp4]/best[height<=720]" elif quality == "480": format_selector = "best[height<=480][ext=mp4][acodec!=none]/best[height<=480][ext=mp4]/best[height<=480]" else: format_selector = "best[height<=360][ext=mp4][acodec!=none]/best[height<=360][ext=mp4]/best[height<=360]" # Construct source URL based on platform if platform == "dailymotion": source_url = f"https://www.dailymotion.com/video/{video_id}" else: source_url = f"https://www.youtube.com/watch?v={video_id}" try: # Get direct URLs using yt-dlp result = subprocess.run( [ "/opt/media-downloader/venv/bin/yt-dlp", "-g", # Get URL only "-f", format_selector, "--no-warnings", source_url ], capture_output=True, text=True, timeout=30 ) if result.returncode != 0: error_msg = result.stderr.strip() if result.stderr else "Unknown error" logger.error(f"yt-dlp failed for {video_id}: {error_msg}", module="VideoStream") raise DownloadError(f"Failed to get stream URL: {error_msg}") urls = result.stdout.strip().split('\n') # yt-dlp returns video URL first, then audio URL if separate video_url = urls[0] if urls else None audio_url = urls[1] if len(urls) > 1 else None if not video_url: raise DownloadError("No stream URL returned") # Get actual quality from the format actual_quality = quality # Get cache duration from settings cache_duration = get_stream_cache_duration() # Cache the result stream_url_cache[cache_key] = { 'video_url': video_url, 'audio_url': audio_url, 'quality': actual_quality, 'expires': time.time() + cache_duration } # Cleanup old cache entries current_time = time.time() expired_keys = [k for k, v in stream_url_cache.items() if current_time >= v['expires']] for k in expired_keys: del stream_url_cache[k] logger.info(f"Got stream URL for {video_id} at {quality}p (cache: {cache_duration // 3600}h)", module="VideoStream") # If we have separate audio, recommend using proxy for reliable playback # (separate streams have IP restrictions that break in browsers) needs_proxy = audio_url is not None return { "success": True, "video_url": video_url, "audio_url": audio_url, "quality": actual_quality, "cached": False, "expires_in": cache_duration, "needs_proxy": needs_proxy, "proxy_url": f"/api/video/stream-proxy/{platform}/{video_id}?quality={quality}" if needs_proxy else None } except subprocess.TimeoutExpired: logger.error(f"yt-dlp timeout for {video_id}", module="VideoStream") raise DownloadError("Timeout getting stream URL") except Exception as e: logger.error(f"Stream URL error for {video_id}: {e}", module="VideoStream") raise DownloadError(f"Failed to get stream URL: {str(e)}") @router.get("/stream-url/{video_id}") @limiter.limit("30/minute") @handle_exceptions async def get_stream_url_simple( request: Request, video_id: str, quality: str = Query("720", description="Video quality: 360, 480, 720, 1080, best"), current_user: Dict = Depends(get_current_user) ): """ GET version of stream URL endpoint for simpler usage. """ body = StreamUrlRequest(video_id=video_id, quality=quality) return await get_stream_url(request, body, current_user) @router.get("/stream-proxy/{platform}/{video_id}") @limiter.limit("60/minute") @handle_exceptions async def proxy_video_stream( request: Request, platform: str, video_id: str, quality: str = Query("720", description="Video quality"), current_user: Dict = Depends(get_current_user) ): """ Proxy video stream through the server. This endpoint streams the video through the server, bypassing IP restrictions that prevent direct YouTube/Dailymotion URLs from working in browsers. For HLS sources (like Dailymotion), uses yt-dlp to remux on-the-fly. For direct URLs (like YouTube progressive), proxies the stream directly. """ import httpx import asyncio from starlette.responses import StreamingResponse cache_key = f"{platform}_{video_id}_{quality}" # Get URL from cache or fetch new one if cache_key not in stream_url_cache or time.time() >= stream_url_cache[cache_key].get('expires', 0): # Fetch new URL body = StreamUrlRequest(video_id=video_id, quality=quality, platform=platform) await get_stream_url(request, body, current_user) cached = stream_url_cache.get(cache_key) if not cached or not cached.get('video_url'): raise DownloadError("No stream URL available") video_url = cached['video_url'] # For HLS streams (.m3u8), use yt-dlp to download then serve # This handles Dailymotion and other HLS-only sources if ".m3u8" in video_url: import os # Construct source URL if platform == "dailymotion": source_url = f"https://www.dailymotion.com/video/{video_id}" elif platform == "youtube": source_url = f"https://www.youtube.com/watch?v={video_id}" else: source_url = video_url # Build format selector if quality == "best" or quality == "1080": format_sel = "best[ext=mp4]/best" elif quality == "720": format_sel = "best[height<=720][ext=mp4]/best[height<=720]/best" elif quality == "480": format_sel = "best[height<=480][ext=mp4]/best[height<=480]/best" else: format_sel = "best[height<=360][ext=mp4]/best[height<=360]/best" # Check if we have a cached file for this video async with proxy_file_cache_lock: cached_file = proxy_file_cache.get(cache_key) if cached_file: # Check if file exists and cache is still valid if os.path.exists(cached_file['path']) and time.time() < cached_file['expires']: actual_path = cached_file['path'] else: # Cache expired or file missing, remove it if os.path.exists(cached_file.get('path', '')): try: os.unlink(cached_file['path']) except OSError: pass # Best effort cleanup of cache file del proxy_file_cache[cache_key] cached_file = None # Download if not cached if not cached_file: temp_path = f"/tmp/proxy_video_{cache_key}.mp4" # Download with yt-dlp to temp file logger.debug(f"Downloading HLS video for proxy: {source_url}", module="VideoProxy") dl_process = await asyncio.create_subprocess_exec( "/opt/media-downloader/venv/bin/yt-dlp", "-f", format_sel, "-o", temp_path, "--no-warnings", "--merge-output-format", "mp4", "--force-overwrites", source_url, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await dl_process.communicate() if dl_process.returncode != 0: logger.error(f"yt-dlp failed: {stderr.decode()}", module="VideoProxy") raise DownloadError("Failed to download video") # Find the actual file (yt-dlp may change extension) actual_path = temp_path if not os.path.exists(temp_path) or os.path.getsize(temp_path) == 0: base_path = temp_path.rsplit('.', 1)[0] for ext in ['.mp4', '.webm', '.mkv']: test_path = base_path + ext if os.path.exists(test_path) and os.path.getsize(test_path) > 0: actual_path = test_path break else: raise DownloadError("Video file not found after download") # Cache the file path async with proxy_file_cache_lock: proxy_file_cache[cache_key] = { 'path': actual_path, 'expires': time.time() + PROXY_FILE_CACHE_DURATION } logger.debug(f"Downloaded and cached: {actual_path}", module="VideoProxy") # Now serve the file with proper Range support file_size = os.path.getsize(actual_path) range_header = request.headers.get("Range") start = 0 end = file_size - 1 if range_header: # Parse Range header (e.g., "bytes=0-" or "bytes=100-500") range_match = range_header.replace("bytes=", "").split("-") if range_match[0]: start = int(range_match[0]) if len(range_match) > 1 and range_match[1]: end = min(int(range_match[1]), file_size - 1) content_length = end - start + 1 # Create a generator that reads from the file file_path_for_stream = actual_path # Capture in closure async def file_stream_generator(): with open(file_path_for_stream, 'rb') as f: f.seek(start) remaining = content_length while remaining > 0: chunk_size = min(65536, remaining) chunk = f.read(chunk_size) if not chunk: break remaining -= len(chunk) yield chunk headers = { "Accept-Ranges": "bytes", "Cache-Control": "no-cache", "Content-Length": str(content_length), } if range_header: headers["Content-Range"] = f"bytes {start}-{end}/{file_size}" return StreamingResponse( file_stream_generator(), status_code=206, media_type="video/mp4", headers=headers ) return StreamingResponse( file_stream_generator(), media_type="video/mp4", headers=headers ) # For direct video URLs, proxy the stream async def stream_generator(): async with httpx.AsyncClient(timeout=httpx.Timeout(30.0, read=None)) as client: async with client.stream("GET", video_url, headers={ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", "Accept": "*/*", "Accept-Encoding": "identity", # Don't compress - we're proxying "Range": request.headers.get("Range", "bytes=0-"), }) as response: async for chunk in response.aiter_bytes(chunk_size=65536): yield chunk # Determine content type content_type = "video/mp4" if ".webm" in video_url: content_type = "video/webm" return StreamingResponse( stream_generator(), media_type=content_type, headers={ "Accept-Ranges": "bytes", "Cache-Control": "no-cache", } ) @router.get("/streaming/settings") @limiter.limit("60/minute") @handle_exceptions async def get_streaming_settings( request: Request, current_user: Dict = Depends(get_current_user) ): """Get video streaming settings.""" app_state = get_app_state() # Default settings default_settings = { "enabled": True, "default_quality": "720", "proxy_mode": False, # False = redirect mode (faster), True = proxy through server "cache_duration_hours": 5 } try: with app_state.db.get_connection() as conn: cursor = conn.cursor() cursor.execute("SELECT value FROM settings WHERE key = 'video_streaming'") row = cursor.fetchone() if row: settings_data = json.loads(row[0]) return {"success": True, "settings": {**default_settings, **settings_data}} except Exception as e: logger.debug(f"Could not load streaming settings: {e}", module="VideoStream") return {"success": True, "settings": default_settings} @router.post("/streaming/settings") @limiter.limit("10/minute") @handle_exceptions async def update_streaming_settings( request: Request, settings_data: Dict = Body(...), current_user: Dict = Depends(require_admin) ): """Update video streaming settings (admin only).""" app_state = get_app_state() # Validate settings allowed_keys = {"enabled", "default_quality", "default_player", "proxy_mode", "cache_duration_hours", "cache_duration"} filtered_settings = {k: v for k, v in settings_data.items() if k in allowed_keys} # Normalize cache_duration to cache_duration_hours if "cache_duration" in filtered_settings and "cache_duration_hours" not in filtered_settings: filtered_settings["cache_duration_hours"] = filtered_settings.pop("cache_duration") # Validate quality if "default_quality" in filtered_settings: if filtered_settings["default_quality"] not in ["360", "480", "720", "1080", "best"]: raise ValidationError("Invalid quality setting", {"allowed": ["360", "480", "720", "1080", "best"]}) # Validate player if "default_player" in filtered_settings: if filtered_settings["default_player"] not in ["native", "youtube"]: raise ValidationError("Invalid player setting", {"allowed": ["native", "youtube"]}) with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() # Get existing settings cursor.execute("SELECT value FROM settings WHERE key = 'video_streaming'") row = cursor.fetchone() existing = json.loads(row[0]) if row else {} # Merge settings merged = {**existing, **filtered_settings} cursor.execute( "INSERT OR REPLACE INTO settings (key, value, value_type, category, description) VALUES (?, ?, ?, ?, ?)", ("video_streaming", json.dumps(merged), "json", "video", "Video streaming settings for native player") ) conn.commit() logger.info(f"Updated video streaming settings: {filtered_settings}", module="VideoStream") return {"success": True, "settings": merged}