""" Media Router Handles all media file operations: - Thumbnail generation and caching - Media preview/serving - Metadata retrieval - Gallery listing - Batch operations (delete, move, download) - Cache management """ import hashlib import json import shutil import sqlite3 import subprocess import tempfile import zipfile from datetime import datetime from pathlib import Path from typing import Dict, List, Optional from fastapi import APIRouter, BackgroundTasks, Body, Depends, Query, Request from fastapi.responses import FileResponse, Response from PIL import Image from pydantic import BaseModel from slowapi import Limiter from slowapi.util import get_remote_address from ..core.dependencies import get_current_user, get_current_user_media, require_admin, get_app_state from ..core.config import settings from ..core.exceptions import ( handle_exceptions, MediaFileNotFoundError as CustomFileNotFoundError, FileOperationError, ValidationError ) from ..core.responses import now_iso8601 from ..core.path_tokens import encode_path, decode_path from modules.universal_logger import get_logger from ..core.utils import ( get_media_dimensions, get_media_dimensions_batch, validate_file_path, generate_image_thumbnail as shared_generate_image_thumbnail, generate_video_thumbnail as shared_generate_video_thumbnail, get_or_create_thumbnail as shared_get_or_create_thumbnail, ThumbnailLRUCache, ALLOWED_PATHS ) logger = get_logger('API') router = APIRouter(prefix="/api/media", tags=["Media"]) limiter = Limiter(key_func=get_remote_address) # Use centralized paths from config MEDIA_BASE = settings.MEDIA_BASE_PATH REVIEW_BASE = settings.REVIEW_PATH RECYCLE_BASE = settings.RECYCLE_PATH # Global thumbnail memory cache (500 items or 100MB max) # Using shared ThumbnailLRUCache from core/utils.py _thumbnail_cache = ThumbnailLRUCache(max_size=500, max_memory_mb=100) # ============================================================================ # PYDANTIC MODELS # 
============================================================================ class BatchMoveRequest(BaseModel): file_paths: List[str] destination: str class BatchDeleteRequest(BaseModel): file_paths: List[str] class UpdateDateRequest(BaseModel): ids: List[int] # file_inventory IDs new_date: str # ISO datetime "2024-06-15T14:30:00" update_file: bool = True # Also update filesystem/EXIF timestamps date_type: str = "post_date" # "post_date" or "download_date" # ============================================================================ # HELPER FUNCTIONS (validate_file_path from core/utils.py) # ============================================================================ # Thumbnail generation functions are now in core/utils.py (shared across routers). # Local aliases for backward compatibility within this module: generate_image_thumbnail = shared_generate_image_thumbnail generate_video_thumbnail = shared_generate_video_thumbnail get_or_create_thumbnail = shared_get_or_create_thumbnail def update_file_path_in_all_tables(db, old_path: str, new_path: str): """ Update file path in all relevant database tables when a file is moved. """ try: with db.get_connection(for_write=True) as conn: cursor = conn.cursor() cursor.execute('UPDATE downloads SET file_path = ? WHERE file_path = ?', (new_path, old_path)) downloads_updated = cursor.rowcount cursor.execute('UPDATE instagram_perceptual_hashes SET file_path = ? WHERE file_path = ?', (new_path, old_path)) perceptual_updated = cursor.rowcount cursor.execute('UPDATE face_recognition_scans SET file_path = ? WHERE file_path = ?', (new_path, old_path)) face_updated = cursor.rowcount try: cursor.execute('UPDATE semantic_embeddings SET file_path = ? 
WHERE file_path = ?', (new_path, old_path)) embeddings_updated = cursor.rowcount except sqlite3.OperationalError: embeddings_updated = 0 conn.commit() if downloads_updated or perceptual_updated or face_updated or embeddings_updated: logger.debug( f"Updated file paths: downloads={downloads_updated}, " f"perceptual={perceptual_updated}, face={face_updated}, " f"embeddings={embeddings_updated}", module="Database" ) except Exception as e: logger.warning(f"Failed to update file paths in tables: {e}", module="Database") # Also update thumbnails.db cache (uses path-based hash) try: thumb_db_path = settings.PROJECT_ROOT / 'database' / 'thumbnails.db' old_hash = hashlib.sha256(old_path.encode()).hexdigest() new_hash = hashlib.sha256(new_path.encode()).hexdigest() with sqlite3.connect(str(thumb_db_path), timeout=10.0) as thumb_conn: cursor = thumb_conn.cursor() # Get thumbnail data from old path cursor.execute("SELECT thumbnail_data, file_mtime FROM thumbnails WHERE file_hash = ?", (old_hash,)) row = cursor.fetchone() if row: thumbnail_data, file_mtime = row # Insert with new hash cursor.execute(""" INSERT OR REPLACE INTO thumbnails (file_hash, file_path, thumbnail_data, created_at, file_mtime) VALUES (?, ?, ?, ?, ?) 
""", (new_hash, new_path, thumbnail_data, now_iso8601(), file_mtime)) # Delete old entry cursor.execute("DELETE FROM thumbnails WHERE file_hash = ?", (old_hash,)) thumb_conn.commit() logger.debug(f"Migrated thumbnail cache for moved file", module="Database") except Exception as e: logger.warning(f"Failed to update thumbnail cache: {e}", module="Database") # ============================================================================ # THUMBNAIL AND PREVIEW ENDPOINTS # ============================================================================ @router.get("/thumbnail") @limiter.limit("5000/minute") @handle_exceptions async def get_media_thumbnail( request: Request, file_path: str = None, media_type: str = None, token: str = None, t: str = None, current_user: Dict = Depends(get_current_user_media) ): """ Get or generate thumbnail for media file. Uses 3-tier caching: 1. In-memory LRU cache (fastest, ~500 items) 2. Thumbnail database (fast, persistent) 3. Generate on-demand (slowest, for new files) Cache key uses content hash (SHA256) so thumbnails survive file moves. Args: file_path: Path to the media file media_type: 'image' or 'video' t: Encrypted file token (alternative to file_path) """ if t: file_path = decode_path(t) resolved_path = validate_file_path(file_path) app_state = get_app_state() # Cache key: use path (avoids slow file_inventory DB lookup on every request) cache_key = str(resolved_path) # 1. 
Check in-memory LRU cache first (fastest — no disk/DB access) thumbnail_data = _thumbnail_cache.get(cache_key) if thumbnail_data: return Response( content=thumbnail_data, media_type="image/jpeg", headers={ "Cache-Control": "public, max-age=86400, immutable", "Vary": "Accept-Encoding" } ) # For videos, check if we have a cached platform thumbnail if media_type == 'video': try: with app_state.db.get_connection() as conn: cursor = conn.cursor() cursor.execute('SELECT thumbnail_data FROM video_downloads WHERE file_path = ?', (str(resolved_path),)) row = cursor.fetchone() if row and row['thumbnail_data']: thumbnail_data = row['thumbnail_data'] # Add to in-memory cache _thumbnail_cache.put(cache_key, thumbnail_data) return Response( content=thumbnail_data, media_type="image/jpeg", headers={ "Cache-Control": "public, max-age=86400, immutable", "Vary": "Accept-Encoding" } ) except Exception as e: logger.debug(f"Error checking cached thumbnail: {e}", module="MediaThumbnail") # 2. Get from database cache or generate thumbnail_data = get_or_create_thumbnail(resolved_path, media_type) if not thumbnail_data: raise FileOperationError("Failed to generate thumbnail") # Add to in-memory cache for faster subsequent requests _thumbnail_cache.put(cache_key, thumbnail_data) # Cache thumbnails for 1 day - they don't change often # immutable flag tells browsers the content will never change for this URL return Response( content=thumbnail_data, media_type="image/jpeg", headers={ "Cache-Control": "public, max-age=86400, immutable", "Vary": "Accept-Encoding" } ) @router.get("/preview") @limiter.limit("5000/minute") @handle_exceptions async def get_media_preview( request: Request, file_path: str = None, token: str = None, t: str = None, current_user: Dict = Depends(get_current_user_media) ): """Serve a media file for preview.""" if t: file_path = decode_path(t) resolved_path = validate_file_path(file_path) if not resolved_path.exists() or not resolved_path.is_file(): raise 
CustomFileNotFoundError("File not found", {"path": str(file_path)}) # Cache media files for 1 hour - content doesn't change return FileResponse( str(resolved_path), headers={"Cache-Control": "public, max-age=3600"} ) @router.get("/metadata") @limiter.limit("5000/minute") @handle_exceptions async def get_media_metadata( request: Request, file_path: str = None, t: str = None, current_user: Dict = Depends(get_current_user) ): """ Get cached metadata for a media file (resolution, duration, etc.). """ if t: file_path = decode_path(t) elif not file_path: raise ValidationError("Either 't' or 'file_path' is required") resolved_path = validate_file_path(file_path) if not resolved_path.exists() or not resolved_path.is_file(): raise CustomFileNotFoundError("File not found", {"path": str(file_path)}) app_state = get_app_state() # Get metadata from cache metadata_db_path = settings.PROJECT_ROOT / 'database' / 'media_metadata.db' file_hash = hashlib.sha256(str(resolved_path).encode()).hexdigest() try: with sqlite3.connect(str(metadata_db_path)) as conn: cursor = conn.execute( """SELECT width, height, file_size, duration, format, created_at FROM media_metadata WHERE file_hash = ?""", (file_hash,) ) result = cursor.fetchone() except Exception: result = None if result: width, height, file_size, duration, format_type, created_at = result return { "file_path": str(resolved_path), "width": width, "height": height, "file_size": file_size, "duration": duration, "format": format_type, "cached": True, "cached_at": created_at } # Not in metadata cache - try file_inventory first width, height, duration = None, None, None try: with app_state.db.get_connection() as conn: cursor = conn.execute( """SELECT width, height, file_size, platform, source FROM file_inventory WHERE file_path = ?""", (str(resolved_path),) ) inv_result = cursor.fetchone() if inv_result and inv_result[0] and inv_result[1]: return { "file_path": str(resolved_path), "width": inv_result[0], "height": inv_result[1], 
"file_size": inv_result[2] or resolved_path.stat().st_size, "platform": inv_result[3], "source": inv_result[4], "cached": True, "source_table": "file_inventory" } except Exception as e: logger.debug(f"Error reading file_inventory cache: {e}", module="MediaInfo") # Fall back to dynamic extraction file_ext = resolved_path.suffix.lower() try: if file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.heic', '.heif']: try: with Image.open(str(resolved_path)) as img: width, height = img.size except Exception as e: logger.debug(f"Error reading image dimensions: {e}", module="MediaInfo") elif file_ext in ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v']: # Skip ffprobe fallback for performance - rely on cached dimensions only # Videos without cached dimensions will show without width/height pass except Exception: pass return { "file_path": str(resolved_path), "width": width, "height": height, "duration": duration, "file_size": resolved_path.stat().st_size, "cached": False } @router.get("/embedded-metadata") @limiter.limit("1000/minute") @handle_exceptions async def get_embedded_metadata( request: Request, file_path: str = None, t: str = None, current_user: Dict = Depends(get_current_user) ): """ Read descriptive metadata embedded in the actual file. For videos: Uses ffprobe to read title, artist, description, comment, date For images: Uses exiftool to read EXIF data (ImageDescription, Artist, etc.) This is different from /metadata which returns technical info (resolution, duration). 
""" if t: file_path = decode_path(t) resolved_path = validate_file_path(file_path) if not resolved_path.exists() or not resolved_path.is_file(): raise CustomFileNotFoundError("File not found", {"path": str(file_path)}) file_ext = resolved_path.suffix.lower() metadata = { "file_path": str(resolved_path), "title": None, "artist": None, "description": None, "comment": None, "date": None, "source": None } try: if file_ext in ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v', '.m4a', '.mp3']: # Use ffprobe for video/audio files result = subprocess.run( ['ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_format', str(resolved_path)], capture_output=True, text=True, timeout=10 ) if result.returncode == 0: data = json.loads(result.stdout) tags = data.get('format', {}).get('tags', {}) # ffprobe returns tags in various cases, normalize to lowercase lookup tags_lower = {k.lower(): v for k, v in tags.items()} metadata['title'] = tags_lower.get('title') metadata['artist'] = tags_lower.get('artist') or tags_lower.get('album_artist') metadata['description'] = tags_lower.get('description') or tags_lower.get('synopsis') metadata['comment'] = tags_lower.get('comment') metadata['date'] = tags_lower.get('date') or tags_lower.get('creation_time') # Try to extract source URL from comment or purl if metadata['comment'] and metadata['comment'].startswith('http'): metadata['source'] = metadata['comment'] elif tags_lower.get('purl'): metadata['source'] = tags_lower.get('purl') elif file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.heic', '.heif']: # Use exiftool for images result = subprocess.run( ['exiftool', '-j', '-ImageDescription', '-XPComment', '-Artist', '-DateTimeOriginal', '-UserComment', '-Caption-Abstract', str(resolved_path)], capture_output=True, text=True, timeout=10 ) if result.returncode == 0: data = json.loads(result.stdout) if data and len(data) > 0: exif = data[0] metadata['title'] = exif.get('ImageDescription') or exif.get('Caption-Abstract') 
metadata['artist'] = exif.get('Artist') metadata['description'] = exif.get('XPComment') metadata['comment'] = exif.get('UserComment') metadata['date'] = exif.get('DateTimeOriginal') # Check if comment contains URL if metadata['comment'] and str(metadata['comment']).startswith('http'): metadata['source'] = metadata['comment'] except subprocess.TimeoutExpired: logger.warning(f"Timeout reading embedded metadata: {file_path}", module="Media") except Exception as e: logger.warning(f"Error reading embedded metadata: {e}", module="Media") return metadata # ============================================================================ # CACHE MANAGEMENT ENDPOINTS # ============================================================================ @router.post("/cache/rebuild") @limiter.limit("5/minute") @handle_exceptions async def rebuild_media_cache( request: Request, current_user: Dict = Depends(get_current_user) ): """Trigger thumbnail and metadata cache rebuild.""" script_path = settings.PROJECT_ROOT / 'modules' / 'thumbnail_cache_builder.py' if not script_path.exists(): raise CustomFileNotFoundError("Cache builder script not found") # Run in background subprocess.Popen( ['/usr/bin/python3', str(script_path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True ) return { "success": True, "message": "Cache rebuild started in background" } @router.get("/cache/stats") @limiter.limit("5000/minute") @handle_exceptions async def get_cache_stats( request: Request, current_user: Dict = Depends(get_current_user) ): """Get statistics about the media cache.""" thumb_db_path = settings.PROJECT_ROOT / 'database' / 'thumbnails.db' metadata_db_path = settings.PROJECT_ROOT / 'database' / 'media_metadata.db' stats = { "thumbnails": { "exists": True, "count": 0, "size_bytes": 0 }, "metadata": { "exists": True, "count": 0, "size_bytes": 0 } } try: with sqlite3.connect(str(thumb_db_path)) as conn: cursor = conn.execute("SELECT COUNT(*) FROM thumbnails") 
stats["thumbnails"]["count"] = cursor.fetchone()[0] except Exception: stats["thumbnails"]["exists"] = False try: with sqlite3.connect(str(metadata_db_path)) as conn: cursor = conn.execute("SELECT COUNT(*) FROM media_metadata") stats["metadata"]["count"] = cursor.fetchone()[0] except Exception: stats["metadata"]["exists"] = False return stats # ============================================================================ # BATCH OPERATIONS # ============================================================================ MAX_BATCH_SIZE = 500 # Maximum files per batch operation @router.post("/batch-delete") @limiter.limit("10/minute") @handle_exceptions async def batch_delete_media( request: Request, current_user: Dict = Depends(require_admin), file_paths: List[str] = Body(...) ): """ Move multiple media files to recycle bin (admin only). Maximum 500 files per request. """ # Security: Limit batch size to prevent DoS if len(file_paths) > MAX_BATCH_SIZE: raise ValidationError(f"Batch size exceeds maximum of {MAX_BATCH_SIZE} files") app_state = get_app_state() deleted = [] errors = [] for file_path in file_paths: try: requested_path = Path(file_path) resolved_path = requested_path.resolve() # Use relative_to() for safe path validation (prevents symlink bypass) try: resolved_path.relative_to(MEDIA_BASE.resolve()) except ValueError: errors.append({"file": file_path, "error": "Access denied"}) continue if resolved_path.exists() and resolved_path.is_file(): recycle_id = app_state.db.move_to_recycle_bin( file_path=str(resolved_path), deleted_from='media', deleted_by=current_user.get('sub'), metadata={} ) if recycle_id: deleted.append(file_path) else: errors.append({"file": file_path, "error": "Failed to move to recycle bin"}) else: errors.append({"file": file_path, "error": "File not found"}) except Exception as e: errors.append({"file": file_path, "error": str(e)}) # Broadcast update try: if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager: await 
app_state.websocket_manager.broadcast({ "type": "batch_delete_completed", "deleted_count": len(deleted), "error_count": len(errors), "timestamp": now_iso8601() }) except Exception: pass return { "success": True, "deleted": deleted, "errors": errors, "deleted_count": len(deleted), "error_count": len(errors) } @router.post("/update-date") @limiter.limit("30/minute") @handle_exceptions async def update_media_date( request: Request, data: UpdateDateRequest, current_user: Dict = Depends(require_admin) ): """ Update the date (post_date or download_date) for media files. Optionally updates file timestamps (EXIF, video metadata, filesystem). """ from modules.date_utils import DateHandler app_state = get_app_state() results = [] success_count = 0 failed_count = 0 # Parse the new date try: new_date = datetime.fromisoformat(data.new_date.replace('Z', '+00:00')) except ValueError: raise ValidationError(f"Invalid date format: {data.new_date}. Use ISO format like 2024-06-15T14:30:00") # Validate date_type if data.date_type not in ("post_date", "download_date"): raise ValidationError(f"Invalid date_type: {data.date_type}. Must be 'post_date' or 'download_date'") # Use connection pool for main database access with app_state.db.get_connection(for_write=True) as conn: cursor = conn.cursor() for file_id in data.ids: try: # Get file info from file_inventory cursor.execute(""" SELECT id, file_path, filename, platform FROM file_inventory WHERE id = ? """, (file_id,)) row = cursor.fetchone() if not row: results.append({"id": file_id, "success": False, "error": "File not found in inventory"}) failed_count += 1 continue file_path = row['file_path'] filename = row['filename'] # Update downloads table # Use explicit SQL statements instead of f-string interpolation to prevent SQL injection date_value = new_date.strftime('%Y-%m-%d %H:%M:%S') if data.date_type == "post_date": cursor.execute(""" UPDATE downloads SET post_date = ? WHERE file_path = ? OR filename = ? 
""", (date_value, file_path, filename)) else: # download_date (already validated above) cursor.execute(""" UPDATE downloads SET download_date = ? WHERE file_path = ? OR filename = ? """, (date_value, file_path, filename)) rows_updated = cursor.rowcount # If no downloads record matched, try by filename only if rows_updated == 0: if data.date_type == "post_date": cursor.execute(""" UPDATE downloads SET post_date = ? WHERE filename = ? """, (date_value, filename)) else: # download_date cursor.execute(""" UPDATE downloads SET download_date = ? WHERE filename = ? """, (date_value, filename)) rows_updated = cursor.rowcount # If still no match, insert a downloads record so the date is persisted if rows_updated == 0: import hashlib platform = row['platform'] or 'unknown' url_hash = hashlib.sha256(file_path.encode()).hexdigest() url = f"file://{file_path}" if data.date_type == "post_date": cursor.execute(""" INSERT INTO downloads (url_hash, url, filename, file_path, platform, post_date, download_date, status) VALUES (?, ?, ?, ?, ?, ?, ?, 'completed') """, (url_hash, url, filename, file_path, platform, date_value, date_value)) else: # download_date cursor.execute(""" INSERT INTO downloads (url_hash, url, filename, file_path, platform, download_date, status) VALUES (?, ?, ?, ?, ?, ?, 'completed') """, (url_hash, url, filename, file_path, platform, date_value)) rows_updated = 1 logger.info(f"Created downloads record for {filename} to store {data.date_type}") # Update file timestamps if requested file_update_success = True if data.update_file and Path(file_path).exists(): try: file_update_success = DateHandler.update_file_timestamps(file_path, new_date) if file_update_success: logger.info(f"Updated file timestamps for {filename}") else: logger.warning(f"Failed to update file timestamps for {filename}") except Exception as e: logger.error(f"Error updating file timestamps for {filename}: {e}") file_update_success = False results.append({ "id": file_id, "success": True, 
"db_rows_updated": rows_updated, "file_updated": file_update_success if data.update_file else None }) success_count += 1 except Exception as e: logger.error(f"Error updating date for file {file_id}: {e}") results.append({"id": file_id, "success": False, "error": str(e)}) failed_count += 1 return { "success": True, "results": results, "success_count": success_count, "failed_count": failed_count } def process_batch_move_background(file_paths: List[str], destination: str, app_state): """Background task to process batch file moves.""" dest_path = Path(destination) if not dest_path.is_absolute(): dest_path = MEDIA_BASE / destination dest_path.mkdir(parents=True, exist_ok=True) moved_count = 0 error_count = 0 for file_path in file_paths: filename = Path(file_path).name try: requested_path = Path(file_path) resolved_path = requested_path.resolve() # Use relative_to() for safe path validation (prevents symlink bypass) try: resolved_path.relative_to(MEDIA_BASE.resolve()) except ValueError: try: if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager: app_state.websocket_manager.broadcast_sync({ "type": "batch_move_progress", "filename": filename, "success": False, "error": "Access denied" }) except Exception: pass error_count += 1 continue if resolved_path.exists() and resolved_path.is_file(): dest_file = dest_path / resolved_path.name shutil.move(str(resolved_path), str(dest_file)) # Update file_inventory try: app_state.db.update_file_inventory_location( file_path=str(resolved_path), new_location='final', new_file_path=str(dest_file) ) except Exception as e: logger.warning(f"Failed to update file_inventory for {filename}: {e}", module="API") # Update paths in all tables update_file_path_in_all_tables(app_state.db, str(resolved_path), str(dest_file)) try: if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager: app_state.websocket_manager.broadcast_sync({ "type": "batch_move_progress", "filename": filename, "success": True, 
"destination": str(dest_file) }) except Exception: pass moved_count += 1 else: try: if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager: app_state.websocket_manager.broadcast_sync({ "type": "batch_move_progress", "filename": filename, "success": False, "error": "File not found" }) except Exception: pass error_count += 1 except Exception as e: try: if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager: app_state.websocket_manager.broadcast_sync({ "type": "batch_move_progress", "filename": filename, "success": False, "error": str(e) }) except Exception: pass error_count += 1 # Send completion update try: if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager: app_state.websocket_manager.broadcast_sync({ "type": "batch_move_completed", "moved_count": moved_count, "error_count": error_count, "timestamp": now_iso8601() }) except Exception: pass @router.post("/batch-move") @limiter.limit("10/minute") @handle_exceptions async def batch_move_media( request: Request, background_tasks: BackgroundTasks, current_user: Dict = Depends(require_admin), move_data: BatchMoveRequest = Body(...) ): """ Move multiple media files to a different directory (admin only, async with progress updates). Maximum 500 files per request. 
""" # Security: Limit batch size to prevent DoS if len(move_data.file_paths) > MAX_BATCH_SIZE: raise ValidationError(f"Batch size exceeds maximum of {MAX_BATCH_SIZE} files") app_state = get_app_state() dest_path = Path(move_data.destination) if not dest_path.is_absolute(): dest_path = MEDIA_BASE / move_data.destination # Use relative_to() for safe path validation (prevents symlink bypass) try: dest_path.resolve().relative_to(MEDIA_BASE.resolve()) except ValueError: raise ValidationError( "Destination must be within media directory", {"destination": move_data.destination} ) # Queue batch move in background background_tasks.add_task( process_batch_move_background, move_data.file_paths, move_data.destination, app_state ) return { "success": True, "processing": True, "file_count": len(move_data.file_paths), "message": "Batch move started, processing in background" } @router.post("/batch-download") @limiter.limit("10/minute") @handle_exceptions async def batch_download_media( request: Request, current_user: Dict = Depends(get_current_user), file_paths: List[str] = Body(...) 
): """Create a zip file of selected media files.""" temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.zip') temp_file.close() with zipfile.ZipFile(temp_file.name, 'w', zipfile.ZIP_DEFLATED) as zipf: for file_path in file_paths: try: requested_path = Path(file_path) resolved_path = requested_path.resolve() # Use relative_to() for safe path validation (prevents symlink bypass) try: resolved_path.relative_to(MEDIA_BASE.resolve()) except ValueError: continue if resolved_path.exists() and resolved_path.is_file(): arcname = resolved_path.name zipf.write(resolved_path, arcname) except Exception: continue return FileResponse( temp_file.name, media_type='application/zip', filename=f'media-{datetime.now().strftime("%Y%m%d-%H%M%S")}.zip' ) # ============================================================================ # FILE STATUS AND LOCATION ENDPOINTS # ============================================================================ @router.post("/check-status") @limiter.limit("1000/minute") @handle_exceptions async def check_file_statuses( request: Request, current_user: Dict = Depends(get_current_user), file_paths: List[str] = Body(..., embed=True) ): """ Check the status of multiple files - returns location (media/review/recycle/deleted). """ import os app_state = get_app_state() results = {} with app_state.db.get_connection() as conn: cursor = conn.cursor() for file_path in file_paths: cursor.execute(''' SELECT location, file_path FROM file_inventory WHERE file_path = ? 
''', (file_path,)) row = cursor.fetchone() if row: location = row[0] current_path = row[1] if location == 'final': if os.path.exists(current_path): results[file_path] = {'status': 'media', 'current_path': current_path} else: results[file_path] = {'status': 'missing', 'current_path': current_path} elif location == 'review': if os.path.exists(current_path): results[file_path] = {'status': 'review', 'current_path': current_path} else: results[file_path] = {'status': 'missing', 'current_path': current_path} else: results[file_path] = {'status': location, 'current_path': current_path} else: # Not in file_inventory, check recycle bin cursor.execute(''' SELECT id, original_path FROM recycle_bin WHERE original_path = ? ''', (file_path,)) recycle_row = cursor.fetchone() if recycle_row: results[file_path] = {'status': 'recycle', 'recycle_id': recycle_row[0]} elif os.path.exists(file_path): # File exists on disk but not tracked - treat as media results[file_path] = {'status': 'media', 'current_path': file_path} else: results[file_path] = {'status': 'deleted'} return {"file_statuses": results} @router.post("/move-to-review") @limiter.limit("10/minute") @handle_exceptions async def move_to_review( request: Request, current_user: Dict = Depends(require_admin), file_paths: List[str] = Body(..., embed=True) ): """Move media files to review queue. 
# ============================================================================
# GALLERY ENDPOINT
# ============================================================================

def _build_gallery_filters(
    *,
    platform: Optional[str],
    source: Optional[str],
    media_type: str,
    date_from: Optional[str],
    date_to: Optional[str],
    size_min: Optional[int],
    size_max: Optional[int],
    search: Optional[str],
    face_recognition: Optional[str],
) -> tuple:
    """Build the filter suffix shared by the gallery page query and count query.

    Returns:
        A ``(where_sql, params)`` pair. ``where_sql`` is a string of
        `` AND <condition>`` fragments meant to be appended directly after
        ``WHERE fi.location = 'final'``; ``params`` holds the values for its
        ``?`` placeholders, in order.

    Both queries must see exactly the same conditions, otherwise ``total``
    would disagree with the rows actually returned, so the conditions are
    assembled in one place only.
    """
    conditions: List[str] = []
    params: list = []

    if platform:
        conditions.append('fi.platform = ?')
        params.append(platform)

    if source:
        conditions.append('fi.source = ?')
        params.append(source)

    if media_type != "all":
        conditions.append('fi.content_type = ?')
        params.append(media_type)

    if date_from:
        conditions.append('DATE(COALESCE(d_post.max_post_date, fi.created_date)) >= ?')
        params.append(date_from)

    if date_to:
        conditions.append('DATE(COALESCE(d_post.max_post_date, fi.created_date)) <= ?')
        params.append(date_to)

    if size_min is not None:
        conditions.append('fi.file_size >= ?')
        params.append(size_min)

    if size_max is not None:
        conditions.append('fi.file_size <= ?')
        params.append(size_max)

    if search:
        search_term = f'%{search}%'
        conditions.append(
            '(fi.filename LIKE ? OR fi.platform LIKE ? OR fi.source LIKE ? OR fi.content_type LIKE ?)'
        )
        params.extend([search_term] * 4)

    # Face filters reference the ``frs`` alias; the caller joins
    # face_recognition_scans whenever face_recognition is truthy.
    # Unrecognised values add no condition (same as before).
    if face_recognition == 'matched':
        conditions.append('frs.has_match = 1')
    elif face_recognition == 'no_match':
        conditions.append('frs.file_path IS NOT NULL AND frs.has_match = 0')
    elif face_recognition == 'not_scanned':
        conditions.append('frs.file_path IS NULL')

    where_sql = ''.join(f' AND {condition}' for condition in conditions)
    return where_sql, params


def _gallery_row_to_item(row, dimensions_cache: Dict) -> Dict:
    """Convert one gallery result row into the JSON-serialisable item dict.

    ``dimensions_cache`` maps file_path -> (width, height) and is consulted
    only when the row itself lacks a stored width or height.
    """
    if row['width'] is not None and row['height'] is not None:
        width, height = row['width'], row['height']
    else:
        width, height = dimensions_cache.get(row['file_path'], (None, None))

    # face_has_match is NULL both when the scans table was not joined and
    # when no scan row exists for this file.
    has_face_data = row['face_has_match'] is not None
    face_recognition_data = {
        'scanned': has_face_data,
        'matched': bool(row['face_has_match']) if has_face_data else False,
        'person_name': row['face_matched_person'] if has_face_data else None,
        'confidence': row['face_confidence'] if has_face_data else None,
        'scan_date': row['face_scan_date'] if has_face_data else None
    }

    fp = row['file_path']
    return {
        "id": row['id'],
        "platform": row['platform'],
        "source": row['source'] or 'unknown',
        "filename": row['filename'],
        "file_path": fp,
        "file_token": encode_path(fp) if fp else None,
        "file_size": row['file_size'] or 0,
        "media_type": row['media_type'] or 'image',
        "download_date": row['download_date'],
        "post_date": row['post_date'] if row['post_date'] else '',
        "width": width,
        "height": height,
        "video_id": row['video_id'],
        "face_recognition": face_recognition_data
    }


@router.get("/gallery")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_media_gallery(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    platform: Optional[str] = None,
    source: Optional[str] = None,
    media_type: str = Query("all", pattern="^(all|image|video)$"),
    limit: int = Query(50, ge=1, le=500, description="Max items to return (1-500)"),
    offset: int = Query(0, ge=0, description="Number of items to skip"),
    face_recognition: Optional[str] = None,
    sort_by: str = Query("post_date", pattern="^(post_date|download_date|file_size|filename)$"),
    sort_order: str = Query("desc", pattern="^(asc|desc)$"),
    date_from: Optional[str] = Query(None, pattern="^\\d{4}-\\d{2}-\\d{2}$", description="Filter by post date from (YYYY-MM-DD)"),
    date_to: Optional[str] = Query(None, pattern="^\\d{4}-\\d{2}-\\d{2}$", description="Filter by post date to (YYYY-MM-DD)"),
    size_min: Optional[int] = Query(None, ge=0, description="Minimum file size in bytes"),
    size_max: Optional[int] = Query(None, ge=0, description="Maximum file size in bytes"),
    search: Optional[str] = Query(None, max_length=200, description="Search filename"),
    shuffle: bool = Query(False, description="Shuffle results deterministically"),
    shuffle_seed: Optional[int] = Query(None, description="Seed for deterministic shuffle"),
):
    """Get media files for gallery view (database-first).

    Lists ``file_inventory`` rows whose location is ``'final'``, optionally
    filtered by platform, source, media type, post-date range, file size,
    free-text search and face-recognition state, then sorted (or
    deterministically shuffled) and paginated.

    Returns:
        ``{"media": [...], "total": int, "limit": int, "offset": int}`` where
        ``total`` is the number of rows matching the filters before
        pagination.
    """
    app_state = get_app_state()

    where_sql, filter_params = _build_gallery_filters(
        platform=platform,
        source=source,
        media_type=media_type,
        date_from=date_from,
        date_to=date_to,
        size_min=size_min,
        size_max=size_max,
        search=search,
        face_recognition=face_recognition,
    )

    # Only join face_recognition_scans if filtering by face_recognition
    # (30-40% faster when not filtering).
    if face_recognition:
        face_columns = '''
                frs.has_match as face_has_match,
                frs.matched_person as face_matched_person,
                frs.confidence as face_confidence,
                frs.scan_date as face_scan_date'''
        face_join = 'LEFT JOIN face_recognition_scans frs ON frs.file_path = fi.file_path'
    else:
        face_columns = '''
                NULL as face_has_match,
                NULL as face_matched_person,
                NULL as face_confidence,
                NULL as face_scan_date'''
        face_join = ''

    # The date filters reference d_post, so both queries need this join.
    post_date_join = '''
            LEFT JOIN (
                SELECT file_path, MAX(post_date) as max_post_date
                FROM downloads
                GROUP BY file_path
            ) d_post ON d_post.file_path = fi.file_path'''

    # Only fixed, module-authored fragments are interpolated below; every
    # user-supplied value travels through ``?`` placeholders.
    query = f'''
            SELECT
                fi.id,
                fi.file_path,
                fi.filename,
                fi.platform,
                fi.source,
                fi.content_type as media_type,
                fi.file_size,
                fi.width,
                fi.height,
                fi.video_id,
                COALESCE(d_date.max_download_date, fi.created_date) as download_date,
                COALESCE(d_post.max_post_date, fi.created_date) as post_date,{face_columns}
            FROM file_inventory fi
            LEFT JOIN (
                SELECT filename, MAX(download_date) as max_download_date
                FROM downloads
                GROUP BY filename
            ) d_date ON d_date.filename = fi.filename
            {post_date_join}
            {face_join}
            WHERE fi.location = 'final'
    ''' + where_sql

    count_query = f'''
            SELECT COUNT(*)
            FROM file_inventory fi
            {post_date_join}
            {face_join}
            WHERE fi.location = 'final'
    ''' + where_sql

    params = list(filter_params)

    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()

        cursor.execute(count_query, list(filter_params))
        total = cursor.fetchone()[0]

        # Add sorting
        if shuffle:
            # Deterministic shuffle using PostgreSQL md5 hash
            seed = shuffle_seed if shuffle_seed is not None else 42
            query += ' ORDER BY md5(fi.id::text || ?::text), fi.id'
            params.append(str(seed))
        else:
            # Allow-list of sortable expressions: the chosen value is
            # interpolated into SQL, so it must never come from user input
            # directly.
            field_mapping = {
                'post_date': 'post_date',
                'download_date': 'download_date',
                'file_size': 'fi.file_size',
                'filename': 'fi.filename',
                'source': 'fi.source',
                'platform': 'fi.platform'
            }
            db_sort_field = field_mapping.get(sort_by, 'post_date')
            sort_direction = 'DESC' if sort_order.lower() == 'desc' else 'ASC'
            # fi.id is a unique tie-breaker: without it, rows sharing the
            # same sort key have no defined order, so LIMIT/OFFSET paging
            # could repeat or skip items between pages.
            query += f' ORDER BY {db_sort_field} {sort_direction}, fi.id {sort_direction}'

        # Add pagination
        query += ' LIMIT ? OFFSET ?'
        params.extend([limit, offset])

        cursor.execute(query, params)
        rows = cursor.fetchall()

        # Batch fetch dimensions for items missing width/height (avoids N+1 queries)
        paths_needing_dimensions = [
            row['file_path'] for row in rows
            if row['width'] is None or row['height'] is None
        ]
        dimensions_cache = (
            get_media_dimensions_batch(paths_needing_dimensions)
            if paths_needing_dimensions else {}
        )

        media = [_gallery_row_to_item(row, dimensions_cache) for row in rows]

    return {
        "media": media,
        "total": total,
        "limit": limit,
        "offset": offset
    }
@router.get("/gallery/date-range")
@limiter.limit("60/minute")
@handle_exceptions
async def get_media_gallery_date_range(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    media_type: Optional[str] = Query(None, pattern="^(image|video)$"),
):
    """Get year/month distribution of media for timeline scrubber.

    Counts ``file_inventory`` rows in the ``'final'`` location per calendar
    month, dating each file by the latest ``post_date`` of its completed
    downloads and falling back to ``fi.created_date``.

    Args:
        media_type: Optional ``"image"`` or ``"video"`` restriction; when
            omitted every content type is counted.

    Returns:
        ``{"ranges": [{"year": int, "month": int, "count": int}, ...]}``
        ordered newest month first.
    """
    app_state = get_app_state()

    query = '''
        SELECT
            EXTRACT(YEAR FROM COALESCE(d.max_post_date, fi.created_date)::timestamp) as year,
            EXTRACT(MONTH FROM COALESCE(d.max_post_date, fi.created_date)::timestamp) as month,
            COUNT(*) as count
        FROM file_inventory fi
        LEFT JOIN (
            SELECT file_path, MAX(post_date) as max_post_date
            FROM downloads
            WHERE status = 'completed'
            GROUP BY file_path
        ) d ON fi.file_path = d.file_path
        WHERE fi.location = 'final'
    '''
    params = []

    # Bind the value instead of inlining it, matching how the gallery
    # endpoint filters on content_type.
    if media_type in ('image', 'video'):
        query += ' AND fi.content_type = ?'
        params.append(media_type)

    query += '''
        GROUP BY year, month
        ORDER BY year DESC, month DESC
    '''

    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute(query, params)
        rows = cursor.fetchall()

    # Files with neither a post date nor a created date produce a NULL
    # year/month bucket; int(None) would raise and fail the whole request,
    # so such undated buckets are left out of the timeline.
    ranges = [
        {"year": int(row["year"]), "month": int(row["month"]), "count": row["count"]}
        for row in rows
        if row["year"] is not None and row["month"] is not None
    ]

    return {"ranges": ranges}