Files
media-downloader/web/backend/routers/media.py
Todd 0d7b2b1aab Initial commit
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00

1405 lines
52 KiB
Python

"""
Media Router
Handles all media file operations:
- Thumbnail generation and caching
- Media preview/serving
- Metadata retrieval
- Gallery listing
- Batch operations (delete, move, download)
- Cache management
"""
import hashlib
import json
import shutil
import sqlite3
import subprocess
import tempfile
import zipfile
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
from fastapi import APIRouter, BackgroundTasks, Body, Depends, Query, Request
from fastapi.responses import FileResponse, Response
from PIL import Image
from pydantic import BaseModel
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, get_current_user_media, require_admin, get_app_state
from ..core.config import settings
from ..core.exceptions import (
handle_exceptions,
MediaFileNotFoundError as CustomFileNotFoundError,
FileOperationError,
ValidationError
)
from ..core.responses import now_iso8601
from modules.universal_logger import get_logger
from ..core.utils import (
get_media_dimensions,
get_media_dimensions_batch,
validate_file_path,
generate_image_thumbnail as shared_generate_image_thumbnail,
generate_video_thumbnail as shared_generate_video_thumbnail,
get_or_create_thumbnail as shared_get_or_create_thumbnail,
ThumbnailLRUCache,
ALLOWED_PATHS
)
# Module-scoped logger shared by all media endpoints.
logger = get_logger('API')
router = APIRouter(prefix="/api/media", tags=["Media"])
# Rate limiting is keyed by the client's remote address.
limiter = Limiter(key_func=get_remote_address)
# Use centralized paths from config
MEDIA_BASE = settings.MEDIA_BASE_PATH  # root of the served media library (path checks anchor here)
REVIEW_BASE = settings.REVIEW_PATH  # destination for files moved to the review queue
RECYCLE_BASE = settings.RECYCLE_PATH  # recycle-bin path from settings (not referenced directly in this chunk)
# Global thumbnail memory cache (500 items or 100MB max)
# Using shared ThumbnailLRUCache from core/utils.py
_thumbnail_cache = ThumbnailLRUCache(max_size=500, max_memory_mb=100)
# ============================================================================
# PYDANTIC MODELS
# ============================================================================
class BatchMoveRequest(BaseModel):
    """Request body for /batch-move: files to relocate and their destination."""
    # Paths of the files to move; each is validated against MEDIA_BASE by the endpoint.
    file_paths: List[str]
    # Target directory; relative values are resolved under MEDIA_BASE.
    destination: str
class BatchDeleteRequest(BaseModel):
    """Request body listing files to send to the recycle bin."""
    # NOTE(review): this model appears unused — /batch-delete takes a raw
    # Body(...) list of paths instead. Confirm before removing.
    file_paths: List[str]
class UpdateDateRequest(BaseModel):
    """Request body for /update-date: re-date media tracked in file_inventory."""
    ids: List[int]  # file_inventory IDs
    new_date: str  # ISO datetime "2024-06-15T14:30:00"
    update_file: bool = True  # Also update filesystem/EXIF timestamps
    date_type: str = "post_date"  # "post_date" or "download_date"
# ============================================================================
# HELPER FUNCTIONS (validate_file_path from core/utils.py)
# ============================================================================
# Thumbnail generation functions are now in core/utils.py (shared across routers).
# Local aliases for backward compatibility within this module:
# (kept so existing imports of media.generate_* keep working after the refactor)
generate_image_thumbnail = shared_generate_image_thumbnail
generate_video_thumbnail = shared_generate_video_thumbnail
get_or_create_thumbnail = shared_get_or_create_thumbnail
def update_file_path_in_all_tables(db, old_path: str, new_path: str):
    """
    Propagate a file move to every table that references the old path.

    Touches downloads, instagram_perceptual_hashes, face_recognition_scans
    and (when present) semantic_embeddings in the main database, then
    re-keys the thumbnails.db cache entry, whose key is a hash of the path.
    Best-effort: any failure is logged as a warning, never raised.
    """
    try:
        with db.get_connection(for_write=True) as conn:
            cur = conn.cursor()
            move = (new_path, old_path)
            counts = {}
            cur.execute('UPDATE downloads SET file_path = ? WHERE file_path = ?', move)
            counts['downloads'] = cur.rowcount
            cur.execute('UPDATE instagram_perceptual_hashes SET file_path = ? WHERE file_path = ?', move)
            counts['perceptual'] = cur.rowcount
            cur.execute('UPDATE face_recognition_scans SET file_path = ? WHERE file_path = ?', move)
            counts['face'] = cur.rowcount
            try:
                cur.execute('UPDATE semantic_embeddings SET file_path = ? WHERE file_path = ?', move)
                counts['embeddings'] = cur.rowcount
            except sqlite3.OperationalError:
                # Optional table — may not exist on installs without semantic search.
                counts['embeddings'] = 0
            conn.commit()
            if any(counts.values()):
                logger.debug(
                    f"Updated file paths: downloads={counts['downloads']}, "
                    f"perceptual={counts['perceptual']}, face={counts['face']}, "
                    f"embeddings={counts['embeddings']}",
                    module="Database"
                )
    except Exception as e:
        logger.warning(f"Failed to update file paths in tables: {e}", module="Database")
    # thumbnails.db keys rows by sha256 of the path string, so a move means
    # re-inserting the cached blob under the new hash and removing the old row.
    try:
        thumb_db_path = settings.PROJECT_ROOT / 'database' / 'thumbnails.db'
        old_hash = hashlib.sha256(old_path.encode()).hexdigest()
        new_hash = hashlib.sha256(new_path.encode()).hexdigest()
        with sqlite3.connect(str(thumb_db_path), timeout=10.0) as thumb_conn:
            tcur = thumb_conn.cursor()
            tcur.execute("SELECT thumbnail_data, file_mtime FROM thumbnails WHERE file_hash = ?", (old_hash,))
            cached = tcur.fetchone()
            if cached:
                thumbnail_data, file_mtime = cached
                tcur.execute("""
                    INSERT OR REPLACE INTO thumbnails
                    (file_hash, file_path, thumbnail_data, created_at, file_mtime)
                    VALUES (?, ?, ?, ?, ?)
                """, (new_hash, new_path, thumbnail_data, now_iso8601(), file_mtime))
                tcur.execute("DELETE FROM thumbnails WHERE file_hash = ?", (old_hash,))
                thumb_conn.commit()
                logger.debug("Migrated thumbnail cache for moved file", module="Database")
    except Exception as e:
        logger.warning(f"Failed to update thumbnail cache: {e}", module="Database")
# ============================================================================
# THUMBNAIL AND PREVIEW ENDPOINTS
# ============================================================================
def _jpeg_thumbnail_response(thumbnail_data: bytes) -> Response:
    """Build a JPEG response with aggressive browser caching (1 day, immutable)."""
    return Response(
        content=thumbnail_data,
        media_type="image/jpeg",
        headers={
            "Cache-Control": "public, max-age=86400, immutable",
            "Vary": "Accept-Encoding"
        }
    )


@router.get("/thumbnail")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_media_thumbnail(
    request: Request,
    file_path: str = None,
    media_type: str = None,
    token: str = None,
    current_user: Dict = Depends(get_current_user_media)
):
    """
    Get or generate thumbnail for media file.

    Uses 3-tier caching:
      1. In-memory LRU cache (fastest, ~500 items)
      2. Thumbnail database (fast, persistent)
      3. Generate on-demand (slowest, for new files)

    The cache key is the resolved file path (NOT a content hash), so a
    moved file will regenerate — update_file_path_in_all_tables migrates
    the persistent cache on tracked moves.

    Args:
        file_path: Path to the media file.
        media_type: 'image' or 'video'. Videos first check the platform
            thumbnail stored in video_downloads.
        token: media auth token (consumed by get_current_user_media).

    Raises:
        FileOperationError: if no thumbnail could be produced.
    """
    resolved_path = validate_file_path(file_path)
    app_state = get_app_state()
    # Path-based key avoids a file_inventory DB lookup on every request.
    cache_key = str(resolved_path)

    # 1. In-memory LRU cache — no disk/DB access.
    thumbnail_data = _thumbnail_cache.get(cache_key)
    if thumbnail_data:
        return _jpeg_thumbnail_response(thumbnail_data)

    # For videos, prefer a platform-provided thumbnail cached at download time.
    if media_type == 'video':
        try:
            with app_state.db.get_connection() as conn:
                cursor = conn.cursor()
                cursor.execute('SELECT thumbnail_data FROM video_downloads WHERE file_path = ?',
                               (str(resolved_path),))
                row = cursor.fetchone()
                if row and row['thumbnail_data']:
                    thumbnail_data = row['thumbnail_data']
                    _thumbnail_cache.put(cache_key, thumbnail_data)
                    return _jpeg_thumbnail_response(thumbnail_data)
        except Exception as e:
            logger.debug(f"Error checking cached thumbnail: {e}", module="MediaThumbnail")

    # 2. Persistent thumbnail DB, or 3. generate on demand.
    thumbnail_data = get_or_create_thumbnail(resolved_path, media_type)
    if not thumbnail_data:
        raise FileOperationError("Failed to generate thumbnail")
    _thumbnail_cache.put(cache_key, thumbnail_data)
    return _jpeg_thumbnail_response(thumbnail_data)
@router.get("/preview")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_media_preview(
    request: Request,
    file_path: str,
    token: str = None,
    current_user: Dict = Depends(get_current_user_media)
):
    """Serve a media file for inline preview (browser-cacheable for 1 hour)."""
    resolved_path = validate_file_path(file_path)
    if not (resolved_path.exists() and resolved_path.is_file()):
        raise CustomFileNotFoundError("File not found", {"path": str(file_path)})
    return FileResponse(
        str(resolved_path),
        headers={"Cache-Control": "public, max-age=3600"}
    )
@router.get("/metadata")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_media_metadata(
    request: Request,
    file_path: str,
    current_user: Dict = Depends(get_current_user)
):
    """
    Get cached metadata for a media file (resolution, duration, etc.).

    Lookup order: media_metadata.db cache → file_inventory row →
    on-the-fly extraction (images only; video probing is skipped for speed).
    """
    resolved_path = validate_file_path(file_path)
    if not resolved_path.exists() or not resolved_path.is_file():
        raise CustomFileNotFoundError("File not found", {"path": str(file_path)})
    app_state = get_app_state()

    # Tier 1: dedicated metadata cache DB, keyed by sha256 of the path string.
    metadata_db_path = settings.PROJECT_ROOT / 'database' / 'media_metadata.db'
    file_hash = hashlib.sha256(str(resolved_path).encode()).hexdigest()
    try:
        with sqlite3.connect(str(metadata_db_path)) as conn:
            cached_row = conn.execute(
                """SELECT width, height, file_size, duration, format, created_at
                FROM media_metadata WHERE file_hash = ?""",
                (file_hash,)
            ).fetchone()
    except Exception:
        cached_row = None
    if cached_row:
        width, height, file_size, duration, format_type, created_at = cached_row
        return {
            "file_path": str(resolved_path),
            "width": width,
            "height": height,
            "file_size": file_size,
            "duration": duration,
            "format": format_type,
            "cached": True,
            "cached_at": created_at
        }

    # Tier 2: file_inventory — usable only when both dimensions are recorded.
    width = height = duration = None
    try:
        with app_state.db.get_connection() as conn:
            inv_row = conn.execute(
                """SELECT width, height, file_size, platform, source FROM file_inventory WHERE file_path = ?""",
                (str(resolved_path),)
            ).fetchone()
            if inv_row and inv_row[0] and inv_row[1]:
                return {
                    "file_path": str(resolved_path),
                    "width": inv_row[0],
                    "height": inv_row[1],
                    "file_size": inv_row[2] or resolved_path.stat().st_size,
                    "platform": inv_row[3],
                    "source": inv_row[4],
                    "cached": True,
                    "source_table": "file_inventory"
                }
    except Exception as e:
        logger.debug(f"Error reading file_inventory cache: {e}", module="MediaInfo")

    # Tier 3: extract what we cheaply can from the file itself.
    file_ext = resolved_path.suffix.lower()
    try:
        if file_ext in ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.heic', '.heif'):
            try:
                with Image.open(str(resolved_path)) as img:
                    width, height = img.size
            except Exception as e:
                logger.debug(f"Error reading image dimensions: {e}", module="MediaInfo")
        elif file_ext in ('.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v'):
            # Skip ffprobe fallback for performance - rely on cached dimensions only
            # Videos without cached dimensions will show without width/height
            pass
    except Exception:
        pass
    return {
        "file_path": str(resolved_path),
        "width": width,
        "height": height,
        "duration": duration,
        "file_size": resolved_path.stat().st_size,
        "cached": False
    }
@router.get("/embedded-metadata")
@limiter.limit("1000/minute")
@handle_exceptions
async def get_embedded_metadata(
    request: Request,
    file_path: str,
    current_user: Dict = Depends(get_current_user)
):
    """
    Read descriptive metadata embedded in the actual file.

    Video/audio files are probed with ffprobe (title, artist, description,
    comment, date); images with exiftool (ImageDescription, Artist, etc.).
    Unlike /metadata this returns descriptive, not technical, information.
    Probe failures are logged and an all-None payload is returned.
    """
    resolved_path = validate_file_path(file_path)
    if not resolved_path.exists() or not resolved_path.is_file():
        raise CustomFileNotFoundError("File not found", {"path": str(file_path)})
    file_ext = resolved_path.suffix.lower()
    metadata = {"file_path": str(resolved_path)}
    metadata.update(dict.fromkeys(("title", "artist", "description", "comment", "date", "source")))
    try:
        if file_ext in ('.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v', '.m4a', '.mp3'):
            # ffprobe handles both video and audio containers.
            proc = subprocess.run(
                ['ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_format', str(resolved_path)],
                capture_output=True, text=True, timeout=10
            )
            if proc.returncode == 0:
                tags = json.loads(proc.stdout).get('format', {}).get('tags', {})
                # Tag case varies by container; normalize keys to lowercase.
                lower = {key.lower(): value for key, value in tags.items()}
                metadata['title'] = lower.get('title')
                metadata['artist'] = lower.get('artist') or lower.get('album_artist')
                metadata['description'] = lower.get('description') or lower.get('synopsis')
                metadata['comment'] = lower.get('comment')
                metadata['date'] = lower.get('date') or lower.get('creation_time')
                # A URL-shaped comment (or an explicit purl tag) becomes the source.
                if metadata['comment'] and metadata['comment'].startswith('http'):
                    metadata['source'] = metadata['comment']
                elif lower.get('purl'):
                    metadata['source'] = lower.get('purl')
        elif file_ext in ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.heic', '.heif'):
            proc = subprocess.run(
                ['exiftool', '-j', '-ImageDescription', '-XPComment', '-Artist',
                 '-DateTimeOriginal', '-UserComment', '-Caption-Abstract', str(resolved_path)],
                capture_output=True, text=True, timeout=10
            )
            if proc.returncode == 0:
                payload = json.loads(proc.stdout)
                if payload and len(payload) > 0:
                    exif = payload[0]
                    metadata['title'] = exif.get('ImageDescription') or exif.get('Caption-Abstract')
                    metadata['artist'] = exif.get('Artist')
                    metadata['description'] = exif.get('XPComment')
                    metadata['comment'] = exif.get('UserComment')
                    metadata['date'] = exif.get('DateTimeOriginal')
                    # UserComment may be non-string EXIF data, hence the str() guard.
                    if metadata['comment'] and str(metadata['comment']).startswith('http'):
                        metadata['source'] = metadata['comment']
    except subprocess.TimeoutExpired:
        logger.warning(f"Timeout reading embedded metadata: {file_path}", module="Media")
    except Exception as e:
        logger.warning(f"Error reading embedded metadata: {e}", module="Media")
    return metadata
# ============================================================================
# CACHE MANAGEMENT ENDPOINTS
# ============================================================================
@router.post("/cache/rebuild")
@limiter.limit("5/minute")
@handle_exceptions
async def rebuild_media_cache(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Trigger thumbnail and metadata cache rebuild.

    Launches the cache builder as a detached background process and returns
    immediately; this endpoint does not track the rebuild's progress.

    Raises:
        CustomFileNotFoundError: if the builder script is missing.
    """
    import sys
    script_path = settings.PROJECT_ROOT / 'modules' / 'thumbnail_cache_builder.py'
    if not script_path.exists():
        raise CustomFileNotFoundError("Cache builder script not found")
    # Use the interpreter running this app (sys.executable) instead of a
    # hard-coded /usr/bin/python3 so the builder runs in the same
    # virtualenv with the same installed dependencies.
    subprocess.Popen(
        [sys.executable, str(script_path)],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        start_new_session=True
    )
    return {
        "success": True,
        "message": "Cache rebuild started in background"
    }
@router.get("/cache/stats")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_cache_stats(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get statistics about the media cache.

    Returns, per cache DB (thumbnails / metadata): whether it is usable,
    its row count, and its on-disk size in bytes. Previously size_bytes
    was declared but never populated; it is now the DB file's size.
    """
    thumb_db_path = settings.PROJECT_ROOT / 'database' / 'thumbnails.db'
    metadata_db_path = settings.PROJECT_ROOT / 'database' / 'media_metadata.db'
    stats = {
        "thumbnails": {
            "exists": True,
            "count": 0,
            "size_bytes": 0
        },
        "metadata": {
            "exists": True,
            "count": 0,
            "size_bytes": 0
        }
    }
    for key, db_file, table in (
        ("thumbnails", thumb_db_path, "thumbnails"),
        ("metadata", metadata_db_path, "media_metadata"),
    ):
        # Check existence first: sqlite3.connect() would otherwise create an
        # empty DB file as a side effect of a stats request.
        if not db_file.exists():
            stats[key]["exists"] = False
            continue
        try:
            stats[key]["size_bytes"] = db_file.stat().st_size
            with sqlite3.connect(str(db_file)) as conn:
                # Table name comes from the static tuple above, never user input.
                cursor = conn.execute(f"SELECT COUNT(*) FROM {table}")
                stats[key]["count"] = cursor.fetchone()[0]
        except Exception:
            stats[key]["exists"] = False
    return stats
# ============================================================================
# BATCH OPERATIONS
# ============================================================================
# Caps the number of paths accepted by the batch endpoints (delete/move)
# so a single request cannot be used for DoS.
MAX_BATCH_SIZE = 500 # Maximum files per batch operation
@router.post("/batch-delete")
@limiter.limit("10/minute")
@handle_exceptions
async def batch_delete_media(
    request: Request,
    current_user: Dict = Depends(require_admin),
    file_paths: List[str] = Body(...)
):
    """
    Move multiple media files to recycle bin (admin only).
    Maximum 500 files per request.
    """
    # Security: refuse oversized batches up front.
    if len(file_paths) > MAX_BATCH_SIZE:
        raise ValidationError(f"Batch size exceeds maximum of {MAX_BATCH_SIZE} files")
    app_state = get_app_state()
    deleted: List[str] = []
    errors: List[Dict] = []
    media_root = MEDIA_BASE.resolve()
    for original in file_paths:
        try:
            target = Path(original).resolve()
            # relative_to() rejects anything outside MEDIA_BASE (symlink-safe).
            try:
                target.relative_to(media_root)
            except ValueError:
                errors.append({"file": original, "error": "Access denied"})
                continue
            if not (target.exists() and target.is_file()):
                errors.append({"file": original, "error": "File not found"})
                continue
            recycle_id = app_state.db.move_to_recycle_bin(
                file_path=str(target),
                deleted_from='media',
                deleted_by=current_user.get('sub'),
                metadata={}
            )
            if recycle_id:
                deleted.append(original)
            else:
                errors.append({"file": original, "error": "Failed to move to recycle bin"})
        except Exception as e:
            errors.append({"file": original, "error": str(e)})
    # Best-effort websocket notification; never fail the request over it.
    try:
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            await app_state.websocket_manager.broadcast({
                "type": "batch_delete_completed",
                "deleted_count": len(deleted),
                "error_count": len(errors),
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    return {
        "success": True,
        "deleted": deleted,
        "errors": errors,
        "deleted_count": len(deleted),
        "error_count": len(errors)
    }
@router.post("/update-date")
@limiter.limit("30/minute")
@handle_exceptions
async def update_media_date(
    request: Request,
    data: UpdateDateRequest,
    current_user: Dict = Depends(require_admin)
):
    """
    Update the date (post_date or download_date) for media files (admin only).

    For each file_inventory id:
      1. Update matching downloads rows (by file_path OR filename).
      2. If none matched, retry matching by filename alone.
      3. If still none, insert a synthetic downloads row so the date persists.
    Optionally also rewrites file timestamps (EXIF, video metadata, filesystem).

    Raises:
        ValidationError: on a malformed ISO date or unknown date_type.
    """
    from modules.date_utils import DateHandler
    app_state = get_app_state()
    results = []
    success_count = 0
    failed_count = 0
    # Validate all inputs before touching the database.
    try:
        new_date = datetime.fromisoformat(data.new_date.replace('Z', '+00:00'))
    except ValueError:
        raise ValidationError(f"Invalid date format: {data.new_date}. Use ISO format like 2024-06-15T14:30:00")
    if data.date_type not in ("post_date", "download_date"):
        raise ValidationError(f"Invalid date_type: {data.date_type}. Must be 'post_date' or 'download_date'")
    # Use connection pool for main database access
    with app_state.db.get_connection(for_write=True) as conn:
        cursor = conn.cursor()
        for file_id in data.ids:
            try:
                cursor.execute("""
                    SELECT id, file_path, filename, platform
                    FROM file_inventory
                    WHERE id = ?
                """, (file_id,))
                row = cursor.fetchone()
                if not row:
                    results.append({"id": file_id, "success": False, "error": "File not found in inventory"})
                    failed_count += 1
                    continue
                file_path = row['file_path']
                filename = row['filename']
                # date_type was validated above, so branching on it (instead of
                # interpolating it into SQL) keeps these statements injection-safe.
                date_value = new_date.strftime('%Y-%m-%d %H:%M:%S')
                if data.date_type == "post_date":
                    cursor.execute("""
                        UPDATE downloads
                        SET post_date = ?
                        WHERE file_path = ? OR filename = ?
                    """, (date_value, file_path, filename))
                else:  # download_date (already validated above)
                    cursor.execute("""
                        UPDATE downloads
                        SET download_date = ?
                        WHERE file_path = ? OR filename = ?
                    """, (date_value, file_path, filename))
                rows_updated = cursor.rowcount
                # Fallback: the stored path may be stale — match filename only.
                if rows_updated == 0:
                    if data.date_type == "post_date":
                        cursor.execute("""
                            UPDATE downloads
                            SET post_date = ?
                            WHERE filename = ?
                        """, (date_value, filename))
                    else:  # download_date
                        cursor.execute("""
                            UPDATE downloads
                            SET download_date = ?
                            WHERE filename = ?
                        """, (date_value, filename))
                    rows_updated = cursor.rowcount
                # Still no match: insert a downloads record so the date is persisted.
                # (hashlib is imported at module level; the redundant per-iteration
                # import that used to live here has been removed.)
                if rows_updated == 0:
                    platform = row['platform'] or 'unknown'
                    url_hash = hashlib.sha256(file_path.encode()).hexdigest()
                    url = f"file://{file_path}"
                    if data.date_type == "post_date":
                        cursor.execute("""
                            INSERT INTO downloads (url_hash, url, filename, file_path, platform, post_date, download_date, status)
                            VALUES (?, ?, ?, ?, ?, ?, ?, 'completed')
                        """, (url_hash, url, filename, file_path, platform, date_value, date_value))
                    else:  # download_date
                        cursor.execute("""
                            INSERT INTO downloads (url_hash, url, filename, file_path, platform, download_date, status)
                            VALUES (?, ?, ?, ?, ?, ?, 'completed')
                        """, (url_hash, url, filename, file_path, platform, date_value))
                    rows_updated = 1
                    logger.info(f"Created downloads record for {filename} to store {data.date_type}")
                # Update file timestamps if requested
                file_update_success = True
                if data.update_file and Path(file_path).exists():
                    try:
                        file_update_success = DateHandler.update_file_timestamps(file_path, new_date)
                        if file_update_success:
                            logger.info(f"Updated file timestamps for {file_path}")
                        else:
                            logger.warning(f"Failed to update file timestamps for {file_path}")
                    except Exception as e:
                        logger.error(f"Error updating file timestamps for {file_path}: {e}")
                        file_update_success = False
                results.append({
                    "id": file_id,
                    "success": True,
                    "db_rows_updated": rows_updated,
                    "file_updated": file_update_success if data.update_file else None
                })
                success_count += 1
            except Exception as e:
                logger.error(f"Error updating date for file {file_id}: {e}")
                results.append({"id": file_id, "success": False, "error": str(e)})
                failed_count += 1
    return {
        "success": True,
        "results": results,
        "success_count": success_count,
        "failed_count": failed_count
    }
def _broadcast_move_event(app_state, payload: dict):
    """Best-effort synchronous websocket broadcast for move progress; never raises."""
    try:
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            app_state.websocket_manager.broadcast_sync(payload)
    except Exception:
        pass


def process_batch_move_background(file_paths: List[str], destination: str, app_state):
    """Background task to process batch file moves.

    Moves each file into *destination* (relative destinations are rooted at
    MEDIA_BASE), updates all DB path references, and emits one websocket
    progress event per file plus a final completion event. Runs detached
    from the request, so all reporting goes over the websocket.
    """
    dest_path = Path(destination)
    if not dest_path.is_absolute():
        dest_path = MEDIA_BASE / destination
    dest_path.mkdir(parents=True, exist_ok=True)
    moved_count = 0
    error_count = 0
    for file_path in file_paths:
        filename = Path(file_path).name
        try:
            resolved_path = Path(file_path).resolve()
            # relative_to() rejects anything outside MEDIA_BASE (symlink-safe).
            try:
                resolved_path.relative_to(MEDIA_BASE.resolve())
            except ValueError:
                _broadcast_move_event(app_state, {
                    "type": "batch_move_progress",
                    "filename": filename,
                    "success": False,
                    "error": "Access denied"
                })
                error_count += 1
                continue
            if resolved_path.exists() and resolved_path.is_file():
                dest_file = dest_path / resolved_path.name
                shutil.move(str(resolved_path), str(dest_file))
                # Update file_inventory first, then all other path references.
                try:
                    app_state.db.update_file_inventory_location(
                        file_path=str(resolved_path),
                        new_location='final',
                        new_file_path=str(dest_file)
                    )
                except Exception as e:
                    logger.warning(f"Failed to update file_inventory for {filename}: {e}", module="API")
                update_file_path_in_all_tables(app_state.db, str(resolved_path), str(dest_file))
                _broadcast_move_event(app_state, {
                    "type": "batch_move_progress",
                    "filename": filename,
                    "success": True,
                    "destination": str(dest_file)
                })
                moved_count += 1
            else:
                _broadcast_move_event(app_state, {
                    "type": "batch_move_progress",
                    "filename": filename,
                    "success": False,
                    "error": "File not found"
                })
                error_count += 1
        except Exception as e:
            _broadcast_move_event(app_state, {
                "type": "batch_move_progress",
                "filename": filename,
                "success": False,
                "error": str(e)
            })
            error_count += 1
    # Final summary event so the UI can close its progress indicator.
    _broadcast_move_event(app_state, {
        "type": "batch_move_completed",
        "moved_count": moved_count,
        "error_count": error_count,
        "timestamp": now_iso8601()
    })
@router.post("/batch-move")
@limiter.limit("10/minute")
@handle_exceptions
async def batch_move_media(
    request: Request,
    background_tasks: BackgroundTasks,
    current_user: Dict = Depends(require_admin),
    move_data: BatchMoveRequest = Body(...)
):
    """
    Move multiple media files to a different directory (admin only).

    Validates the destination, then queues the actual moves as a background
    task; per-file progress is reported over the websocket.
    Maximum 500 files per request.
    """
    # Security: refuse oversized batches up front.
    if len(move_data.file_paths) > MAX_BATCH_SIZE:
        raise ValidationError(f"Batch size exceeds maximum of {MAX_BATCH_SIZE} files")
    app_state = get_app_state()
    target_dir = Path(move_data.destination)
    if not target_dir.is_absolute():
        target_dir = MEDIA_BASE / move_data.destination
    # relative_to() rejects destinations resolving outside MEDIA_BASE (symlink-safe).
    try:
        target_dir.resolve().relative_to(MEDIA_BASE.resolve())
    except ValueError:
        raise ValidationError(
            "Destination must be within media directory",
            {"destination": move_data.destination}
        )
    background_tasks.add_task(
        process_batch_move_background,
        move_data.file_paths,
        move_data.destination,
        app_state
    )
    return {
        "success": True,
        "processing": True,
        "file_count": len(move_data.file_paths),
        "message": "Batch move started, processing in background"
    }
@router.post("/batch-download")
@limiter.limit("10/minute")
@handle_exceptions
async def batch_download_media(
    request: Request,
    background_tasks: BackgroundTasks,
    current_user: Dict = Depends(get_current_user),
    file_paths: List[str] = Body(...)
):
    """Create a zip file of selected media files.

    Paths outside MEDIA_BASE and unreadable files are skipped silently.
    Same-named files from different folders get numbered archive names.
    The temporary zip is deleted after the response has been sent.
    """
    # Consistency with the other batch endpoints: cap the request size.
    if len(file_paths) > MAX_BATCH_SIZE:
        raise ValidationError(f"Batch size exceeds maximum of {MAX_BATCH_SIZE} files")
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.zip')
    temp_file.close()
    # Clean up the temp zip once the response has been streamed — previously
    # it was leaked on every request. FastAPI attaches injected background
    # tasks to a directly returned Response.
    background_tasks.add_task(Path(temp_file.name).unlink, missing_ok=True)
    used_names = set()
    with zipfile.ZipFile(temp_file.name, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file_path in file_paths:
            try:
                resolved_path = Path(file_path).resolve()
                # relative_to() rejects anything outside MEDIA_BASE (symlink-safe).
                try:
                    resolved_path.relative_to(MEDIA_BASE.resolve())
                except ValueError:
                    continue
                if resolved_path.exists() and resolved_path.is_file():
                    # De-duplicate archive names so same-named files don't
                    # produce duplicate zip entries.
                    arcname = resolved_path.name
                    counter = 1
                    while arcname in used_names:
                        arcname = f"{resolved_path.stem}_{counter}{resolved_path.suffix}"
                        counter += 1
                    used_names.add(arcname)
                    zipf.write(resolved_path, arcname)
            except Exception:
                continue
    return FileResponse(
        temp_file.name,
        media_type='application/zip',
        filename=f'media-{datetime.now().strftime("%Y%m%d-%H%M%S")}.zip'
    )
# ============================================================================
# FILE STATUS AND LOCATION ENDPOINTS
# ============================================================================
@router.post("/check-status")
@limiter.limit("1000/minute")
@handle_exceptions
async def check_file_statuses(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    file_paths: List[str] = Body(..., embed=True)
):
    """
    Check the status of multiple files - returns location (media/review/recycle/deleted).
    """
    import os
    app_state = get_app_state()
    results = {}
    # file_inventory location values mapped to the status names clients expect.
    location_status = {'final': 'media', 'review': 'review'}
    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        for requested in file_paths:
            cursor.execute('''
                SELECT location, file_path FROM file_inventory WHERE file_path = ?
            ''', (requested,))
            row = cursor.fetchone()
            if row:
                location, current_path = row[0], row[1]
                status = location_status.get(location)
                if status is not None:
                    # Tracked as media/review but verify it's still on disk.
                    if not os.path.exists(current_path):
                        status = 'missing'
                    results[requested] = {'status': status, 'current_path': current_path}
                else:
                    results[requested] = {'status': location, 'current_path': current_path}
                continue
            # Not tracked in file_inventory — check the recycle bin next.
            cursor.execute('''
                SELECT id, original_path FROM recycle_bin WHERE original_path = ?
            ''', (requested,))
            recycle_row = cursor.fetchone()
            if recycle_row:
                results[requested] = {'status': 'recycle', 'recycle_id': recycle_row[0]}
            elif os.path.exists(requested):
                # On disk but untracked — report it as regular media.
                results[requested] = {'status': 'media', 'current_path': requested}
            else:
                results[requested] = {'status': 'deleted'}
    return {"file_statuses": results}
@router.post("/move-to-review")
@limiter.limit("10/minute")
@handle_exceptions
async def move_to_review(
    request: Request,
    current_user: Dict = Depends(require_admin),
    file_paths: List[str] = Body(..., embed=True)
):
    """Move media files to review queue. Requires admin privileges."""
    app_state = get_app_state()
    moved: List[str] = []
    errors: List[Dict] = []
    media_root = MEDIA_BASE.resolve()
    for original in file_paths:
        try:
            resolved = Path(original).resolve()
            # relative_to() rejects anything outside MEDIA_BASE (symlink-safe).
            try:
                resolved.relative_to(media_root)
            except ValueError:
                errors.append({"file": original, "error": "Access denied"})
                continue
            if not (resolved.exists() and resolved.is_file()):
                errors.append({"file": original, "error": "File not found"})
                continue
            # Mirror the folder layout relative to MEDIA_BASE under REVIEW_BASE.
            destination = REVIEW_BASE / resolved.relative_to(MEDIA_BASE)
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.move(str(resolved), str(destination))
            try:
                app_state.db.update_file_inventory_location(
                    file_path=str(resolved),
                    new_location='review',
                    new_file_path=str(destination)
                )
            except Exception as e:
                logger.warning(f"Failed to update file_inventory: {e}", module="API")
            update_file_path_in_all_tables(app_state.db, str(resolved), str(destination))
            moved.append(original)
        except Exception as e:
            errors.append({"file": original, "error": str(e)})
    # Best-effort websocket notification; never fail the request over it.
    try:
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            await app_state.websocket_manager.broadcast({
                "type": "move_to_review_completed",
                "moved_count": len(moved),
                "error_count": len(errors),
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    return {
        "success": True,
        "moved": moved,
        "errors": errors,
        "moved_count": len(moved),
        "error_count": len(errors)
    }
# ============================================================================
# GALLERY ENDPOINT
# ============================================================================
def _build_gallery_filters(
    platform: Optional[str],
    source: Optional[str],
    media_type: str,
    date_from: Optional[str],
    date_to: Optional[str],
    size_min: Optional[int],
    size_max: Optional[int],
    search: Optional[str],
    face_recognition: Optional[str],
) -> tuple:
    """Build the WHERE-clause suffix shared by the gallery data and count queries.

    Returns a ``(sql_suffix, params)`` pair: ``sql_suffix`` is a (possibly
    empty) string of ``' AND ...'`` fragments to append after the base
    ``WHERE fi.location = 'final'`` clause, and ``params`` holds the bound
    values in matching order.

    NOTE: the face-recognition fragments reference the ``frs`` alias, which the
    caller only joins when ``face_recognition`` is set — the two must stay in
    sync. Previously this logic was duplicated between the data query and the
    count query, risking silent divergence between page contents and totals.
    """
    clauses = []
    params = []
    if platform:
        clauses.append('fi.platform = ?')
        params.append(platform)
    if source:
        clauses.append('fi.source = ?')
        params.append(source)
    if media_type != "all":
        clauses.append('fi.content_type = ?')
        params.append(media_type)
    if date_from:
        clauses.append('DATE(COALESCE(d_post.max_post_date, fi.created_date)) >= ?')
        params.append(date_from)
    if date_to:
        clauses.append('DATE(COALESCE(d_post.max_post_date, fi.created_date)) <= ?')
        params.append(date_to)
    if size_min is not None:
        clauses.append('fi.file_size >= ?')
        params.append(size_min)
    if size_max is not None:
        clauses.append('fi.file_size <= ?')
        params.append(size_max)
    if search:
        search_term = f'%{search}%'
        clauses.append('(fi.filename LIKE ? OR fi.platform LIKE ? OR fi.source LIKE ? OR fi.content_type LIKE ?)')
        params.extend([search_term] * 4)
    # Face-recognition filters carry no bound parameters; they rely on the
    # frs join that the caller adds only when face_recognition is set.
    if face_recognition == 'matched':
        clauses.append('frs.has_match = 1')
    elif face_recognition == 'no_match':
        clauses.append('frs.file_path IS NOT NULL AND frs.has_match = 0')
    elif face_recognition == 'not_scanned':
        clauses.append('frs.file_path IS NULL')
    return ''.join(f' AND {clause}' for clause in clauses), params


@router.get("/gallery")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_media_gallery(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    platform: Optional[str] = None,
    source: Optional[str] = None,
    media_type: str = Query("all", pattern="^(all|image|video)$"),
    limit: int = Query(50, ge=1, le=500, description="Max items to return (1-500)"),
    offset: int = Query(0, ge=0, description="Number of items to skip"),
    face_recognition: Optional[str] = None,
    sort_by: str = Query("post_date", pattern="^(post_date|download_date|file_size|filename)$"),
    sort_order: str = Query("desc", pattern="^(asc|desc)$"),
    date_from: Optional[str] = Query(None, pattern="^\\d{4}-\\d{2}-\\d{2}$", description="Filter by post date from (YYYY-MM-DD)"),
    date_to: Optional[str] = Query(None, pattern="^\\d{4}-\\d{2}-\\d{2}$", description="Filter by post date to (YYYY-MM-DD)"),
    size_min: Optional[int] = Query(None, ge=0, description="Minimum file size in bytes"),
    size_max: Optional[int] = Query(None, ge=0, description="Maximum file size in bytes"),
    search: Optional[str] = Query(None, max_length=200, description="Search filename"),
    shuffle: bool = Query(False, description="Shuffle results deterministically"),
    shuffle_seed: Optional[int] = Query(None, description="Seed for deterministic shuffle"),
):
    """Get media files for gallery view (database-first).

    Queries ``file_inventory`` rows with ``location = 'final'``, joining the
    ``downloads`` table for effective download/post dates and (only when the
    ``face_recognition`` filter is set) ``face_recognition_scans`` for match
    metadata. Supports filtering by platform/source/type/date/size/search,
    sorting, deterministic shuffling, and limit/offset pagination.

    Returns a dict with ``media`` (list of item dicts), ``total`` (count
    matching the filters), ``limit`` and ``offset``.
    """
    app_state = get_app_state()
    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        # Build the filter suffix ONCE and share it between the data query and
        # the count query so the two can never drift apart.
        filter_sql, filter_params = _build_gallery_filters(
            platform, source, media_type, date_from, date_to,
            size_min, size_max, search, face_recognition
        )
        # Only join face_recognition_scans if filtering by face_recognition
        # (30-40% faster when not filtering)
        if face_recognition:
            query = '''
                SELECT
                    fi.id,
                    fi.file_path,
                    fi.filename,
                    fi.platform,
                    fi.source,
                    fi.content_type as media_type,
                    fi.file_size,
                    fi.width,
                    fi.height,
                    fi.video_id,
                    COALESCE(d_date.max_download_date, fi.created_date) as download_date,
                    COALESCE(d_post.max_post_date, fi.created_date) as post_date,
                    frs.has_match as face_has_match,
                    frs.matched_person as face_matched_person,
                    frs.confidence as face_confidence,
                    frs.scan_date as face_scan_date
                FROM file_inventory fi
                LEFT JOIN (
                    SELECT filename, MAX(download_date) as max_download_date
                    FROM downloads GROUP BY filename
                ) d_date ON d_date.filename = fi.filename
                LEFT JOIN (
                    SELECT file_path, MAX(post_date) as max_post_date
                    FROM downloads GROUP BY file_path
                ) d_post ON d_post.file_path = fi.file_path
                LEFT JOIN face_recognition_scans frs ON frs.file_path = fi.file_path
                WHERE fi.location = 'final'
            '''
        else:
            query = '''
                SELECT
                    fi.id,
                    fi.file_path,
                    fi.filename,
                    fi.platform,
                    fi.source,
                    fi.content_type as media_type,
                    fi.file_size,
                    fi.width,
                    fi.height,
                    fi.video_id,
                    COALESCE(d_date.max_download_date, fi.created_date) as download_date,
                    COALESCE(d_post.max_post_date, fi.created_date) as post_date,
                    NULL as face_has_match,
                    NULL as face_matched_person,
                    NULL as face_confidence,
                    NULL as face_scan_date
                FROM file_inventory fi
                LEFT JOIN (
                    SELECT filename, MAX(download_date) as max_download_date
                    FROM downloads GROUP BY filename
                ) d_date ON d_date.filename = fi.filename
                LEFT JOIN (
                    SELECT file_path, MAX(post_date) as max_post_date
                    FROM downloads GROUP BY file_path
                ) d_post ON d_post.file_path = fi.file_path
                WHERE fi.location = 'final'
            '''
        query += filter_sql
        params = list(filter_params)
        # Count query keeps only the joins the filters can reference
        # (d_post for date filters, frs only when face filtering).
        if face_recognition:
            count_query = '''
                SELECT COUNT(*)
                FROM file_inventory fi
                LEFT JOIN (
                    SELECT file_path, MAX(post_date) as max_post_date
                    FROM downloads GROUP BY file_path
                ) d_post ON d_post.file_path = fi.file_path
                LEFT JOIN face_recognition_scans frs ON frs.file_path = fi.file_path
                WHERE fi.location = 'final'
            '''
        else:
            count_query = '''
                SELECT COUNT(*)
                FROM file_inventory fi
                LEFT JOIN (
                    SELECT file_path, MAX(post_date) as max_post_date
                    FROM downloads GROUP BY file_path
                ) d_post ON d_post.file_path = fi.file_path
                WHERE fi.location = 'final'
            '''
        count_query += filter_sql
        cursor.execute(count_query, filter_params)
        total = cursor.fetchone()[0]
        # Add sorting
        if shuffle:
            # Deterministic shuffle using PostgreSQL md5 hash
            seed = shuffle_seed if shuffle_seed is not None else 42
            query += ' ORDER BY md5(fi.id::text || ?::text), fi.id'
            params.append(str(seed))
        else:
            # Whitelist mapping: only values from this dict are ever
            # interpolated into the ORDER BY, so the f-string below is safe.
            field_mapping = {
                'post_date': 'post_date',
                'download_date': 'download_date',
                'file_size': 'fi.file_size',
                'filename': 'fi.filename',
                'source': 'fi.source',
                'platform': 'fi.platform'
            }
            db_sort_field = field_mapping.get(sort_by, 'post_date')
            sort_direction = 'DESC' if sort_order.lower() == 'desc' else 'ASC'
            query += f' ORDER BY {db_sort_field} {sort_direction}'
        # Add pagination
        query += ' LIMIT ? OFFSET ?'
        params.extend([limit, offset])
        cursor.execute(query, params)
        rows = cursor.fetchall()
        # Batch fetch dimensions for items missing width/height (avoids N+1 queries)
        paths_needing_dimensions = [
            row['file_path'] for row in rows
            if row['width'] is None or row['height'] is None
        ]
        dimensions_cache = get_media_dimensions_batch(paths_needing_dimensions) if paths_needing_dimensions else {}
        # Convert to list of dicts
        media = []
        for row in rows:
            # Use cached dimensions or existing values
            if row['width'] is not None and row['height'] is not None:
                width, height = row['width'], row['height']
            else:
                width, height = dimensions_cache.get(row['file_path'], (None, None))
            # face_has_match is NULL both when no scan row exists and when the
            # frs join was skipped entirely (no face filter requested).
            has_face_data = row['face_has_match'] is not None
            face_recognition_data = {
                'scanned': has_face_data,
                'matched': bool(row['face_has_match']) if has_face_data else False,
                'person_name': row['face_matched_person'] if has_face_data else None,
                'confidence': row['face_confidence'] if has_face_data else None,
                'scan_date': row['face_scan_date'] if has_face_data else None
            }
            item = {
                "id": row['id'],
                "platform": row['platform'],
                "source": row['source'] or 'unknown',
                "filename": row['filename'],
                "file_path": row['file_path'],
                "file_size": row['file_size'] or 0,
                "media_type": row['media_type'] or 'image',
                "download_date": row['download_date'],
                "post_date": row['post_date'] if row['post_date'] else '',
                "width": width,
                "height": height,
                "video_id": row['video_id'],
                "face_recognition": face_recognition_data
            }
            media.append(item)
        return {
            "media": media,
            "total": total,
            "limit": limit,
            "offset": offset
        }
@router.get("/gallery/date-range")
@limiter.limit("60/minute")
@handle_exceptions
async def get_media_gallery_date_range(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    media_type: Optional[str] = Query(None, pattern="^(image|video)$"),
):
    """Return the year/month distribution of final media for the timeline scrubber.

    Groups ``file_inventory`` rows with ``location = 'final'`` by year and
    month of their effective date (latest completed download post_date,
    falling back to created_date), optionally restricted to images or videos.
    Response shape: ``{"ranges": [{"year", "month", "count"}, ...]}``.
    """
    app_state = get_app_state()
    # media_type is pattern-validated by Query to be exactly 'image' or
    # 'video' when present, so interpolating it into the literal is safe.
    type_clause = f" AND fi.content_type = '{media_type}'" if media_type in ('image', 'video') else ""
    sql = (
        '''
            SELECT
                EXTRACT(YEAR FROM COALESCE(d.max_post_date, fi.created_date)::timestamp) as year,
                EXTRACT(MONTH FROM COALESCE(d.max_post_date, fi.created_date)::timestamp) as month,
                COUNT(*) as count
            FROM file_inventory fi
            LEFT JOIN (
                SELECT file_path, MAX(post_date) as max_post_date
                FROM downloads
                WHERE status = 'completed'
                GROUP BY file_path
            ) d ON fi.file_path = d.file_path
            WHERE fi.location = 'final'
        '''
        + type_clause
        + '''
            GROUP BY year, month
            ORDER BY year DESC, month DESC
        '''
    )
    with app_state.db.get_connection() as conn:
        cur = conn.cursor()
        cur.execute(sql, [])
        buckets = [
            {"year": int(rec["year"]), "month": int(rec["month"]), "count": rec["count"]}
            for rec in cur.fetchall()
        ]
    return {"ranges": buckets}