Files
media-downloader/web/backend/routers/recycle.py
Todd 523f91788e Fix DB paths, add auth to sensitive endpoints, misc bug fixes
- scheduler.py: Use full path for scheduler_state.db instead of relative name
- recycle.py: Use full path for thumbnails.db instead of relative name
- cloud_backup.py, maintenance.py, stats.py: Require admin for config/cleanup/settings endpoints
- press.py: Add auth to press image serving endpoint
- private_gallery.py: Fix _create_pg_job call and add missing secrets import
- appearances.py: Use sync httpx instead of asyncio.run for background thread HTTP call

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-30 08:25:00 -04:00

603 lines
19 KiB
Python

"""
Recycle Bin Router
Handles all recycle bin operations:
- List deleted files
- Recycle bin statistics
- Restore files
- Permanently delete files
- Empty recycle bin
- Serve files for preview
- Get file metadata
"""
import hashlib
import json
import mimetypes
import sqlite3
from typing import Dict, Optional
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, Body, Query, Request
from fastapi.responses import FileResponse, Response
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, get_current_user_media, require_admin, get_app_state
from ..core.config import settings
from ..core.exceptions import (
handle_exceptions,
DatabaseError,
RecordNotFoundError,
MediaFileNotFoundError as CustomFileNotFoundError,
FileOperationError
)
from ..core.responses import now_iso8601
from ..core.utils import ThumbnailLRUCache
from modules.universal_logger import get_logger
logger = get_logger('API')
router = APIRouter(prefix="/api/recycle", tags=["Recycle Bin"])
limiter = Limiter(key_func=get_remote_address)
# Global thumbnail memory cache for recycle bin (500 items or 100MB max)
# Using shared ThumbnailLRUCache from core/utils.py
_thumbnail_cache = ThumbnailLRUCache(max_size=500, max_memory_mb=100)
@router.get("/list")
@limiter.limit("100/minute")
@handle_exceptions
async def list_recycle_bin(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    deleted_from: Optional[str] = None,
    platform: Optional[str] = None,
    source: Optional[str] = None,
    search: Optional[str] = None,
    media_type: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    size_min: Optional[int] = None,
    size_max: Optional[int] = None,
    sort_by: str = Query('download_date', pattern='^(deleted_at|file_size|filename|deleted_from|download_date|post_date|confidence)$'),
    sort_order: str = Query('desc', pattern='^(asc|desc)$'),
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0)
):
    """
    Return a filtered, sorted page of recycle-bin entries.

    Args:
        deleted_from: Restrict to a deletion source (downloads, media, review).
        platform: Restrict to a platform (instagram, tiktok, ...).
        source: Restrict to a source/username.
        search: Substring match against the filename.
        media_type: Restrict to 'image' or 'video'.
        date_from: Earliest deletion date (YYYY-MM-DD).
        date_to: Latest deletion date (YYYY-MM-DD).
        size_min: Minimum file size in bytes.
        size_max: Maximum file size in bytes.
        sort_by: Column to order by (validated by the Query pattern).
        sort_order: 'asc' or 'desc'.
        limit: Page size (1-1000).
        offset: Number of items to skip.

    Raises:
        DatabaseError: If the application database is not initialized.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    page = state.db.list_recycle_bin(
        deleted_from=deleted_from,
        platform=platform,
        source=source,
        search=search,
        media_type=media_type,
        date_from=date_from,
        date_to=date_to,
        size_min=size_min,
        size_max=size_max,
        sort_by=sort_by,
        sort_order=sort_order,
        limit=limit,
        offset=offset,
    )
    return {"success": True, "items": page['items'], "total": page['total']}
@router.get("/filters")
@limiter.limit("100/minute")
@handle_exceptions
async def get_recycle_filters(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    platform: Optional[str] = None
):
    """
    Return the distinct platforms and sources present in the recycle bin.

    Args:
        platform: When given, restrict the returned sources to this platform.

    Raises:
        DatabaseError: If the application database is not initialized.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    options = state.db.get_recycle_bin_filters(platform=platform)
    return {
        "success": True,
        "platforms": options['platforms'],
        "sources": options['sources'],
    }
@router.get("/stats")
@limiter.limit("100/minute")
@handle_exceptions
async def get_recycle_bin_stats(request: Request, current_user: Dict = Depends(get_current_user)):
    """
    Return aggregate recycle-bin statistics.

    Includes total count, total size, and a breakdown by deleted_from
    source, plus a response timestamp.

    Raises:
        DatabaseError: If the application database is not initialized.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    return {
        "success": True,
        "stats": state.db.get_recycle_bin_stats(),
        "timestamp": now_iso8601(),
    }
@router.post("/restore")
@limiter.limit("20/minute")
@handle_exceptions
async def restore_from_recycle(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    recycle_id: str = Body(..., embed=True)
):
    """
    Move a recycled file back to its original location.

    The file is re-registered in the file_inventory table; connected
    websocket clients are notified on success (best effort).

    Raises:
        DatabaseError: If the application database is not initialized.
        FileOperationError: If the restore could not be performed.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    if not state.db.restore_from_recycle_bin(recycle_id):
        raise FileOperationError(
            "Failed to restore file",
            {"recycle_id": recycle_id}
        )
    # Best-effort websocket notification; broadcast failures are ignored.
    try:
        ws = getattr(state, 'websocket_manager', None)
        if ws:
            await ws.broadcast({
                "type": "recycle_restore_completed",
                "recycle_id": recycle_id,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    logger.info(f"Restored file from recycle bin: {recycle_id}", module="Recycle")
    return {
        "success": True,
        "message": "File restored successfully",
        "recycle_id": recycle_id
    }
@router.delete("/delete/{recycle_id}")
@limiter.limit("20/minute")
@handle_exceptions
async def permanently_delete_from_recycle(
    request: Request,
    recycle_id: str,
    current_user: Dict = Depends(require_admin)
):
    """
    Permanently remove a file from the recycle bin.

    **Admin only.** The file is erased from disk and cannot be recovered;
    connected websocket clients are notified on success (best effort).

    Raises:
        DatabaseError: If the application database is not initialized.
        FileOperationError: If the deletion could not be performed.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    if not state.db.permanently_delete_from_recycle_bin(recycle_id):
        raise FileOperationError(
            "Failed to delete file",
            {"recycle_id": recycle_id}
        )
    # Best-effort websocket notification; broadcast failures are ignored.
    try:
        ws = getattr(state, 'websocket_manager', None)
        if ws:
            await ws.broadcast({
                "type": "recycle_delete_completed",
                "recycle_id": recycle_id,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    logger.info(f"Permanently deleted file from recycle: {recycle_id}", module="Recycle")
    return {
        "success": True,
        "message": "File permanently deleted",
        "recycle_id": recycle_id
    }
@router.post("/empty")
@limiter.limit("5/minute")
@handle_exceptions
async def empty_recycle_bin(
    request: Request,
    current_user: Dict = Depends(require_admin),  # destructive: admin only
    older_than_days: Optional[int] = Body(None, embed=True)
):
    """
    Purge files from the recycle bin.

    **Admin only.**

    Args:
        older_than_days: Only purge files older than this many days;
            None purges everything.

    Raises:
        DatabaseError: If the application database is not initialized.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    deleted_count = state.db.empty_recycle_bin(older_than_days=older_than_days)
    # Best-effort websocket notification; broadcast failures are ignored.
    try:
        ws = getattr(state, 'websocket_manager', None)
        if ws:
            await ws.broadcast({
                "type": "recycle_emptied",
                "deleted_count": deleted_count,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    logger.info(f"Emptied recycle bin: {deleted_count} files deleted", module="Recycle")
    return {
        "success": True,
        "deleted_count": deleted_count,
        "older_than_days": older_than_days
    }
@router.get("/file/{recycle_id}")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_recycle_file(
    request: Request,
    recycle_id: str,
    thumbnail: bool = False,
    type: Optional[str] = None,
    token: Optional[str] = None,
    current_user: Dict = Depends(get_current_user_media)
):
    """
    Serve a recycled file (or its thumbnail) for preview.

    Args:
        recycle_id: ID of the recycle_bin row.
        thumbnail: When True, return a cached/generated JPEG thumbnail
            instead of the full file.
        type: Media-type hint ('image' or 'video') for thumbnail generation.
        token: Optional auth token consumed by the media auth dependency.

    Raises:
        DatabaseError: If the application database is not initialized.
        RecordNotFoundError: If the recycle_bin row does not exist.
        CustomFileNotFoundError: If the file is missing on disk.
        FileOperationError: If a thumbnail could not be generated.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    with state.db.get_connection() as conn:
        cur = conn.cursor()
        cur.execute(
            'SELECT recycle_path, original_path, original_filename, file_hash FROM recycle_bin WHERE id = ?',
            (recycle_id,)
        )
        row = cur.fetchone()
    if not row:
        raise RecordNotFoundError(
            "File not found in recycle bin",
            {"recycle_id": recycle_id}
        )
    file_path = Path(row['recycle_path'])
    if not file_path.exists():
        raise CustomFileNotFoundError(
            "Physical file not found",
            {"path": str(file_path)}
        )
    if thumbnail:
        # 3-tier lookup: in-memory LRU -> thumbnails.db -> generate on demand.
        # The content hash keys the cache so thumbnails survive file moves;
        # fall back to the recycle path when no hash was recorded.
        content_hash = row['file_hash']
        cache_key = content_hash or str(file_path)
        data = _thumbnail_cache.get(cache_key)
        if not data:
            # original_path enables a legacy lookup for thumbnails cached
            # before the file was moved into the recycle bin.
            data = _get_or_create_thumbnail(file_path, type or 'image', content_hash, row['original_path'])
            if not data:
                raise FileOperationError("Failed to generate thumbnail")
            # Prime the in-memory cache for subsequent requests.
            _thumbnail_cache.put(cache_key, data)
        return Response(
            content=data,
            media_type="image/jpeg",
            headers={
                "Cache-Control": "public, max-age=86400, immutable",
                "Vary": "Accept-Encoding"
            }
        )
    # Full file: guess the MIME type from the filename, defaulting to a
    # generic binary stream.
    mime_type, _ = mimetypes.guess_type(str(file_path))
    return FileResponse(
        path=str(file_path),
        media_type=mime_type or "application/octet-stream",
        filename=row['original_filename']
    )
@router.get("/metadata/{recycle_id}")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_recycle_metadata(
    request: Request,
    recycle_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """
    Return on-demand metadata for a recycled file.

    Includes filename, size, platform/source (parsed from the stored
    metadata JSON when present), and dimensions/duration probed from
    the file itself.

    Raises:
        DatabaseError: If the application database is not initialized.
        RecordNotFoundError: If the recycle_bin row does not exist.
        CustomFileNotFoundError: If the file is missing on disk.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    with state.db.get_connection() as conn:
        cur = conn.cursor()
        cur.execute('''
            SELECT recycle_path, original_filename, file_size, original_path, metadata
            FROM recycle_bin WHERE id = ?
        ''', (recycle_id,))
        row = cur.fetchone()
    if not row:
        raise RecordNotFoundError(
            "File not found in recycle bin",
            {"recycle_id": recycle_id}
        )
    recycle_path = Path(row['recycle_path'])
    if not recycle_path.exists():
        raise CustomFileNotFoundError(
            "Physical file not found",
            {"path": str(recycle_path)}
        )
    # Platform/source live inside the stored metadata JSON; tolerate
    # missing or malformed metadata.
    platform = source = None
    try:
        meta = json.loads(row['metadata']) if row['metadata'] else {}
        platform = meta.get('platform')
        source = meta.get('source')
    except Exception:
        pass
    width, height, duration = _extract_dimensions(recycle_path)
    return {
        "success": True,
        "recycle_id": recycle_id,
        "filename": row['original_filename'],
        "file_size": row['file_size'],
        "platform": platform,
        "source": source,
        "width": width,
        "height": height,
        "duration": duration
    }
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def _get_or_create_thumbnail(file_path: Path, media_type: str, content_hash: str = None, original_path: str = None) -> Optional[bytes]:
    """
    Get or create a JPEG thumbnail for a file, backed by thumbnails.db.

    Uses a 2-step cache lookup for backwards compatibility:
      1. By content hash (new method - survives file moves).
      2. By original_path (legacy thumbnails cached before the move).
    On a miss, a 300x300 JPEG is generated (ffmpeg frame grab for video,
    Pillow for images) and written back to thumbnails.db.

    Args:
        file_path: Path to the file (current location in recycle bin).
        media_type: 'image' or 'video'.
        content_hash: Optional SHA-256 of the file *content* used as cache key.
        original_path: Optional original file path before moving to recycle bin.

    Returns:
        JPEG bytes, or None if the thumbnail could not be produced.
    """
    from PIL import Image
    import io
    from datetime import datetime

    thumb_db = str(settings.PROJECT_ROOT / 'database' / 'thumbnails.db')

    # --- Cache lookup (best effort: a broken cache must not block serving) ---
    try:
        with sqlite3.connect(thumb_db, timeout=30.0) as conn:
            cursor = conn.cursor()
            # 1. Content hash first (survives file moves).
            if content_hash:
                cursor.execute("SELECT thumbnail_data FROM thumbnails WHERE file_hash = ?", (content_hash,))
                result = cursor.fetchone()
                if result:
                    return result[0]
            # 2. Legacy lookup by the pre-move path.
            if original_path:
                cursor.execute("SELECT thumbnail_data FROM thumbnails WHERE file_path = ?", (original_path,))
                result = cursor.fetchone()
                if result:
                    return result[0]
    except Exception as e:
        # Was a silent pass; log so cache corruption is visible, but stay best-effort.
        logger.warning(f"Thumbnail cache lookup failed: {e}", module="Recycle")

    # --- Generate the thumbnail ---
    try:
        if media_type == 'video':
            # Grab a single frame ~1s in as MJPEG on stdout.
            import subprocess
            result = subprocess.run([
                'ffmpeg', '-i', str(file_path),
                '-ss', '00:00:01', '-vframes', '1',
                '-f', 'image2pipe', '-vcodec', 'mjpeg', '-'
            ], capture_output=True, timeout=10)
            if result.returncode != 0:
                return None
            img = Image.open(io.BytesIO(result.stdout))
        else:
            img = Image.open(file_path)
        # JPEG cannot encode alpha/palette/other exotic modes. Normalize
        # anything that is not already JPEG-safe (the old RGBA/P-only check
        # crashed on e.g. 'LA' images).
        if img.mode not in ('RGB', 'L'):
            img = img.convert('RGB')
        img.thumbnail((300, 300), Image.Resampling.LANCZOS)
        output = io.BytesIO()
        img.save(output, format='JPEG', quality=85)
        thumbnail_data = output.getvalue()
    except Exception as e:
        logger.warning(f"Failed to generate thumbnail: {e}", module="Recycle")
        return None

    # --- Write back to the persistent cache (best effort) ---
    try:
        file_mtime = file_path.stat().st_mtime if file_path.exists() else None
        if content_hash:
            thumb_file_hash = content_hash
        else:
            # Hash the file *content* so the stored row is findable via the
            # file_hash lookup above. (Previously this hashed the path
            # string, which can never match a real content hash, leaving
            # the cached row unreachable by hash.)
            hasher = hashlib.sha256()
            with open(file_path, 'rb') as fh:
                for chunk in iter(lambda: fh.read(1024 * 1024), b''):
                    hasher.update(chunk)
            thumb_file_hash = hasher.hexdigest()
        with sqlite3.connect(thumb_db) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO thumbnails
                (file_hash, file_path, thumbnail_data, created_at, file_mtime)
                VALUES (?, ?, ?, ?, ?)
            """, (thumb_file_hash, str(file_path), thumbnail_data, datetime.now().isoformat(), file_mtime))
            conn.commit()
    except Exception:
        pass  # Caching is optional; never fail the request over it.
    return thumbnail_data
def _extract_dimensions(file_path: Path) -> tuple:
"""
Extract dimensions from a media file.
Returns: (width, height, duration)
"""
width, height, duration = None, None, None
file_ext = file_path.suffix.lower()
try:
if file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.heic', '.heif']:
from PIL import Image
with Image.open(file_path) as img:
width, height = img.size
elif file_ext in ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v']:
import subprocess
result = subprocess.run([
'ffprobe', '-v', 'quiet', '-print_format', 'json',
'-show_streams', str(file_path)
], capture_output=True, text=True, timeout=10)
if result.returncode == 0:
data = json.loads(result.stdout)
for stream in data.get('streams', []):
if stream.get('codec_type') == 'video':
width = stream.get('width')
height = stream.get('height')
duration_str = stream.get('duration')
if duration_str:
duration = float(duration_str)
break
except Exception as e:
logger.warning(f"Failed to extract dimensions: {e}", module="Recycle")
return width, height, duration