Files
media-downloader/web/backend/routers/recycle.py
Todd 523f91788e Fix DB paths, add auth to sensitive endpoints, misc bug fixes
- scheduler.py: Use full path for scheduler_state.db instead of relative name
- recycle.py: Use full path for thumbnails.db instead of relative name
- cloud_backup.py, maintenance.py, stats.py: Require admin for config/cleanup/settings endpoints
- press.py: Add auth to press image serving endpoint
- private_gallery.py: Fix _create_pg_job call and add missing secrets import
- appearances.py: Use sync httpx instead of asyncio.run for background thread HTTP call

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-30 08:25:00 -04:00

603 lines
19 KiB
Python

"""
Recycle Bin Router
Handles all recycle bin operations:
- List deleted files
- Recycle bin statistics
- Restore files
- Permanently delete files
- Empty recycle bin
- Serve files for preview
- Get file metadata
"""
import hashlib
import json
import mimetypes
import sqlite3
from typing import Dict, Optional
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, Body, Query, Request
from fastapi.responses import FileResponse, Response
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, get_current_user_media, require_admin, get_app_state
from ..core.config import settings
from ..core.exceptions import (
handle_exceptions,
DatabaseError,
RecordNotFoundError,
MediaFileNotFoundError as CustomFileNotFoundError,
FileOperationError
)
from ..core.responses import now_iso8601
from ..core.utils import ThumbnailLRUCache
from modules.universal_logger import get_logger
logger = get_logger('API')
router = APIRouter(prefix="/api/recycle", tags=["Recycle Bin"])
limiter = Limiter(key_func=get_remote_address)
# Global thumbnail memory cache for recycle bin (500 items or 100MB max)
# Using shared ThumbnailLRUCache from core/utils.py
_thumbnail_cache = ThumbnailLRUCache(max_size=500, max_memory_mb=100)
@router.get("/list")
@limiter.limit("100/minute")
@handle_exceptions
async def list_recycle_bin(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    deleted_from: Optional[str] = None,
    platform: Optional[str] = None,
    source: Optional[str] = None,
    search: Optional[str] = None,
    media_type: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    size_min: Optional[int] = None,
    size_max: Optional[int] = None,
    sort_by: str = Query('download_date', pattern='^(deleted_at|file_size|filename|deleted_from|download_date|post_date|confidence)$'),
    sort_order: str = Query('desc', pattern='^(asc|desc)$'),
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0)
):
    """
    Return a filtered, sorted page of recycle-bin entries.

    Args:
        deleted_from: Restrict to a deletion source (downloads, media, review).
        platform: Restrict to a platform (instagram, tiktok, ...).
        source: Restrict to a source/username.
        search: Substring match against the filename.
        media_type: Restrict to 'image' or 'video'.
        date_from: Earliest deletion date (YYYY-MM-DD).
        date_to: Latest deletion date (YYYY-MM-DD).
        size_min: Minimum file size in bytes.
        size_max: Maximum file size in bytes.
        sort_by: Column to order by (validated by the Query pattern).
        sort_order: 'asc' or 'desc'.
        limit: Page size (1-1000).
        offset: Number of items to skip.

    Raises:
        DatabaseError: If the application database is not initialized.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    page = state.db.list_recycle_bin(
        deleted_from=deleted_from,
        platform=platform,
        source=source,
        search=search,
        media_type=media_type,
        date_from=date_from,
        date_to=date_to,
        size_min=size_min,
        size_max=size_max,
        sort_by=sort_by,
        sort_order=sort_order,
        limit=limit,
        offset=offset,
    )
    return {"success": True, "items": page['items'], "total": page['total']}
@router.get("/filters")
@limiter.limit("100/minute")
@handle_exceptions
async def get_recycle_filters(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    platform: Optional[str] = None
):
    """
    Return the distinct platforms and sources present in the recycle bin.

    Args:
        platform: When given, restrict the returned sources to this platform.

    Raises:
        DatabaseError: If the application database is not initialized.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    options = state.db.get_recycle_bin_filters(platform=platform)
    return {
        "success": True,
        "platforms": options['platforms'],
        "sources": options['sources'],
    }
@router.get("/stats")
@limiter.limit("100/minute")
@handle_exceptions
async def get_recycle_bin_stats(request: Request, current_user: Dict = Depends(get_current_user)):
    """
    Return aggregate recycle-bin statistics.

    Includes total count, total size, and a breakdown by deleted_from
    source, plus a response timestamp.

    Raises:
        DatabaseError: If the application database is not initialized.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    return {
        "success": True,
        "stats": state.db.get_recycle_bin_stats(),
        "timestamp": now_iso8601(),
    }
@router.post("/restore")
@limiter.limit("20/minute")
@handle_exceptions
async def restore_from_recycle(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    recycle_id: str = Body(..., embed=True)
):
    """
    Move a recycled file back to its original location.

    The file is re-registered in the file_inventory table; connected
    websocket clients are notified on success (best effort).

    Raises:
        DatabaseError: If the application database is not initialized.
        FileOperationError: If the restore could not be performed.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    if not state.db.restore_from_recycle_bin(recycle_id):
        raise FileOperationError(
            "Failed to restore file",
            {"recycle_id": recycle_id}
        )
    # Best-effort websocket notification; broadcast failures are ignored.
    try:
        ws = getattr(state, 'websocket_manager', None)
        if ws:
            await ws.broadcast({
                "type": "recycle_restore_completed",
                "recycle_id": recycle_id,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    logger.info(f"Restored file from recycle bin: {recycle_id}", module="Recycle")
    return {
        "success": True,
        "message": "File restored successfully",
        "recycle_id": recycle_id
    }
@router.delete("/delete/{recycle_id}")
@limiter.limit("20/minute")
@handle_exceptions
async def permanently_delete_from_recycle(
    request: Request,
    recycle_id: str,
    current_user: Dict = Depends(require_admin)
):
    """
    Permanently remove a file from the recycle bin.

    **Admin only.** The file is erased from disk and cannot be recovered;
    connected websocket clients are notified on success (best effort).

    Raises:
        DatabaseError: If the application database is not initialized.
        FileOperationError: If the deletion could not be performed.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    if not state.db.permanently_delete_from_recycle_bin(recycle_id):
        raise FileOperationError(
            "Failed to delete file",
            {"recycle_id": recycle_id}
        )
    # Best-effort websocket notification; broadcast failures are ignored.
    try:
        ws = getattr(state, 'websocket_manager', None)
        if ws:
            await ws.broadcast({
                "type": "recycle_delete_completed",
                "recycle_id": recycle_id,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    logger.info(f"Permanently deleted file from recycle: {recycle_id}", module="Recycle")
    return {
        "success": True,
        "message": "File permanently deleted",
        "recycle_id": recycle_id
    }
@router.post("/empty")
@limiter.limit("5/minute")
@handle_exceptions
async def empty_recycle_bin(
    request: Request,
    current_user: Dict = Depends(require_admin),  # destructive: admin only
    older_than_days: Optional[int] = Body(None, embed=True)
):
    """
    Purge files from the recycle bin.

    **Admin only.**

    Args:
        older_than_days: Only purge files older than this many days;
            None purges everything.

    Raises:
        DatabaseError: If the application database is not initialized.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    deleted_count = state.db.empty_recycle_bin(older_than_days=older_than_days)
    # Best-effort websocket notification; broadcast failures are ignored.
    try:
        ws = getattr(state, 'websocket_manager', None)
        if ws:
            await ws.broadcast({
                "type": "recycle_emptied",
                "deleted_count": deleted_count,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    logger.info(f"Emptied recycle bin: {deleted_count} files deleted", module="Recycle")
    return {
        "success": True,
        "deleted_count": deleted_count,
        "older_than_days": older_than_days
    }
@router.get("/file/{recycle_id}")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_recycle_file(
    request: Request,
    recycle_id: str,
    thumbnail: bool = False,
    type: Optional[str] = None,
    token: Optional[str] = None,
    current_user: Dict = Depends(get_current_user_media)
):
    """
    Serve a recycled file (or its thumbnail) for preview.

    Args:
        recycle_id: ID of the recycle_bin row.
        thumbnail: When True, return a cached/generated JPEG thumbnail
            instead of the full file.
        type: Media-type hint ('image' or 'video') for thumbnail generation.
        token: Optional auth token consumed by the media auth dependency.

    Raises:
        DatabaseError: If the application database is not initialized.
        RecordNotFoundError: If the recycle_bin row does not exist.
        CustomFileNotFoundError: If the file is missing on disk.
        FileOperationError: If a thumbnail could not be generated.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    with state.db.get_connection() as conn:
        cur = conn.cursor()
        cur.execute(
            'SELECT recycle_path, original_path, original_filename, file_hash FROM recycle_bin WHERE id = ?',
            (recycle_id,)
        )
        row = cur.fetchone()
    if not row:
        raise RecordNotFoundError(
            "File not found in recycle bin",
            {"recycle_id": recycle_id}
        )
    file_path = Path(row['recycle_path'])
    if not file_path.exists():
        raise CustomFileNotFoundError(
            "Physical file not found",
            {"path": str(file_path)}
        )
    if thumbnail:
        # 3-tier lookup: in-memory LRU -> thumbnails.db -> generate on demand.
        # The content hash keys the cache so thumbnails survive file moves;
        # fall back to the recycle path when no hash was recorded.
        content_hash = row['file_hash']
        cache_key = content_hash or str(file_path)
        data = _thumbnail_cache.get(cache_key)
        if not data:
            # original_path enables a legacy lookup for thumbnails cached
            # before the file was moved into the recycle bin.
            data = _get_or_create_thumbnail(file_path, type or 'image', content_hash, row['original_path'])
            if not data:
                raise FileOperationError("Failed to generate thumbnail")
            # Prime the in-memory cache for subsequent requests.
            _thumbnail_cache.put(cache_key, data)
        return Response(
            content=data,
            media_type="image/jpeg",
            headers={
                "Cache-Control": "public, max-age=86400, immutable",
                "Vary": "Accept-Encoding"
            }
        )
    # Full file: guess the MIME type from the filename, defaulting to a
    # generic binary stream.
    mime_type, _ = mimetypes.guess_type(str(file_path))
    return FileResponse(
        path=str(file_path),
        media_type=mime_type or "application/octet-stream",
        filename=row['original_filename']
    )
@router.get("/metadata/{recycle_id}")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_recycle_metadata(
    request: Request,
    recycle_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """
    Return on-demand metadata for a recycled file.

    Includes filename, size, platform/source (parsed from the stored
    metadata JSON when present), and dimensions/duration probed from
    the file itself.

    Raises:
        DatabaseError: If the application database is not initialized.
        RecordNotFoundError: If the recycle_bin row does not exist.
        CustomFileNotFoundError: If the file is missing on disk.
    """
    state = get_app_state()
    if not state.db:
        raise DatabaseError("Database not initialized")
    with state.db.get_connection() as conn:
        cur = conn.cursor()
        cur.execute('''
            SELECT recycle_path, original_filename, file_size, original_path, metadata
            FROM recycle_bin WHERE id = ?
        ''', (recycle_id,))
        row = cur.fetchone()
    if not row:
        raise RecordNotFoundError(
            "File not found in recycle bin",
            {"recycle_id": recycle_id}
        )
    recycle_path = Path(row['recycle_path'])
    if not recycle_path.exists():
        raise CustomFileNotFoundError(
            "Physical file not found",
            {"path": str(recycle_path)}
        )
    # Platform/source live inside the stored metadata JSON; tolerate
    # missing or malformed metadata.
    platform = source = None
    try:
        meta = json.loads(row['metadata']) if row['metadata'] else {}
        platform = meta.get('platform')
        source = meta.get('source')
    except Exception:
        pass
    width, height, duration = _extract_dimensions(recycle_path)
    return {
        "success": True,
        "recycle_id": recycle_id,
        "filename": row['original_filename'],
        "file_size": row['file_size'],
        "platform": platform,
        "source": source,
        "width": width,
        "height": height,
        "duration": duration
    }
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def _get_or_create_thumbnail(file_path: Path, media_type: str, content_hash: str = None, original_path: str = None) -> Optional[bytes]:
    """
    Get or create a JPEG thumbnail for a file, backed by thumbnails.db.

    Uses a 2-step cache lookup for backwards compatibility:
      1. By content hash (new method - survives file moves).
      2. By original_path (legacy thumbnails cached before the move).
    On a miss, a 300x300 JPEG is generated (ffmpeg frame grab for video,
    Pillow for images) and written back to thumbnails.db.

    Args:
        file_path: Path to the file (current location in recycle bin).
        media_type: 'image' or 'video'.
        content_hash: Optional SHA-256 of the file *content* used as cache key.
        original_path: Optional original file path before moving to recycle bin.

    Returns:
        JPEG bytes, or None if the thumbnail could not be produced.
    """
    from PIL import Image
    import io
    from datetime import datetime

    thumb_db = str(settings.PROJECT_ROOT / 'database' / 'thumbnails.db')

    # --- Cache lookup (best effort: a broken cache must not block serving) ---
    try:
        with sqlite3.connect(thumb_db, timeout=30.0) as conn:
            cursor = conn.cursor()
            # 1. Content hash first (survives file moves).
            if content_hash:
                cursor.execute("SELECT thumbnail_data FROM thumbnails WHERE file_hash = ?", (content_hash,))
                result = cursor.fetchone()
                if result:
                    return result[0]
            # 2. Legacy lookup by the pre-move path.
            if original_path:
                cursor.execute("SELECT thumbnail_data FROM thumbnails WHERE file_path = ?", (original_path,))
                result = cursor.fetchone()
                if result:
                    return result[0]
    except Exception as e:
        # Was a silent pass; log so cache corruption is visible, but stay best-effort.
        logger.warning(f"Thumbnail cache lookup failed: {e}", module="Recycle")

    # --- Generate the thumbnail ---
    try:
        if media_type == 'video':
            # Grab a single frame ~1s in as MJPEG on stdout.
            import subprocess
            result = subprocess.run([
                'ffmpeg', '-i', str(file_path),
                '-ss', '00:00:01', '-vframes', '1',
                '-f', 'image2pipe', '-vcodec', 'mjpeg', '-'
            ], capture_output=True, timeout=10)
            if result.returncode != 0:
                return None
            img = Image.open(io.BytesIO(result.stdout))
        else:
            img = Image.open(file_path)
        # JPEG cannot encode alpha/palette/other exotic modes. Normalize
        # anything that is not already JPEG-safe (the old RGBA/P-only check
        # crashed on e.g. 'LA' images).
        if img.mode not in ('RGB', 'L'):
            img = img.convert('RGB')
        img.thumbnail((300, 300), Image.Resampling.LANCZOS)
        output = io.BytesIO()
        img.save(output, format='JPEG', quality=85)
        thumbnail_data = output.getvalue()
    except Exception as e:
        logger.warning(f"Failed to generate thumbnail: {e}", module="Recycle")
        return None

    # --- Write back to the persistent cache (best effort) ---
    try:
        file_mtime = file_path.stat().st_mtime if file_path.exists() else None
        if content_hash:
            thumb_file_hash = content_hash
        else:
            # Hash the file *content* so the stored row is findable via the
            # file_hash lookup above. (Previously this hashed the path
            # string, which can never match a real content hash, leaving
            # the cached row unreachable by hash.)
            hasher = hashlib.sha256()
            with open(file_path, 'rb') as fh:
                for chunk in iter(lambda: fh.read(1024 * 1024), b''):
                    hasher.update(chunk)
            thumb_file_hash = hasher.hexdigest()
        with sqlite3.connect(thumb_db) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO thumbnails
                (file_hash, file_path, thumbnail_data, created_at, file_mtime)
                VALUES (?, ?, ?, ?, ?)
            """, (thumb_file_hash, str(file_path), thumbnail_data, datetime.now().isoformat(), file_mtime))
            conn.commit()
    except Exception:
        pass  # Caching is optional; never fail the request over it.
    return thumbnail_data
def _extract_dimensions(file_path: Path) -> tuple:
"""
Extract dimensions from a media file.
Returns: (width, height, duration)
"""
width, height, duration = None, None, None
file_ext = file_path.suffix.lower()
try:
if file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.heic', '.heif']:
from PIL import Image
with Image.open(file_path) as img:
width, height = img.size
elif file_ext in ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v']:
import subprocess
result = subprocess.run([
'ffprobe', '-v', 'quiet', '-print_format', 'json',
'-show_streams', str(file_path)
], capture_output=True, text=True, timeout=10)
if result.returncode == 0:
data = json.loads(result.stdout)
for stream in data.get('streams', []):
if stream.get('codec_type') == 'video':
width = stream.get('width')
height = stream.get('height')
duration_str = stream.get('duration')
if duration_str:
duration = float(duration_str)
break
except Exception as e:
logger.warning(f"Failed to extract dimensions: {e}", module="Recycle")
return width, height, duration