Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Todd
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions

View File

@@ -0,0 +1,90 @@
"""
Router module exports
All API routers for the media-downloader backend.
Import routers from here to include them in the main app.
Usage:
from web.backend.routers import (
auth_router,
health_router,
downloads_router,
media_router,
recycle_router,
scheduler_router,
video_router,
config_router,
review_router,
face_router,
platforms_router,
discovery_router,
scrapers_router,
semantic_router,
manual_import_router,
stats_router
)
app.include_router(auth_router)
app.include_router(health_router)
# ... etc
"""
from .auth import router as auth_router
from .health import router as health_router
from .downloads import router as downloads_router
from .media import router as media_router
from .recycle import router as recycle_router
from .scheduler import router as scheduler_router
from .video import router as video_router
from .config import router as config_router
from .review import router as review_router
from .face import router as face_router
from .platforms import router as platforms_router
from .discovery import router as discovery_router
from .scrapers import router as scrapers_router
from .semantic import router as semantic_router
from .manual_import import router as manual_import_router
from .stats import router as stats_router
from .celebrity import router as celebrity_router
from .video_queue import router as video_queue_router
from .maintenance import router as maintenance_router
from .files import router as files_router
from .appearances import router as appearances_router
from .easynews import router as easynews_router
from .dashboard import router as dashboard_router
from .paid_content import router as paid_content_router
from .private_gallery import router as private_gallery_router
from .instagram_unified import router as instagram_unified_router
from .cloud_backup import router as cloud_backup_router
from .press import router as press_router
__all__ = [
'auth_router',
'health_router',
'downloads_router',
'media_router',
'recycle_router',
'scheduler_router',
'video_router',
'config_router',
'review_router',
'face_router',
'platforms_router',
'discovery_router',
'scrapers_router',
'semantic_router',
'manual_import_router',
'stats_router',
'celebrity_router',
'video_queue_router',
'maintenance_router',
'files_router',
'appearances_router',
'easynews_router',
'dashboard_router',
'paid_content_router',
'private_gallery_router',
'instagram_unified_router',
'cloud_backup_router',
'press_router',
]

File diff suppressed because it is too large Load Diff

217
web/backend/routers/auth.py Normal file
View File

@@ -0,0 +1,217 @@
"""
Authentication Router
Handles all authentication-related endpoints:
- Login/Logout
- User info
- Password changes
- User preferences
"""
import sqlite3
from typing import Dict
from fastapi import APIRouter, Depends, HTTPException, Body, Request
from fastapi.responses import JSONResponse
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, get_app_state
from ..core.config import settings
from ..core.exceptions import AuthError, handle_exceptions
from ..core.responses import to_iso8601, now_iso8601
from ..models.api_models import LoginRequest, ChangePasswordRequest, UserPreferences
from modules.universal_logger import get_logger
logger = get_logger('API')
router = APIRouter(prefix="/api/auth", tags=["Authentication"])
# Rate limiter - will be set from main app
limiter = Limiter(key_func=get_remote_address)
@router.post("/login")
@limiter.limit("5/minute")
@handle_exceptions
async def login(login_data: LoginRequest, request: Request):
"""
Authenticate user with username and password.
Returns JWT token or 2FA challenge if 2FA is enabled.
"""
app_state = get_app_state()
if not app_state.auth:
raise HTTPException(status_code=500, detail="Authentication not initialized")
# Query user from database
with sqlite3.connect(app_state.auth.db_path) as conn:
cursor = conn.cursor()
cursor.execute("""
SELECT password_hash, role, is_active, totp_enabled, duo_enabled, passkey_enabled
FROM users WHERE username = ?
""", (login_data.username,))
row = cursor.fetchone()
if not row:
raise HTTPException(status_code=401, detail="Invalid credentials")
password_hash, role, is_active, totp_enabled, duo_enabled, passkey_enabled = row
if not is_active:
raise HTTPException(status_code=401, detail="Account is inactive")
if not app_state.auth.verify_password(login_data.password, password_hash):
app_state.auth._record_login_attempt(login_data.username, False)
raise HTTPException(status_code=401, detail="Invalid credentials")
# Check if user has any 2FA methods enabled
available_methods = []
if totp_enabled:
available_methods.append('totp')
if passkey_enabled:
available_methods.append('passkey')
if duo_enabled:
available_methods.append('duo')
# If user has 2FA enabled, return require2FA flag
if available_methods:
return {
'success': True,
'require2FA': True,
'availableMethods': available_methods,
'username': login_data.username
}
# No 2FA - proceed with normal login
result = app_state.auth._create_session(
username=login_data.username,
role=role,
ip_address=request.client.host if request.client else None,
remember_me=login_data.rememberMe
)
# Create response with cookie
response = JSONResponse(content=result)
# Set auth cookie (secure, httponly for security)
max_age = 30 * 24 * 60 * 60 if login_data.rememberMe else None
response.set_cookie(
key="auth_token",
value=result.get('token'),
max_age=max_age,
httponly=True,
secure=settings.SECURE_COOKIES,
samesite="lax",
path="/"
)
logger.info(f"User {login_data.username} logged in successfully", module="Auth")
return response
@router.post("/logout")
@limiter.limit("10/minute")
@handle_exceptions
async def logout(request: Request, current_user: Dict = Depends(get_current_user)):
"""Logout and invalidate session"""
username = current_user.get('sub', 'unknown')
response = JSONResponse(content={
"success": True,
"message": "Logged out successfully",
"timestamp": now_iso8601()
})
# Clear auth cookie
response.set_cookie(
key="auth_token",
value="",
max_age=0,
httponly=True,
secure=settings.SECURE_COOKIES,
samesite="lax",
path="/"
)
logger.info(f"User {username} logged out", module="Auth")
return response
@router.get("/me")
@limiter.limit("30/minute")
@handle_exceptions
async def get_me(request: Request, current_user: Dict = Depends(get_current_user)):
"""Get current user information"""
app_state = get_app_state()
username = current_user.get('sub')
user_info = app_state.auth.get_user(username)
if not user_info:
raise HTTPException(status_code=404, detail="User not found")
return user_info
@router.post("/change-password")
@limiter.limit("5/minute")
@handle_exceptions
async def change_password(
request: Request,
current_password: str = Body(..., embed=True),
new_password: str = Body(..., embed=True),
current_user: Dict = Depends(get_current_user)
):
"""Change user password"""
app_state = get_app_state()
username = current_user.get('sub')
ip_address = request.client.host if request.client else None
# Validate new password
if len(new_password) < 8:
raise HTTPException(status_code=400, detail="Password must be at least 8 characters")
result = app_state.auth.change_password(username, current_password, new_password, ip_address)
if not result['success']:
raise HTTPException(status_code=400, detail=result.get('error', 'Password change failed'))
logger.info(f"Password changed for user {username}", module="Auth")
return {
"success": True,
"message": "Password changed successfully",
"timestamp": now_iso8601()
}
@router.post("/preferences")
@limiter.limit("10/minute")
@handle_exceptions
async def update_preferences(
request: Request,
preferences: dict = Body(...),
current_user: Dict = Depends(get_current_user)
):
"""Update user preferences (theme, notifications, etc.)"""
app_state = get_app_state()
username = current_user.get('sub')
# Validate theme if provided
if 'theme' in preferences:
if preferences['theme'] not in ('light', 'dark', 'system'):
raise HTTPException(status_code=400, detail="Invalid theme value")
result = app_state.auth.update_preferences(username, preferences)
if not result['success']:
raise HTTPException(status_code=400, detail=result.get('error', 'Failed to update preferences'))
return {
"success": True,
"message": "Preferences updated",
"preferences": preferences,
"timestamp": now_iso8601()
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,756 @@
"""
Config and Logs Router
Handles configuration and logging operations:
- Get/update application configuration
- Log viewing (single component, merged)
- Notification history and stats
- Changelog retrieval
"""
import json
import re
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional
from fastapi import APIRouter, Body, Depends, HTTPException, Query, Request
from pydantic import BaseModel
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, require_admin, get_app_state
from ..core.config import settings
from ..core.exceptions import (
handle_exceptions,
ValidationError,
RecordNotFoundError
)
from ..core.responses import now_iso8601
from modules.universal_logger import get_logger
logger = get_logger('API')
router = APIRouter(prefix="/api", tags=["Configuration"])
limiter = Limiter(key_func=get_remote_address)
LOG_PATH = settings.PROJECT_ROOT / 'logs'
# ============================================================================
# PYDANTIC MODELS
# ============================================================================
class ConfigUpdate(BaseModel):
    """Request body for PUT /api/config: a mapping of setting key -> value."""
    config: Dict
class MergedLogsRequest(BaseModel):
    """Request body for POST /api/logs/merged."""
    lines: int = 500  # max lines read per component and overall cap
    components: List[str]  # component names whose logs should be merged
    around_time: Optional[str] = None  # ISO timestamp to center logs around
# ============================================================================
# CONFIGURATION ENDPOINTS
# ============================================================================
@router.get("/config")
@limiter.limit("100/minute")
@handle_exceptions
async def get_config(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Get current configuration."""
app_state = get_app_state()
return app_state.settings.get_all()
@router.put("/config")
@limiter.limit("20/minute")
@handle_exceptions
async def update_config(
request: Request,
current_user: Dict = Depends(require_admin),
update: ConfigUpdate = Body(...)
):
"""
Update configuration (admin only).
Saves configuration to database and updates in-memory state.
"""
app_state = get_app_state()
if not isinstance(update.config, dict):
raise ValidationError("Invalid configuration format")
logger.debug(f"Incoming config keys: {list(update.config.keys())}", module="Config")
# Save to database
for key, value in update.config.items():
app_state.settings.set(key, value, category=key, updated_by='api')
# Refresh in-memory config so other endpoints see updated values
app_state.config = app_state.settings.get_all()
# Broadcast update
try:
if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
await app_state.websocket_manager.broadcast({
"type": "config_updated",
"timestamp": now_iso8601()
})
except Exception as e:
logger.debug(f"Failed to broadcast config update: {e}", module="Config")
return {"success": True, "message": "Configuration updated"}
# ============================================================================
# LOG ENDPOINTS
# ============================================================================
@router.get("/logs")
@limiter.limit("100/minute")
@handle_exceptions
async def get_logs(
request: Request,
current_user: Dict = Depends(get_current_user),
lines: int = 100,
component: Optional[str] = None
):
"""Get recent log entries from the most recent log files."""
if not LOG_PATH.exists():
return {"logs": [], "available_components": []}
all_log_files = []
# Find date-stamped logs: YYYYMMDD_component.log or YYYYMMDD_HHMMSS_component.log
seen_paths = set()
for log_file in LOG_PATH.glob('*.log'):
if '_' not in log_file.stem:
continue
parts = log_file.stem.split('_')
if not parts[0].isdigit():
continue
try:
stat_info = log_file.stat()
if stat_info.st_size == 0:
continue
mtime = stat_info.st_mtime
# YYYYMMDD_HHMMSS_component.log (3+ parts, first two numeric)
if len(parts) >= 3 and parts[1].isdigit():
comp_name = '_'.join(parts[2:])
# YYYYMMDD_component.log (2+ parts, first numeric)
elif len(parts) >= 2:
comp_name = '_'.join(parts[1:])
else:
continue
seen_paths.add(log_file)
all_log_files.append({
'path': log_file,
'mtime': mtime,
'component': comp_name
})
except OSError:
pass
# Also check for old-style logs (no date prefix)
for log_file in LOG_PATH.glob('*.log'):
if log_file in seen_paths:
continue
if '_' in log_file.stem and log_file.stem.split('_')[0].isdigit():
continue
try:
stat_info = log_file.stat()
if stat_info.st_size == 0:
continue
mtime = stat_info.st_mtime
all_log_files.append({
'path': log_file,
'mtime': mtime,
'component': log_file.stem
})
except OSError:
pass
if not all_log_files:
return {"logs": [], "available_components": []}
components = sorted(set(f['component'] for f in all_log_files))
if component:
log_files = [f for f in all_log_files if f['component'] == component]
else:
log_files = all_log_files
if not log_files:
return {"logs": [], "available_components": components}
most_recent = max(log_files, key=lambda x: x['mtime'])
try:
with open(most_recent['path'], 'r', encoding='utf-8', errors='ignore') as f:
all_lines = f.readlines()
recent_lines = all_lines[-lines:]
return {
"logs": [line.strip() for line in recent_lines],
"available_components": components,
"current_component": most_recent['component'],
"log_file": str(most_recent['path'].name)
}
except Exception as e:
logger.error(f"Error reading log file: {e}", module="Logs")
return {"logs": [], "available_components": components, "error": str(e)}
@router.post("/logs/merged")
@limiter.limit("100/minute")
@handle_exceptions
async def get_merged_logs(
request: Request,
body: MergedLogsRequest,
current_user: Dict = Depends(get_current_user)
):
"""Get merged log entries from multiple components, sorted by timestamp."""
lines = body.lines
components = body.components
if not LOG_PATH.exists():
return {"logs": [], "available_components": [], "selected_components": []}
all_log_files = []
# Find date-stamped logs
for log_file in LOG_PATH.glob('*_*.log'):
try:
stat_info = log_file.stat()
if stat_info.st_size == 0:
continue
mtime = stat_info.st_mtime
parts = log_file.stem.split('_')
# Check OLD format FIRST (YYYYMMDD_HHMMSS_component.log)
if len(parts) >= 3 and parts[0].isdigit() and len(parts[0]) == 8 and parts[1].isdigit() and len(parts[1]) == 6:
comp_name = '_'.join(parts[2:])
all_log_files.append({
'path': log_file,
'mtime': mtime,
'component': comp_name
})
# Then check NEW format (YYYYMMDD_component.log)
elif len(parts) >= 2 and parts[0].isdigit() and len(parts[0]) == 8:
comp_name = '_'.join(parts[1:])
all_log_files.append({
'path': log_file,
'mtime': mtime,
'component': comp_name
})
except OSError:
pass
# Also check for old-style logs
for log_file in LOG_PATH.glob('*.log'):
if '_' in log_file.stem and log_file.stem.split('_')[0].isdigit():
continue
try:
stat_info = log_file.stat()
if stat_info.st_size == 0:
continue
mtime = stat_info.st_mtime
all_log_files.append({
'path': log_file,
'mtime': mtime,
'component': log_file.stem
})
except OSError:
pass
if not all_log_files:
return {"logs": [], "available_components": [], "selected_components": []}
available_components = sorted(set(f['component'] for f in all_log_files))
if not components or len(components) == 0:
return {
"logs": [],
"available_components": available_components,
"selected_components": []
}
selected_log_files = [f for f in all_log_files if f['component'] in components]
if not selected_log_files:
return {
"logs": [],
"available_components": available_components,
"selected_components": components
}
all_logs_with_timestamps = []
for comp in components:
comp_files = [f for f in selected_log_files if f['component'] == comp]
if not comp_files:
continue
most_recent = max(comp_files, key=lambda x: x['mtime'])
try:
with open(most_recent['path'], 'r', encoding='utf-8', errors='ignore') as f:
all_lines = f.readlines()
recent_lines = all_lines[-lines:]
for line in recent_lines:
line = line.strip()
if not line:
continue
# Match timestamp with optional microseconds
timestamp_match = re.match(r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})(?:\.(\d+))?', line)
if timestamp_match:
timestamp_str = timestamp_match.group(1)
microseconds = timestamp_match.group(2)
try:
timestamp = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
# Add microseconds if present
if microseconds:
# Pad or truncate to 6 digits for microseconds
microseconds = microseconds[:6].ljust(6, '0')
timestamp = timestamp.replace(microsecond=int(microseconds))
all_logs_with_timestamps.append({
'timestamp': timestamp,
'log': line
})
except ValueError:
all_logs_with_timestamps.append({
'timestamp': None,
'log': line
})
else:
all_logs_with_timestamps.append({
'timestamp': None,
'log': line
})
except Exception as e:
logger.error(f"Error reading log file {most_recent['path']}: {e}", module="Logs")
continue
# Sort by timestamp
sorted_logs = sorted(
all_logs_with_timestamps,
key=lambda x: x['timestamp'] if x['timestamp'] is not None else datetime.min
)
# If around_time is specified, center the logs around that timestamp
if body.around_time:
try:
# Parse the target timestamp
target_time = datetime.fromisoformat(body.around_time.replace('Z', '+00:00').replace('+00:00', ''))
# Find logs within 10 minutes of the target time
time_window = timedelta(minutes=10)
filtered_logs = [
entry for entry in sorted_logs
if entry['timestamp'] is not None and
abs((entry['timestamp'] - target_time).total_seconds()) <= time_window.total_seconds()
]
# If we found logs near the target time, use those
# Otherwise fall back to all logs and try to find the closest ones
if filtered_logs:
merged_logs = [entry['log'] for entry in filtered_logs]
else:
# Find the closest logs to the target time
logs_with_diff = [
(entry, abs((entry['timestamp'] - target_time).total_seconds()) if entry['timestamp'] else float('inf'))
for entry in sorted_logs
]
logs_with_diff.sort(key=lambda x: x[1])
# Take the closest logs, centered around the target
closest_logs = logs_with_diff[:lines]
closest_logs.sort(key=lambda x: x[0]['timestamp'] if x[0]['timestamp'] else datetime.min)
merged_logs = [entry[0]['log'] for entry in closest_logs]
except (ValueError, TypeError):
# If parsing fails, fall back to normal behavior
merged_logs = [entry['log'] for entry in sorted_logs]
if len(merged_logs) > lines:
merged_logs = merged_logs[-lines:]
else:
merged_logs = [entry['log'] for entry in sorted_logs]
if len(merged_logs) > lines:
merged_logs = merged_logs[-lines:]
return {
"logs": merged_logs,
"available_components": available_components,
"selected_components": components,
"total_logs": len(merged_logs)
}
# ============================================================================
# NOTIFICATION ENDPOINTS
# ============================================================================
@router.get("/notifications")
@limiter.limit("500/minute")
@handle_exceptions
async def get_notifications(
request: Request,
current_user: Dict = Depends(get_current_user),
limit: int = 50,
offset: int = 0,
platform: Optional[str] = None,
source: Optional[str] = None
):
"""Get notification history with pagination and filters."""
app_state = get_app_state()
with app_state.db.get_connection() as conn:
cursor = conn.cursor()
query = """
SELECT id, platform, source, content_type, message, title,
priority, download_count, sent_at, status, metadata
FROM notifications
WHERE 1=1
"""
params = []
if platform:
query += " AND platform = ?"
params.append(platform)
if source:
# Handle standardized source names
if source == 'YouTube Monitor':
query += " AND source = ?"
params.append('youtube_monitor')
else:
query += " AND source = ?"
params.append(source)
# Get total count
count_query = query.replace(
"SELECT id, platform, source, content_type, message, title, priority, download_count, sent_at, status, metadata",
"SELECT COUNT(*)"
)
cursor.execute(count_query, params)
result = cursor.fetchone()
total = result[0] if result else 0
# Add ordering and pagination
query += " ORDER BY sent_at DESC LIMIT ? OFFSET ?"
params.extend([limit, offset])
cursor.execute(query, params)
rows = cursor.fetchall()
notifications = []
for row in rows:
notifications.append({
'id': row[0],
'platform': row[1],
'source': row[2],
'content_type': row[3],
'message': row[4],
'title': row[5],
'priority': row[6],
'download_count': row[7],
'sent_at': row[8],
'status': row[9],
'metadata': json.loads(row[10]) if row[10] else None
})
return {
'notifications': notifications,
'total': total,
'limit': limit,
'offset': offset
}
@router.get("/notifications/stats")
@limiter.limit("500/minute")
@handle_exceptions
async def get_notification_stats(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Get notification statistics."""
app_state = get_app_state()
with app_state.db.get_connection() as conn:
cursor = conn.cursor()
# Total sent
cursor.execute("SELECT COUNT(*) FROM notifications WHERE status = 'sent'")
result = cursor.fetchone()
total_sent = result[0] if result else 0
# Total failed
cursor.execute("SELECT COUNT(*) FROM notifications WHERE status = 'failed'")
result = cursor.fetchone()
total_failed = result[0] if result else 0
# By platform (consolidate and filter)
cursor.execute("""
SELECT platform, COUNT(*) as count
FROM notifications
GROUP BY platform
ORDER BY count DESC
""")
raw_platforms = {row[0]: row[1] for row in cursor.fetchall()}
# Consolidate similar platforms and exclude system
by_platform = {}
for platform, count in raw_platforms.items():
# Skip system notifications
if platform == 'system':
continue
# Consolidate forum -> forums
if platform == 'forum':
by_platform['forums'] = by_platform.get('forums', 0) + count
# Consolidate fastdl -> instagram (fastdl is an Instagram download method)
elif platform == 'fastdl':
by_platform['instagram'] = by_platform.get('instagram', 0) + count
# Standardize youtube_monitor/youtube_monitors -> youtube
elif platform in ('youtube_monitor', 'youtube_monitors'):
by_platform['youtube'] = by_platform.get('youtube', 0) + count
else:
by_platform[platform] = by_platform.get(platform, 0) + count
# Recent 24h
cursor.execute("""
SELECT COUNT(*) FROM notifications
WHERE sent_at >= datetime('now', '-1 day')
""")
result = cursor.fetchone()
recent_24h = result[0] if result else 0
# Unique sources for filter dropdown
cursor.execute("""
SELECT DISTINCT source FROM notifications
WHERE source IS NOT NULL AND source != ''
ORDER BY source
""")
raw_sources = [row[0] for row in cursor.fetchall()]
# Standardize source names and track special sources
sources = []
has_youtube_monitor = False
has_log_errors = False
for source in raw_sources:
# Standardize youtube_monitor -> YouTube Monitor
if source == 'youtube_monitor':
has_youtube_monitor = True
elif source == 'Log Errors':
has_log_errors = True
else:
sources.append(source)
# Put special sources at the top
priority_sources = []
if has_youtube_monitor:
priority_sources.append('YouTube Monitor')
if has_log_errors:
priority_sources.append('Log Errors')
sources = priority_sources + sources
return {
'total_sent': total_sent,
'total_failed': total_failed,
'by_platform': by_platform,
'recent_24h': recent_24h,
'sources': sources
}
@router.delete("/notifications/{notification_id}")
@limiter.limit("100/minute")
@handle_exceptions
async def delete_notification(
request: Request,
notification_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Delete a single notification from history."""
app_state = get_app_state()
with app_state.db.get_connection(for_write=True) as conn:
cursor = conn.cursor()
# Check if notification exists
cursor.execute("SELECT id FROM notifications WHERE id = ?", (notification_id,))
if not cursor.fetchone():
raise RecordNotFoundError(
"Notification not found",
{"notification_id": notification_id}
)
# Delete the notification
cursor.execute("DELETE FROM notifications WHERE id = ?", (notification_id,))
conn.commit()
return {
'success': True,
'message': 'Notification deleted',
'notification_id': notification_id
}
# ============================================================================
# CHANGELOG ENDPOINT
# ============================================================================
@router.get("/changelog")
@limiter.limit("100/minute")
@handle_exceptions
async def get_changelog(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Get changelog data from JSON file."""
changelog_path = settings.PROJECT_ROOT / "data" / "changelog.json"
if not changelog_path.exists():
return {"versions": []}
with open(changelog_path, 'r') as f:
changelog_data = json.load(f)
return {"versions": changelog_data}
# ============================================================================
# APPEARANCE CONFIG ENDPOINTS
# ============================================================================
class AppearanceConfigUpdate(BaseModel):
    """Request body for POST /api/config/appearance."""
    # TMDB (TV/movie appearance lookups)
    tmdb_api_key: Optional[str] = None
    tmdb_enabled: bool = True
    tmdb_check_interval_hours: int = 12
    # Notification behaviour for newly discovered appearances
    notify_new_appearances: bool = True
    notify_days_before: int = 1
    # Additional appearance sources
    podcast_enabled: bool = False
    radio_enabled: bool = False
    # Podchaser API credentials / toggle
    podchaser_client_id: Optional[str] = None
    podchaser_client_secret: Optional[str] = None
    podchaser_api_key: Optional[str] = None
    podchaser_enabled: bool = False
    imdb_enabled: bool = True
@router.get("/config/appearance")
@limiter.limit("100/minute")
@handle_exceptions
async def get_appearance_config(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Get appearance tracking configuration."""
db = get_app_state().db
try:
with db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT tmdb_api_key, tmdb_enabled, tmdb_check_interval_hours, tmdb_last_check,
notify_new_appearances, notify_days_before, podcast_enabled, radio_enabled,
podchaser_client_id, podchaser_client_secret, podchaser_api_key,
podchaser_enabled, podchaser_last_check, imdb_enabled
FROM appearance_config
WHERE id = 1
''')
row = cursor.fetchone()
if not row:
# Initialize config if not exists
cursor.execute('INSERT OR IGNORE INTO appearance_config (id) VALUES (1)')
conn.commit()
return {
"tmdb_api_key": None,
"tmdb_enabled": True,
"tmdb_check_interval_hours": 12,
"tmdb_last_check": None,
"notify_new_appearances": True,
"notify_days_before": 1,
"podcast_enabled": False,
"radio_enabled": False,
"podchaser_client_id": None,
"podchaser_client_secret": None,
"podchaser_api_key": None,
"podchaser_enabled": False,
"podchaser_last_check": None
}
return {
"tmdb_api_key": row[0],
"tmdb_enabled": bool(row[1]),
"tmdb_check_interval_hours": row[2],
"tmdb_last_check": row[3],
"notify_new_appearances": bool(row[4]),
"notify_days_before": row[5],
"podcast_enabled": bool(row[6]),
"radio_enabled": bool(row[7]),
"podchaser_client_id": row[8] if len(row) > 8 else None,
"podchaser_client_secret": row[9] if len(row) > 9 else None,
"podchaser_api_key": row[10] if len(row) > 10 else None,
"podchaser_enabled": bool(row[11]) if len(row) > 11 else False,
"podchaser_last_check": row[12] if len(row) > 12 else None,
"imdb_enabled": bool(row[13]) if len(row) > 13 else True
}
except Exception as e:
logger.error(f"Error getting appearance config: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/config/appearance")
@limiter.limit("100/minute")
@handle_exceptions
async def update_appearance_config(
request: Request,
config: AppearanceConfigUpdate,
current_user: Dict = Depends(get_current_user)
):
"""Update appearance tracking configuration."""
db = get_app_state().db
try:
with db.get_connection(for_write=True) as conn:
cursor = conn.cursor()
# Update config
cursor.execute('''
UPDATE appearance_config
SET tmdb_api_key = ?,
tmdb_enabled = ?,
tmdb_check_interval_hours = ?,
notify_new_appearances = ?,
notify_days_before = ?,
podcast_enabled = ?,
radio_enabled = ?,
podchaser_client_id = ?,
podchaser_client_secret = ?,
podchaser_api_key = ?,
podchaser_enabled = ?,
imdb_enabled = ?,
updated_at = CURRENT_TIMESTAMP
WHERE id = 1
''', (config.tmdb_api_key, config.tmdb_enabled, config.tmdb_check_interval_hours,
config.notify_new_appearances, config.notify_days_before,
config.podcast_enabled, config.radio_enabled,
config.podchaser_client_id, config.podchaser_client_secret,
config.podchaser_api_key, config.podchaser_enabled, config.imdb_enabled))
conn.commit()
return {
"success": True,
"message": "Appearance configuration updated successfully"
}
except Exception as e:
logger.error(f"Error updating appearance config: {e}")
raise HTTPException(status_code=500, detail=str(e))

View File

@@ -0,0 +1,304 @@
"""
Dashboard API Router
Provides endpoints for dashboard-specific data like recent items across different locations.
"""
from fastapi import APIRouter, Depends, Request
from typing import Dict, Any, Optional
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, get_app_state
from ..core.exceptions import handle_exceptions
from modules.universal_logger import get_logger
router = APIRouter(prefix="/api/dashboard", tags=["dashboard"])
logger = get_logger('API')
limiter = Limiter(key_func=get_remote_address)
@router.get("/recent-items")
@limiter.limit("60/minute")
@handle_exceptions
async def get_recent_items(
request: Request,
limit: int = 20,
since_id: Optional[int] = None,
current_user=Depends(get_current_user)
) -> Dict[str, Any]:
"""
Get NEW items from Media, Review, and Internet Discovery for dashboard cards.
Uses file_inventory.id for ordering since it monotonically increases with
insertion order. download_date from the downloads table is included for
display but not used for ordering (batch downloads can interleave timestamps).
Args:
limit: Max items per category
since_id: Optional file_inventory ID - only return items with id > this value
Returns up to `limit` items from each location, sorted by most recently added first.
"""
app_state = get_app_state()
with app_state.db.get_connection() as conn:
cursor = conn.cursor()
# Media items (location='final')
# ORDER BY fi.id DESC — id is monotonically increasing and reflects insertion order.
# download_date is included for display but NOT used for ordering.
if since_id:
cursor.execute("""
SELECT fi.id, fi.file_path, fi.filename, fi.source, fi.platform, fi.content_type,
fi.file_size, COALESCE(d.download_date, fi.created_date) as added_at,
fi.width, fi.height
FROM file_inventory fi
LEFT JOIN downloads d ON d.filename = fi.filename
WHERE fi.location = 'final'
AND fi.id > ?
AND (fi.moved_from_review IS NULL OR fi.moved_from_review = 0)
AND (fi.from_discovery IS NULL OR fi.from_discovery = 0)
ORDER BY fi.id DESC
LIMIT ?
""", (since_id, limit))
else:
cursor.execute("""
SELECT fi.id, fi.file_path, fi.filename, fi.source, fi.platform, fi.content_type,
fi.file_size, COALESCE(d.download_date, fi.created_date) as added_at,
fi.width, fi.height
FROM file_inventory fi
LEFT JOIN downloads d ON d.filename = fi.filename
WHERE fi.location = 'final'
AND (fi.moved_from_review IS NULL OR fi.moved_from_review = 0)
AND (fi.from_discovery IS NULL OR fi.from_discovery = 0)
ORDER BY fi.id DESC
LIMIT ?
""", (limit,))
media_items = []
for row in cursor.fetchall():
media_items.append({
'id': row[0],
'file_path': row[1],
'filename': row[2],
'source': row[3],
'platform': row[4],
'media_type': row[5],
'file_size': row[6],
'added_at': row[7],
'width': row[8],
'height': row[9]
})
# Get total count for new media items
if since_id:
cursor.execute("""
SELECT COUNT(*)
FROM file_inventory
WHERE location = 'final'
AND id > ?
AND (moved_from_review IS NULL OR moved_from_review = 0)
AND (from_discovery IS NULL OR from_discovery = 0)
""", (since_id,))
else:
cursor.execute("""
SELECT COUNT(*) FROM file_inventory
WHERE location = 'final'
AND (moved_from_review IS NULL OR moved_from_review = 0)
AND (from_discovery IS NULL OR from_discovery = 0)
""")
media_count = cursor.fetchone()[0]
# Review items (location='review')
if since_id:
cursor.execute("""
SELECT f.id, f.file_path, f.filename, f.source, f.platform, f.content_type,
f.file_size, COALESCE(d.download_date, f.created_date) as added_at,
f.width, f.height,
CASE WHEN fr.id IS NOT NULL THEN 1 ELSE 0 END as face_scanned,
fr.has_match as face_matched, fr.confidence as face_confidence, fr.matched_person
FROM file_inventory f
LEFT JOIN downloads d ON d.filename = f.filename
LEFT JOIN face_recognition_scans fr ON f.file_path = fr.file_path
WHERE f.location = 'review'
AND f.id > ?
AND (f.moved_from_media IS NULL OR f.moved_from_media = 0)
ORDER BY f.id DESC
LIMIT ?
""", (since_id, limit))
else:
cursor.execute("""
SELECT f.id, f.file_path, f.filename, f.source, f.platform, f.content_type,
f.file_size, COALESCE(d.download_date, f.created_date) as added_at,
f.width, f.height,
CASE WHEN fr.id IS NOT NULL THEN 1 ELSE 0 END as face_scanned,
fr.has_match as face_matched, fr.confidence as face_confidence, fr.matched_person
FROM file_inventory f
LEFT JOIN downloads d ON d.filename = f.filename
LEFT JOIN face_recognition_scans fr ON f.file_path = fr.file_path
WHERE f.location = 'review'
AND (f.moved_from_media IS NULL OR f.moved_from_media = 0)
ORDER BY f.id DESC
LIMIT ?
""", (limit,))
review_items = []
for row in cursor.fetchall():
face_recognition = None
if row[10]: # face_scanned
face_recognition = {
'scanned': True,
'matched': bool(row[11]) if row[11] is not None else False,
'confidence': row[12],
'matched_person': row[13]
}
review_items.append({
'id': row[0],
'file_path': row[1],
'filename': row[2],
'source': row[3],
'platform': row[4],
'media_type': row[5],
'file_size': row[6],
'added_at': row[7],
'width': row[8],
'height': row[9],
'face_recognition': face_recognition
})
# Get total count for new review items
if since_id:
cursor.execute("""
SELECT COUNT(*)
FROM file_inventory
WHERE location = 'review'
AND id > ?
AND (moved_from_media IS NULL OR moved_from_media = 0)
""", (since_id,))
else:
cursor.execute("""
SELECT COUNT(*) FROM file_inventory
WHERE location = 'review'
AND (moved_from_media IS NULL OR moved_from_media = 0)
""")
review_count = cursor.fetchone()[0]
# Internet Discovery items (celebrity_discovered_videos with status='new')
internet_discovery_items = []
internet_discovery_count = 0
try:
cursor.execute("""
SELECT
v.id,
v.video_id,
v.title,
v.thumbnail,
v.channel_name,
v.platform,
v.duration,
v.max_resolution,
v.status,
v.discovered_at,
v.url,
v.view_count,
v.upload_date,
c.name as celebrity_name
FROM celebrity_discovered_videos v
LEFT JOIN celebrity_profiles c ON v.celebrity_id = c.id
WHERE v.status = 'new'
ORDER BY v.id DESC
LIMIT ?
""", (limit,))
for row in cursor.fetchall():
internet_discovery_items.append({
'id': row[0],
'video_id': row[1],
'title': row[2],
'thumbnail': row[3],
'channel_name': row[4],
'platform': row[5],
'duration': row[6],
'max_resolution': row[7],
'status': row[8],
'discovered_at': row[9],
'url': row[10],
'view_count': row[11],
'upload_date': row[12],
'celebrity_name': row[13]
})
# Get total count for internet discovery
cursor.execute("SELECT COUNT(*) FROM celebrity_discovered_videos WHERE status = 'new'")
internet_discovery_count = cursor.fetchone()[0]
except Exception as e:
# Table might not exist if celebrity feature not used
logger.warning(f"Could not fetch internet discovery items: {e}", module="Dashboard")
return {
'media': {
'count': media_count,
'items': media_items
},
'review': {
'count': review_count,
'items': review_items
},
'internet_discovery': {
'count': internet_discovery_count,
'items': internet_discovery_items
}
}
@router.get("/dismissed-cards")
@limiter.limit("60/minute")
@handle_exceptions
async def get_dismissed_cards(
request: Request,
user=Depends(get_current_user)
) -> Dict[str, Any]:
"""Get the user's dismissed card IDs."""
app_state = get_app_state()
user_id = user.get('username', 'default')
with app_state.db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("""
SELECT preference_value FROM user_preferences
WHERE user_id = ? AND preference_key = 'dashboard_dismissed_cards'
""", (user_id,))
row = cursor.fetchone()
if row and row[0]:
import json
return json.loads(row[0])
return {'media': None, 'review': None, 'internet_discovery': None}
@router.post("/dismissed-cards")
@limiter.limit("30/minute")
@handle_exceptions
async def set_dismissed_cards(
request: Request,
data: Dict[str, Any],
user=Depends(get_current_user)
) -> Dict[str, str]:
"""Save the user's dismissed card IDs."""
import json
app_state = get_app_state()
user_id = user.get('username', 'default')
with app_state.db.get_connection(for_write=True) as conn:
cursor = conn.cursor()
cursor.execute("""
INSERT INTO user_preferences (user_id, preference_key, preference_value, updated_at)
VALUES (?, 'dashboard_dismissed_cards', ?, CURRENT_TIMESTAMP)
ON CONFLICT(user_id, preference_key) DO UPDATE SET
preference_value = excluded.preference_value,
updated_at = CURRENT_TIMESTAMP
""", (user_id, json.dumps(data)))
return {'status': 'ok'}

View File

@@ -0,0 +1,942 @@
"""
Discovery Router
Handles discovery, organization and browsing features:
- Tags management (CRUD, file tagging, bulk operations)
- Smart folders (filter-based virtual folders)
- Collections (manual file groupings)
- Timeline and activity views
- Discovery queue management
"""
import json
from datetime import datetime
from typing import Dict, List, Optional
from fastapi import APIRouter, Body, Depends, Query, Request
from pydantic import BaseModel
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, get_app_state
from ..core.exceptions import handle_exceptions, NotFoundError, ValidationError
from ..core.responses import message_response, id_response, count_response, offset_paginated
from modules.discovery_system import get_discovery_system
from modules.universal_logger import get_logger
logger = get_logger('API')
router = APIRouter(prefix="/api", tags=["Discovery"])
limiter = Limiter(key_func=get_remote_address)
# ============================================================================
# PYDANTIC MODELS
# ============================================================================
class TagCreate(BaseModel):
    """Request body for creating a tag (POST /tags)."""
    name: str
    parent_id: Optional[int] = None  # parent tag id for hierarchy; None = root tag
    color: str = '#6366f1'           # hex color shown in the UI
    icon: Optional[str] = None
    description: Optional[str] = None


class TagUpdate(BaseModel):
    """Partial update for a tag (PUT /tags/{tag_id}); None fields stay unchanged."""
    name: Optional[str] = None
    parent_id: Optional[int] = None
    color: Optional[str] = None
    icon: Optional[str] = None
    description: Optional[str] = None


class BulkTagRequest(BaseModel):
    """Body schema for bulk tagging: apply each tag_id to each file_id."""
    file_ids: List[int]
    tag_ids: List[int]


class SmartFolderCreate(BaseModel):
    """Request body for creating a smart folder (filter-based virtual folder)."""
    name: str
    filters: dict = {}  # pydantic deep-copies defaults, so a shared dict is safe here
    icon: str = 'folder'
    color: str = '#6366f1'
    description: Optional[str] = None
    sort_by: str = 'post_date'
    sort_order: str = 'desc'


class SmartFolderUpdate(BaseModel):
    """Partial update for a smart folder; None fields stay unchanged."""
    name: Optional[str] = None
    filters: Optional[dict] = None
    icon: Optional[str] = None
    color: Optional[str] = None
    description: Optional[str] = None
    sort_by: Optional[str] = None
    sort_order: Optional[str] = None


class CollectionCreate(BaseModel):
    """Request body for creating a collection (manual file grouping)."""
    name: str
    description: Optional[str] = None
    color: str = '#6366f1'


class CollectionUpdate(BaseModel):
    """Partial update for a collection; None fields stay unchanged."""
    name: Optional[str] = None
    description: Optional[str] = None
    color: Optional[str] = None
    cover_file_id: Optional[int] = None  # file used as the collection cover


class BulkCollectionAdd(BaseModel):
    """Body schema for adding many files to a collection at once."""
    file_ids: List[int]


class DiscoveryQueueAdd(BaseModel):
    """Body schema for queueing files for discovery processing."""
    file_ids: List[int]
    priority: int = 0  # higher = processed earlier, presumably — confirm in discovery_system
# ============================================================================
# TAGS ENDPOINTS
# ============================================================================
@router.get("/tags")
@limiter.limit("60/minute")
@handle_exceptions
async def get_tags(
request: Request,
current_user: Dict = Depends(get_current_user),
parent_id: Optional[int] = Query(None, description="Parent tag ID (null for root, -1 for all)"),
include_counts: bool = Query(True, description="Include file counts")
):
"""Get all tags, optionally filtered by parent."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
tags = discovery.get_tags(parent_id=parent_id, include_counts=include_counts)
return {"tags": tags}
@router.get("/tags/{tag_id}")
@limiter.limit("60/minute")
@handle_exceptions
async def get_tag(
request: Request,
tag_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Get a single tag by ID."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
tag = discovery.get_tag(tag_id)
if not tag:
raise NotFoundError("Tag not found")
return tag
@router.post("/tags")
@limiter.limit("30/minute")
@handle_exceptions
async def create_tag(
request: Request,
tag_data: TagCreate,
current_user: Dict = Depends(get_current_user)
):
"""Create a new tag."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
tag_id = discovery.create_tag(
name=tag_data.name,
parent_id=tag_data.parent_id,
color=tag_data.color,
icon=tag_data.icon,
description=tag_data.description
)
if tag_id is None:
raise ValidationError("Failed to create tag")
return id_response(tag_id, "Tag created successfully")
@router.put("/tags/{tag_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def update_tag(
request: Request,
tag_id: int,
tag_data: TagUpdate,
current_user: Dict = Depends(get_current_user)
):
"""Update a tag."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
success = discovery.update_tag(
tag_id=tag_id,
name=tag_data.name,
color=tag_data.color,
icon=tag_data.icon,
description=tag_data.description,
parent_id=tag_data.parent_id
)
if not success:
raise NotFoundError("Tag not found or update failed")
return message_response("Tag updated successfully")
@router.delete("/tags/{tag_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def delete_tag(
request: Request,
tag_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Delete a tag."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
success = discovery.delete_tag(tag_id)
if not success:
raise NotFoundError("Tag not found")
return message_response("Tag deleted successfully")
@router.get("/files/{file_id}/tags")
@limiter.limit("60/minute")
@handle_exceptions
async def get_file_tags(
request: Request,
file_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Get all tags for a file."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
tags = discovery.get_file_tags(file_id)
return {"tags": tags}
@router.post("/files/{file_id}/tags/{tag_id}")
@limiter.limit("60/minute")
@handle_exceptions
async def tag_file(
request: Request,
file_id: int,
tag_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Add a tag to a file."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
success = discovery.tag_file(file_id, tag_id, created_by=current_user.get('sub'))
if not success:
raise ValidationError("Failed to tag file")
return message_response("Tag added to file")
@router.delete("/files/{file_id}/tags/{tag_id}")
@limiter.limit("60/minute")
@handle_exceptions
async def untag_file(
request: Request,
file_id: int,
tag_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Remove a tag from a file."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
success = discovery.untag_file(file_id, tag_id)
if not success:
raise NotFoundError("Tag not found on file")
return message_response("Tag removed from file")
@router.get("/tags/{tag_id}/files")
@limiter.limit("60/minute")
@handle_exceptions
async def get_files_by_tag(
request: Request,
tag_id: int,
current_user: Dict = Depends(get_current_user),
limit: int = Query(100, ge=1, le=1000),
offset: int = Query(0, ge=0)
):
"""Get files with a specific tag."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
files, total = discovery.get_files_by_tag(tag_id, limit=limit, offset=offset)
return offset_paginated(files, total, limit, offset, key="files")
@router.post("/tags/bulk")
@limiter.limit("30/minute")
@handle_exceptions
async def bulk_tag_files(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Tag multiple files with multiple tags."""
data = await request.json()
file_ids = data.get('file_ids', [])
tag_ids = data.get('tag_ids', [])
if not file_ids or not tag_ids:
raise ValidationError("file_ids and tag_ids required")
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
count = discovery.bulk_tag_files(file_ids, tag_ids, created_by=current_user.get('sub'))
return count_response(f"Tagged {count} file-tag pairs", count)
# ============================================================================
# SMART FOLDERS ENDPOINTS
# ============================================================================
@router.get("/smart-folders")
@limiter.limit("60/minute")
@handle_exceptions
async def get_smart_folders(
request: Request,
current_user: Dict = Depends(get_current_user),
include_system: bool = Query(True, description="Include system smart folders")
):
"""Get all smart folders."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
folders = discovery.get_smart_folders(include_system=include_system)
return {"smart_folders": folders}
@router.get("/smart-folders/stats")
@limiter.limit("30/minute")
@handle_exceptions
async def get_smart_folders_stats(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Get file counts and preview thumbnails for all smart folders."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
folders = discovery.get_smart_folders(include_system=True)
stats = {}
with app_state.db.get_connection() as conn:
cursor = conn.cursor()
for folder in folders:
filters = folder.get('filters', {})
folder_id = folder['id']
query = '''
SELECT COUNT(*) as count
FROM file_inventory fi
WHERE fi.location = 'final'
'''
params = []
if filters.get('platform'):
query += ' AND fi.platform = ?'
params.append(filters['platform'])
if filters.get('media_type'):
query += ' AND fi.content_type = ?'
params.append(filters['media_type'])
if filters.get('source'):
query += ' AND fi.source = ?'
params.append(filters['source'])
if filters.get('size_min'):
query += ' AND fi.file_size >= ?'
params.append(filters['size_min'])
cursor.execute(query, params)
count = cursor.fetchone()[0]
preview_query = '''
SELECT fi.file_path, fi.content_type
FROM file_inventory fi
WHERE fi.location = 'final'
'''
preview_params = []
if filters.get('platform'):
preview_query += ' AND fi.platform = ?'
preview_params.append(filters['platform'])
if filters.get('media_type'):
preview_query += ' AND fi.content_type = ?'
preview_params.append(filters['media_type'])
if filters.get('source'):
preview_query += ' AND fi.source = ?'
preview_params.append(filters['source'])
if filters.get('size_min'):
preview_query += ' AND fi.file_size >= ?'
preview_params.append(filters['size_min'])
preview_query += ' ORDER BY fi.created_date DESC LIMIT 4'
cursor.execute(preview_query, preview_params)
previews = []
for row in cursor.fetchall():
previews.append({
'file_path': row['file_path'],
'content_type': row['content_type']
})
stats[folder_id] = {
'count': count,
'previews': previews
}
return {"stats": stats}
@router.get("/smart-folders/{folder_id}")
@limiter.limit("60/minute")
@handle_exceptions
async def get_smart_folder(
request: Request,
folder_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Get a single smart folder by ID."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
folder = discovery.get_smart_folder(folder_id=folder_id)
if not folder:
raise NotFoundError("Smart folder not found")
return folder
@router.post("/smart-folders")
@limiter.limit("30/minute")
@handle_exceptions
async def create_smart_folder(
request: Request,
folder_data: SmartFolderCreate,
current_user: Dict = Depends(get_current_user)
):
"""Create a new smart folder."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
folder_id = discovery.create_smart_folder(
name=folder_data.name,
filters=folder_data.filters,
icon=folder_data.icon,
color=folder_data.color,
description=folder_data.description,
sort_by=folder_data.sort_by,
sort_order=folder_data.sort_order
)
if folder_id is None:
raise ValidationError("Failed to create smart folder")
return {"id": folder_id, "message": "Smart folder created successfully"}
@router.put("/smart-folders/{folder_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def update_smart_folder(
request: Request,
folder_id: int,
folder_data: SmartFolderUpdate,
current_user: Dict = Depends(get_current_user)
):
"""Update a smart folder (cannot update system folders)."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
success = discovery.update_smart_folder(
folder_id=folder_id,
name=folder_data.name,
filters=folder_data.filters,
icon=folder_data.icon,
color=folder_data.color,
description=folder_data.description,
sort_by=folder_data.sort_by,
sort_order=folder_data.sort_order
)
if not success:
raise ValidationError("Failed to update smart folder (may be a system folder)")
return {"message": "Smart folder updated successfully"}
@router.delete("/smart-folders/{folder_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def delete_smart_folder(
request: Request,
folder_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Delete a smart folder (cannot delete system folders)."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
success = discovery.delete_smart_folder(folder_id)
if not success:
raise ValidationError("Failed to delete smart folder (may be a system folder)")
return {"message": "Smart folder deleted successfully"}
# ============================================================================
# COLLECTIONS ENDPOINTS
# ============================================================================
@router.get("/collections")
@limiter.limit("60/minute")
@handle_exceptions
async def get_collections(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Get all collections."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
collections = discovery.get_collections()
return {"collections": collections}
@router.get("/collections/{collection_id}")
@limiter.limit("60/minute")
@handle_exceptions
async def get_collection(
request: Request,
collection_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Get a single collection by ID."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
collection = discovery.get_collection(collection_id=collection_id)
if not collection:
raise NotFoundError("Collection not found")
return collection
@router.post("/collections")
@limiter.limit("30/minute")
@handle_exceptions
async def create_collection(
request: Request,
collection_data: CollectionCreate,
current_user: Dict = Depends(get_current_user)
):
"""Create a new collection."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
collection_id = discovery.create_collection(
name=collection_data.name,
description=collection_data.description,
color=collection_data.color
)
if collection_id is None:
raise ValidationError("Failed to create collection")
return {"id": collection_id, "message": "Collection created successfully"}
@router.put("/collections/{collection_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def update_collection(
request: Request,
collection_id: int,
collection_data: CollectionUpdate,
current_user: Dict = Depends(get_current_user)
):
"""Update a collection."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
success = discovery.update_collection(
collection_id=collection_id,
name=collection_data.name,
description=collection_data.description,
color=collection_data.color,
cover_file_id=collection_data.cover_file_id
)
if not success:
raise NotFoundError("Collection not found")
return {"message": "Collection updated successfully"}
@router.delete("/collections/{collection_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def delete_collection(
request: Request,
collection_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Delete a collection."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
success = discovery.delete_collection(collection_id)
if not success:
raise NotFoundError("Collection not found")
return {"message": "Collection deleted successfully"}
@router.get("/collections/{collection_id}/files")
@limiter.limit("60/minute")
@handle_exceptions
async def get_collection_files(
request: Request,
collection_id: int,
current_user: Dict = Depends(get_current_user),
limit: int = Query(100, ge=1, le=1000),
offset: int = Query(0, ge=0)
):
"""Get files in a collection."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
files, total = discovery.get_collection_files(collection_id, limit=limit, offset=offset)
return {"files": files, "total": total, "limit": limit, "offset": offset}
@router.post("/collections/{collection_id}/files/{file_id}")
@limiter.limit("60/minute")
@handle_exceptions
async def add_to_collection(
request: Request,
collection_id: int,
file_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Add a file to a collection."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
success = discovery.add_to_collection(collection_id, file_id, added_by=current_user.get('sub'))
if not success:
raise ValidationError("Failed to add file to collection")
return {"message": "File added to collection"}
@router.delete("/collections/{collection_id}/files/{file_id}")
@limiter.limit("60/minute")
@handle_exceptions
async def remove_from_collection(
request: Request,
collection_id: int,
file_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Remove a file from a collection."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
success = discovery.remove_from_collection(collection_id, file_id)
if not success:
raise NotFoundError("File not found in collection")
return {"message": "File removed from collection"}
@router.post("/collections/{collection_id}/files/bulk")
@limiter.limit("30/minute")
@handle_exceptions
async def bulk_add_to_collection(
request: Request,
collection_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Add multiple files to a collection."""
data = await request.json()
file_ids = data.get('file_ids', [])
if not file_ids:
raise ValidationError("file_ids required")
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
count = discovery.bulk_add_to_collection(collection_id, file_ids, added_by=current_user.get('sub'))
return {"message": f"Added {count} files to collection", "count": count}
# ============================================================================
# TIMELINE ENDPOINTS
# ============================================================================
@router.get("/timeline")
@limiter.limit("60/minute")
@handle_exceptions
async def get_timeline(
request: Request,
current_user: Dict = Depends(get_current_user),
granularity: str = Query('day', pattern='^(day|week|month|year)$'),
date_from: Optional[str] = Query(None, pattern=r'^\d{4}-\d{2}-\d{2}$'),
date_to: Optional[str] = Query(None, pattern=r'^\d{4}-\d{2}-\d{2}$'),
platform: Optional[str] = Query(None),
source: Optional[str] = Query(None)
):
"""Get timeline aggregation data."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
data = discovery.get_timeline_data(
granularity=granularity,
date_from=date_from,
date_to=date_to,
platform=platform,
source=source
)
return {"timeline": data, "granularity": granularity}
@router.get("/timeline/heatmap")
@limiter.limit("60/minute")
@handle_exceptions
async def get_timeline_heatmap(
request: Request,
current_user: Dict = Depends(get_current_user),
year: Optional[int] = Query(None, ge=2000, le=2100),
platform: Optional[str] = Query(None)
):
"""Get activity heatmap data (file counts per day for a year)."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
heatmap = discovery.get_activity_heatmap(year=year, platform=platform)
return {"heatmap": heatmap, "year": year or datetime.now().year}
@router.get("/timeline/on-this-day")
@limiter.limit("60/minute")
@handle_exceptions
async def get_on_this_day(
request: Request,
current_user: Dict = Depends(get_current_user),
month: Optional[int] = Query(None, ge=1, le=12),
day: Optional[int] = Query(None, ge=1, le=31),
limit: int = Query(50, ge=1, le=200)
):
"""Get content from the same day in previous years ('On This Day' feature)."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
files = discovery.get_on_this_day(month=month, day=day, limit=limit)
return {"files": files, "count": len(files)}
# ============================================================================
# RECENT ACTIVITY ENDPOINT
# ============================================================================
@router.get("/discovery/recent-activity")
@limiter.limit("60/minute")
@handle_exceptions
async def get_recent_activity(
request: Request,
current_user: Dict = Depends(get_current_user),
limit: int = Query(10, ge=1, le=50)
):
"""Get recent activity across downloads, deletions, and restores."""
app_state = get_app_state()
activity = {
'recent_downloads': [],
'recent_deleted': [],
'recent_restored': [],
'recent_moved_to_review': [],
'summary': {
'downloads_24h': 0,
'downloads_7d': 0,
'deleted_24h': 0,
'deleted_7d': 0
}
}
with app_state.db.get_connection() as conn:
cursor = conn.cursor()
# Recent downloads
cursor.execute('''
SELECT
fi.id, fi.file_path, fi.filename, fi.platform, fi.source,
fi.content_type, fi.file_size, fi.created_date,
d.download_date, d.post_date
FROM file_inventory fi
LEFT JOIN downloads d ON d.file_path = fi.file_path
WHERE fi.location = 'final'
ORDER BY fi.created_date DESC
LIMIT ?
''', (limit,))
for row in cursor.fetchall():
activity['recent_downloads'].append({
'id': row['id'],
'file_path': row['file_path'],
'filename': row['filename'],
'platform': row['platform'],
'source': row['source'],
'content_type': row['content_type'],
'file_size': row['file_size'],
'timestamp': row['download_date'] or row['created_date'],
'action': 'download'
})
# Recent deleted
cursor.execute('''
SELECT
id, original_path, original_filename, recycle_path,
file_size, deleted_at, deleted_from, metadata
FROM recycle_bin
ORDER BY deleted_at DESC
LIMIT ?
''', (limit,))
for row in cursor.fetchall():
metadata = {}
if row['metadata']:
try:
metadata = json.loads(row['metadata'])
except (json.JSONDecodeError, TypeError):
pass
activity['recent_deleted'].append({
'id': row['id'],
'file_path': row['recycle_path'],
'original_path': row['original_path'],
'filename': row['original_filename'],
'platform': metadata.get('platform', 'unknown'),
'source': metadata.get('source', ''),
'content_type': metadata.get('content_type', 'image'),
'file_size': row['file_size'] or 0,
'timestamp': row['deleted_at'],
'deleted_from': row['deleted_from'],
'action': 'delete'
})
# Recent moved to review
cursor.execute('''
SELECT
id, file_path, filename, platform, source,
content_type, file_size, created_date
FROM file_inventory
WHERE location = 'review'
ORDER BY created_date DESC
LIMIT ?
''', (limit,))
for row in cursor.fetchall():
activity['recent_moved_to_review'].append({
'id': row['id'],
'file_path': row['file_path'],
'filename': row['filename'],
'platform': row['platform'],
'source': row['source'],
'content_type': row['content_type'],
'file_size': row['file_size'],
'timestamp': row['created_date'],
'action': 'review'
})
# Summary stats
cursor.execute('''
SELECT COUNT(*) FROM file_inventory
WHERE location = 'final'
AND created_date >= datetime('now', '-1 day')
''')
activity['summary']['downloads_24h'] = cursor.fetchone()[0]
cursor.execute('''
SELECT COUNT(*) FROM file_inventory
WHERE location = 'final'
AND created_date >= datetime('now', '-7 days')
''')
activity['summary']['downloads_7d'] = cursor.fetchone()[0]
cursor.execute('''
SELECT COUNT(*) FROM recycle_bin
WHERE deleted_at >= datetime('now', '-1 day')
''')
activity['summary']['deleted_24h'] = cursor.fetchone()[0]
cursor.execute('''
SELECT COUNT(*) FROM recycle_bin
WHERE deleted_at >= datetime('now', '-7 days')
''')
activity['summary']['deleted_7d'] = cursor.fetchone()[0]
return activity
# ============================================================================
# DISCOVERY QUEUE ENDPOINTS
# ============================================================================
@router.get("/discovery/queue/stats")
@limiter.limit("60/minute")
@handle_exceptions
async def get_queue_stats(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Get discovery queue statistics."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
stats = discovery.get_queue_stats()
return stats
@router.get("/discovery/queue/pending")
@limiter.limit("60/minute")
@handle_exceptions
async def get_pending_queue(
request: Request,
current_user: Dict = Depends(get_current_user),
limit: int = Query(100, ge=1, le=1000)
):
"""Get pending items in the discovery queue."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
items = discovery.get_pending_queue(limit=limit)
return {"items": items, "count": len(items)}
@router.post("/discovery/queue/add")
@limiter.limit("30/minute")
@handle_exceptions
async def add_to_queue(
request: Request,
current_user: Dict = Depends(get_current_user),
file_id: int = Body(...),
priority: int = Body(0)
):
"""Add a file to the discovery queue."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
success = discovery.add_to_queue(file_id, priority=priority)
if not success:
raise ValidationError("Failed to add file to queue")
return {"message": "File added to queue"}
@router.post("/discovery/queue/bulk-add")
@limiter.limit("10/minute")
@handle_exceptions
async def bulk_add_to_queue(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Add multiple files to the discovery queue."""
data = await request.json()
file_ids = data.get('file_ids', [])
priority = data.get('priority', 0)
if not file_ids:
raise ValidationError("file_ids required")
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
count = discovery.bulk_add_to_queue(file_ids, priority=priority)
return {"message": f"Added {count} files to queue", "count": count}
@router.delete("/discovery/queue/clear")
@limiter.limit("10/minute")
@handle_exceptions
async def clear_queue(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Clear the discovery queue."""
app_state = get_app_state()
discovery = get_discovery_system(app_state.db)
count = discovery.clear_queue()
return {"message": f"Cleared {count} items from queue", "count": count}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,478 @@
"""
Easynews Router
Handles Easynews integration:
- Configuration management (credentials, proxy settings)
- Search term management
- Manual check triggers
- Results browsing and downloads
"""
import asyncio
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Dict, List, Optional
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request
from pydantic import BaseModel
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, require_admin, get_app_state
from ..core.exceptions import handle_exceptions
from modules.universal_logger import get_logger
logger = get_logger('API')
router = APIRouter(prefix="/api/easynews", tags=["Easynews"])
limiter = Limiter(key_func=get_remote_address)
# Thread pool for blocking operations
_executor = ThreadPoolExecutor(max_workers=2)
# ============================================================================
# PYDANTIC MODELS
# ============================================================================
class EasynewsConfigUpdate(BaseModel):
    """Partial update payload for the Easynews configuration.

    All fields are optional; the update endpoint only applies non-None
    values, and password fields equal to the mask ``'********'`` are
    treated as "leave unchanged".
    """
    # Account credentials
    username: Optional[str] = None
    password: Optional[str] = None
    # Monitoring behaviour
    enabled: Optional[bool] = None
    check_interval_hours: Optional[int] = None
    auto_download: Optional[bool] = None
    min_quality: Optional[str] = None
    # Optional proxy for outbound requests
    proxy_enabled: Optional[bool] = None
    proxy_type: Optional[str] = None
    proxy_host: Optional[str] = None
    proxy_port: Optional[int] = None
    proxy_username: Optional[str] = None
    proxy_password: Optional[str] = None
    notifications_enabled: Optional[bool] = None
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def _get_monitor():
    """Construct an EasynewsMonitor bound to the application's database."""
    from modules.easynews_monitor import EasynewsMonitor
    state = get_app_state()
    # EasynewsMonitor expects a plain string path, not a pathlib.Path
    return EasynewsMonitor(str(state.db.db_path))
# ============================================================================
# CONFIGURATION ENDPOINTS
# ============================================================================
@router.get("/config")
@limiter.limit("30/minute")
@handle_exceptions
async def get_config(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Get Easynews configuration (passwords masked)."""
monitor = _get_monitor()
config = monitor.get_config()
# Mask password for security
if config.get('password'):
config['password'] = '********'
if config.get('proxy_password'):
config['proxy_password'] = '********'
return {
"success": True,
"config": config
}
@router.put("/config")
@limiter.limit("10/minute")
@handle_exceptions
async def update_config(
request: Request,
config: EasynewsConfigUpdate,
current_user: Dict = Depends(require_admin)
):
"""Update Easynews configuration."""
monitor = _get_monitor()
# Build update kwargs
kwargs = {}
if config.username is not None:
kwargs['username'] = config.username
if config.password is not None and config.password != '********':
kwargs['password'] = config.password
if config.enabled is not None:
kwargs['enabled'] = config.enabled
if config.check_interval_hours is not None:
kwargs['check_interval_hours'] = config.check_interval_hours
if config.auto_download is not None:
kwargs['auto_download'] = config.auto_download
if config.min_quality is not None:
kwargs['min_quality'] = config.min_quality
if config.proxy_enabled is not None:
kwargs['proxy_enabled'] = config.proxy_enabled
if config.proxy_type is not None:
kwargs['proxy_type'] = config.proxy_type
if config.proxy_host is not None:
kwargs['proxy_host'] = config.proxy_host
if config.proxy_port is not None:
kwargs['proxy_port'] = config.proxy_port
if config.proxy_username is not None:
kwargs['proxy_username'] = config.proxy_username
if config.proxy_password is not None and config.proxy_password != '********':
kwargs['proxy_password'] = config.proxy_password
if config.notifications_enabled is not None:
kwargs['notifications_enabled'] = config.notifications_enabled
if not kwargs:
return {"success": False, "message": "No updates provided"}
success = monitor.update_config(**kwargs)
return {
"success": success,
"message": "Configuration updated" if success else "Update failed"
}
@router.post("/test")
@limiter.limit("5/minute")
@handle_exceptions
async def test_connection(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Test Easynews connection with current credentials."""
monitor = _get_monitor()
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(_executor, monitor.test_connection)
return result
# ============================================================================
# CELEBRITY ENDPOINTS (uses tracked celebrities from Appearances)
# ============================================================================
@router.get("/celebrities")
@limiter.limit("30/minute")
@handle_exceptions
async def get_celebrities(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Get all tracked celebrities that will be searched on Easynews."""
monitor = _get_monitor()
celebrities = monitor.get_celebrities()
return {
"success": True,
"celebrities": celebrities,
"count": len(celebrities)
}
# ============================================================================
# RESULTS ENDPOINTS
# ============================================================================
@router.get("/results")
@limiter.limit("30/minute")
@handle_exceptions
async def get_results(
request: Request,
status: Optional[str] = None,
celebrity_id: Optional[int] = None,
limit: int = 100,
offset: int = 0,
current_user: Dict = Depends(get_current_user)
):
"""Get discovered results with optional filters."""
monitor = _get_monitor()
results = monitor.get_results(
status=status,
celebrity_id=celebrity_id,
limit=limit,
offset=offset,
)
# Get total count
total = monitor.get_result_count(status=status)
return {
"success": True,
"results": results,
"count": len(results),
"total": total
}
@router.post("/results/{result_id}/status")
@limiter.limit("30/minute")
@handle_exceptions
async def update_result_status(
request: Request,
result_id: int,
status: str,
current_user: Dict = Depends(get_current_user)
):
"""Update a result's status (e.g., mark as ignored)."""
valid_statuses = ['new', 'downloaded', 'ignored', 'failed']
if status not in valid_statuses:
raise HTTPException(status_code=400, detail=f"Invalid status. Must be one of: {valid_statuses}")
monitor = _get_monitor()
success = monitor.update_result_status(result_id, status)
return {
"success": success,
"message": f"Status updated to {status}" if success else "Update failed"
}
@router.post("/results/{result_id}/download")
@limiter.limit("10/minute")
@handle_exceptions
async def download_result(
request: Request,
result_id: int,
background_tasks: BackgroundTasks,
current_user: Dict = Depends(get_current_user)
):
"""Start downloading a result."""
monitor = _get_monitor()
def do_download():
return monitor.download_result(result_id)
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(_executor, do_download)
return result
# ============================================================================
# CHECK ENDPOINTS
# ============================================================================
@router.get("/status")
@limiter.limit("60/minute")
@handle_exceptions
async def get_status(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Get current check status."""
monitor = _get_monitor()
status = monitor.get_status()
config = monitor.get_config()
celebrity_count = monitor.get_celebrity_count()
return {
"success": True,
"status": status,
"last_check": config.get('last_check'),
"enabled": config.get('enabled', False),
"has_credentials": config.get('has_credentials', False),
"celebrity_count": celebrity_count,
}
@router.post("/check")
@limiter.limit("5/minute")
@handle_exceptions
async def trigger_check(
request: Request,
background_tasks: BackgroundTasks,
current_user: Dict = Depends(get_current_user)
):
"""Trigger a manual check for all tracked celebrities."""
monitor = _get_monitor()
status = monitor.get_status()
if status.get('is_running'):
return {
"success": False,
"message": "Check already in progress"
}
def do_check():
return monitor.check_all_celebrities()
loop = asyncio.get_event_loop()
background_tasks.add_task(loop.run_in_executor, _executor, do_check)
return {
"success": True,
"message": "Check started"
}
@router.post("/check/{search_id}")
@limiter.limit("5/minute")
@handle_exceptions
async def trigger_single_check(
request: Request,
search_id: int,
background_tasks: BackgroundTasks,
current_user: Dict = Depends(get_current_user)
):
"""Trigger a manual check for a specific search term."""
monitor = _get_monitor()
status = monitor.get_status()
if status.get('is_running'):
return {
"success": False,
"message": "Check already in progress"
}
# Verify search exists
search = monitor.get_search(search_id)
if not search:
raise HTTPException(status_code=404, detail="Search not found")
def do_check():
return monitor.check_single_search(search_id)
loop = asyncio.get_event_loop()
background_tasks.add_task(loop.run_in_executor, _executor, do_check)
return {
"success": True,
"message": f"Check started for: {search['search_term']}"
}
# ============================================================================
# SEARCH MANAGEMENT ENDPOINTS
# ============================================================================
class EasynewsSearchCreate(BaseModel):
    """Payload for creating a saved Easynews search term."""
    search_term: str
    # 'any' by default, or a specific media type understood by the monitor
    media_type: Optional[str] = 'any'
    # Optional TMDB metadata used for display/matching
    tmdb_id: Optional[int] = None
    tmdb_title: Optional[str] = None
    poster_url: Optional[str] = None
class EasynewsSearchUpdate(BaseModel):
    """Partial update payload for a saved search; None fields are ignored."""
    search_term: Optional[str] = None
    media_type: Optional[str] = None
    enabled: Optional[bool] = None
    tmdb_id: Optional[int] = None
    tmdb_title: Optional[str] = None
    poster_url: Optional[str] = None
@router.get("/searches")
@limiter.limit("30/minute")
@handle_exceptions
async def get_searches(
request: Request,
current_user: Dict = Depends(get_current_user)
):
"""Get all saved search terms."""
monitor = _get_monitor()
searches = monitor.get_all_searches()
return {
"success": True,
"searches": searches,
"count": len(searches)
}
@router.post("/searches")
@limiter.limit("10/minute")
@handle_exceptions
async def add_search(
request: Request,
search: EasynewsSearchCreate,
current_user: Dict = Depends(get_current_user)
):
"""Add a new search term."""
monitor = _get_monitor()
search_id = monitor.add_search(
search_term=search.search_term,
media_type=search.media_type,
tmdb_id=search.tmdb_id,
tmdb_title=search.tmdb_title,
poster_url=search.poster_url
)
if search_id:
return {
"success": True,
"id": search_id,
"message": f"Search term '{search.search_term}' added"
}
else:
return {
"success": False,
"message": "Failed to add search term"
}
@router.put("/searches/{search_id}")
@limiter.limit("10/minute")
@handle_exceptions
async def update_search(
request: Request,
search_id: int,
updates: EasynewsSearchUpdate,
current_user: Dict = Depends(get_current_user)
):
"""Update an existing search term."""
monitor = _get_monitor()
# Build update kwargs
kwargs = {}
if updates.search_term is not None:
kwargs['search_term'] = updates.search_term
if updates.media_type is not None:
kwargs['media_type'] = updates.media_type
if updates.enabled is not None:
kwargs['enabled'] = updates.enabled
if updates.tmdb_id is not None:
kwargs['tmdb_id'] = updates.tmdb_id
if updates.tmdb_title is not None:
kwargs['tmdb_title'] = updates.tmdb_title
if updates.poster_url is not None:
kwargs['poster_url'] = updates.poster_url
if not kwargs:
return {"success": False, "message": "No updates provided"}
success = monitor.update_search(search_id, **kwargs)
return {
"success": success,
"message": "Search updated" if success else "Update failed"
}
@router.delete("/searches/{search_id}")
@limiter.limit("10/minute")
@handle_exceptions
async def delete_search(
request: Request,
search_id: int,
current_user: Dict = Depends(get_current_user)
):
"""Delete a search term."""
monitor = _get_monitor()
success = monitor.delete_search(search_id)
return {
"success": success,
"message": "Search deleted" if success else "Delete failed"
}

1248
web/backend/routers/face.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,128 @@
"""
File serving and thumbnail generation API
Provides endpoints for:
- On-demand thumbnail generation for images and videos
- File serving with proper caching headers
"""
from typing import Dict
from fastapi import APIRouter, Depends, Query, Request
from fastapi.responses import Response
from PIL import Image
from pathlib import Path
import subprocess
import io
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user
from ..core.exceptions import handle_exceptions, NotFoundError, ValidationError
from ..core.utils import validate_file_path
from modules.universal_logger import get_logger
router = APIRouter(prefix="/files", tags=["files"])
logger = get_logger('FilesRouter')
limiter = Limiter(key_func=get_remote_address)
@router.get("/thumbnail")
@limiter.limit("300/minute")
@handle_exceptions
async def get_thumbnail(
request: Request,
path: str = Query(..., description="File path"),
current_user: Dict = Depends(get_current_user)
):
"""
Generate and return thumbnail for image or video
Args:
path: Absolute path to file
Returns:
JPEG thumbnail (200x200px max, maintains aspect ratio)
"""
# Validate file is within allowed directories (prevents path traversal)
file_path = validate_file_path(path, require_exists=True)
file_ext = file_path.suffix.lower()
# Generate thumbnail based on type
if file_ext in ['.jpg', '.jpeg', '.png', '.webp', '.gif', '.heic']:
# Image thumbnail with PIL
img = Image.open(file_path)
# Convert HEIC if needed
if file_ext == '.heic':
img = img.convert('RGB')
# Create thumbnail (maintains aspect ratio)
img.thumbnail((200, 200), Image.Resampling.LANCZOS)
# Convert to JPEG
buffer = io.BytesIO()
if img.mode in ('RGBA', 'LA', 'P'):
# Convert transparency to white background
background = Image.new('RGB', img.size, (255, 255, 255))
if img.mode == 'P':
img = img.convert('RGBA')
background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
img = background
img.convert('RGB').save(buffer, format='JPEG', quality=85, optimize=True)
buffer.seek(0)
return Response(
content=buffer.read(),
media_type="image/jpeg",
headers={"Cache-Control": "public, max-age=3600"}
)
elif file_ext in ['.mp4', '.webm', '.mov', '.avi', '.mkv']:
# Video thumbnail with ffmpeg
result = subprocess.run(
[
'ffmpeg',
'-ss', '00:00:01', # Seek to 1 second
'-i', str(file_path),
'-vframes', '1', # Extract 1 frame
'-vf', 'scale=200:-1', # Scale to 200px width, maintain aspect
'-f', 'image2pipe', # Output to pipe
'-vcodec', 'mjpeg', # JPEG codec
'pipe:1'
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=10,
check=False
)
if result.returncode != 0:
# Try without seeking (for very short videos)
result = subprocess.run(
[
'ffmpeg',
'-i', str(file_path),
'-vframes', '1',
'-vf', 'scale=200:-1',
'-f', 'image2pipe',
'-vcodec', 'mjpeg',
'pipe:1'
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=10,
check=True
)
return Response(
content=result.stdout,
media_type="image/jpeg",
headers={"Cache-Control": "public, max-age=3600"}
)
else:
raise ValidationError("Unsupported file type")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,436 @@
"""
Instagram Unified Configuration Router
Provides a single configuration interface for all Instagram scrapers.
Manages one central account list with per-account content type toggles,
scraper assignments, and auto-generates legacy per-scraper configs on save.
"""
import copy
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, Depends, Request
from pydantic import BaseModel
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, get_app_state
from ..core.exceptions import handle_exceptions, ValidationError
from modules.universal_logger import get_logger
logger = get_logger('API')
router = APIRouter(prefix="/api/instagram-unified", tags=["Instagram Unified"])
limiter = Limiter(key_func=get_remote_address)
# Scraper capability matrix: which content types each scraper can fetch.
# Used to validate scraper assignments and to pick migration fallbacks.
SCRAPER_CAPABILITIES = {
    'fastdl': {'posts': True, 'stories': True, 'reels': True, 'tagged': False},
    'imginn_api': {'posts': True, 'stories': True, 'reels': False, 'tagged': True},
    'imginn': {'posts': True, 'stories': True, 'reels': False, 'tagged': True},
    'toolzu': {'posts': True, 'stories': True, 'reels': False, 'tagged': False},
    'instagram_client': {'posts': True, 'stories': True, 'reels': True, 'tagged': True},
    'instagram': {'posts': True, 'stories': True, 'reels': False, 'tagged': True},
}
# Human-readable labels for the UI, keyed by scraper id.
SCRAPER_LABELS = {
    'fastdl': 'FastDL',
    'imginn_api': 'ImgInn API',
    'imginn': 'ImgInn',
    'toolzu': 'Toolzu',
    'instagram_client': 'Instagram Client',
    'instagram': 'InstaLoader',
}
# Every content type the unified config manages, in canonical order.
CONTENT_TYPES = ['posts', 'stories', 'reels', 'tagged']
# Legacy per-scraper settings keys: read during migration and regenerated
# on every save of the unified config.
LEGACY_SCRAPER_KEYS = ['fastdl', 'imginn_api', 'imginn', 'toolzu', 'instagram_client', 'instagram']
# Default destination paths per content type (used when no legacy config
# supplies one).
DEFAULT_PATHS = {
    'posts': '/opt/immich/md/social media/instagram/posts',
    'stories': '/opt/immich/md/social media/instagram/stories',
    'reels': '/opt/immich/md/social media/instagram/reels',
    'tagged': '/opt/immich/md/social media/instagram/tagged',
}
class UnifiedConfigUpdate(BaseModel):
    """Request body for PUT /config: the full unified config as a dict."""
    config: Dict[str, Any]
# ============================================================================
# MIGRATION LOGIC
# ============================================================================
def _migrate_from_legacy(app_state) -> Dict[str, Any]:
    """
    Build a unified config from existing per-scraper configs.
    Called on first load when no instagram_unified key exists.

    The result is a preview only — the caller decides whether to persist it.
    Steps:
      1. Assign each content type to the first enabled legacy scraper that
         supports it (per SCRAPER_CAPABILITIES), with hard-coded fallbacks.
      2. Merge usernames from every legacy config into one account list,
         flagging each account's content types from the assigned scraper.
      3. Carry over content-type settings, phrase search, scraper auth
         settings, and global scheduling values.
    """
    settings = app_state.settings
    # Load all legacy configs (missing keys become empty dicts)
    legacy = {}
    for key in LEGACY_SCRAPER_KEYS:
        legacy[key] = settings.get(key) or {}
    # Determine scraper assignments from currently enabled configs.
    # Only assign a scraper if it actually supports the content type per
    # the capability matrix; LEGACY_SCRAPER_KEYS order gives priority.
    scraper_assignment = {}
    for ct in CONTENT_TYPES:
        assigned = None
        for scraper_key in LEGACY_SCRAPER_KEYS:
            cfg = legacy[scraper_key]
            if (cfg.get('enabled') and cfg.get(ct, {}).get('enabled')
                    and SCRAPER_CAPABILITIES.get(scraper_key, {}).get(ct)):
                assigned = scraper_key
                break
        # Fallback defaults if nothing is enabled — pick first capable scraper
        if not assigned:
            if ct in ('posts', 'tagged'):
                assigned = 'imginn_api'
            elif ct in ('stories', 'reels'):
                assigned = 'fastdl'
        scraper_assignment[ct] = assigned
    # Collect all unique usernames from all scrapers
    all_usernames = set()
    scraper_usernames = {}  # track which usernames belong to which scraper
    for scraper_key in LEGACY_SCRAPER_KEYS:
        cfg = legacy[scraper_key]
        usernames = []
        if scraper_key == 'instagram':
            # InstaLoader uses accounts list format ([{'username': ...}, ...])
            for acc in cfg.get('accounts', []):
                u = acc.get('username')
                if u:
                    usernames.append(u)
        else:
            usernames = cfg.get('usernames', [])
        # Also include phrase_search usernames
        ps_usernames = cfg.get('phrase_search', {}).get('usernames', [])
        combined = set(usernames) | set(ps_usernames)
        scraper_usernames[scraper_key] = combined
        all_usernames |= combined
    # Build per-account content type flags.
    # An account gets a content type enabled if the scraper assigned to
    # that content type has this username in its list.
    accounts = []
    for username in sorted(all_usernames):
        account = {'username': username}
        for ct in CONTENT_TYPES:
            assigned_scraper = scraper_assignment[ct]
            # Enable if this user is in the assigned scraper's list
            account[ct] = username in scraper_usernames.get(assigned_scraper, set())
        accounts.append(account)
    # Import content type settings from the scraper assigned to each type
    content_types = {}
    for ct in CONTENT_TYPES:
        ct_config = {'enabled': False, 'days_back': 7, 'destination_path': DEFAULT_PATHS[ct]}
        assigned = scraper_assignment[ct]
        cfg = legacy.get(assigned, {})
        ct_sub = cfg.get(ct, {})
        if ct_sub.get('enabled'):
            ct_config['enabled'] = True
        if ct_sub.get('days_back'):
            ct_config['days_back'] = ct_sub['days_back']
        if ct_sub.get('destination_path'):
            ct_config['destination_path'] = ct_sub['destination_path']
        content_types[ct] = ct_config
    # Import phrase search from the first scraper that has it enabled
    # (or at least has phrases configured)
    phrase_search = {
        'enabled': False,
        'download_all': True,
        'phrases': [],
        'case_sensitive': False,
        'match_all': False,
    }
    for scraper_key in LEGACY_SCRAPER_KEYS:
        ps = legacy[scraper_key].get('phrase_search', {})
        if ps.get('enabled') or ps.get('phrases'):
            phrase_search['enabled'] = ps.get('enabled', False)
            phrase_search['download_all'] = ps.get('download_all', True)
            phrase_search['phrases'] = ps.get('phrases', [])
            phrase_search['case_sensitive'] = ps.get('case_sensitive', False)
            phrase_search['match_all'] = ps.get('match_all', False)
            break
    # Import scraper-specific settings (auth, cookies, etc.)
    scraper_settings = {
        'fastdl': {},
        'imginn_api': {},
        'imginn': {'cookie_file': legacy['imginn'].get('cookie_file', '')},
        'toolzu': {
            'email': legacy['toolzu'].get('email', ''),
            'password': legacy['toolzu'].get('password', ''),
            'cookie_file': legacy['toolzu'].get('cookie_file', ''),
        },
        'instagram_client': {},
        'instagram': {
            'username': legacy['instagram'].get('username', ''),
            'password': legacy['instagram'].get('password', ''),
            'totp_secret': legacy['instagram'].get('totp_secret', ''),
            'session_file': legacy['instagram'].get('session_file', ''),
        },
    }
    # Get global scheduling settings from the primary enabled scraper
    check_interval = 8
    run_at_start = False
    user_delay = 20
    for scraper_key in ['fastdl', 'imginn_api']:
        cfg = legacy[scraper_key]
        if cfg.get('enabled'):
            check_interval = cfg.get('check_interval_hours', 8)
            run_at_start = cfg.get('run_at_start', False)
            user_delay = cfg.get('user_delay_seconds', 20)
            break
    return {
        'enabled': True,
        'check_interval_hours': check_interval,
        'run_at_start': run_at_start,
        'user_delay_seconds': user_delay,
        'scraper_assignment': scraper_assignment,
        'content_types': content_types,
        'accounts': accounts,
        'phrase_search': phrase_search,
        'scraper_settings': scraper_settings,
    }
# ============================================================================
# LEGACY CONFIG GENERATION
# ============================================================================
def _generate_legacy_configs(unified: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """
    From the unified config, generate 6 legacy per-scraper configs
    that the existing scraper modules can consume.

    Args:
        unified: Unified Instagram config (scraper_assignment, content_types,
            accounts, phrase_search, scraper_settings plus global flags).

    Returns:
        Mapping of legacy scraper key -> generated config dict, one entry
        for every key in LEGACY_SCRAPER_KEYS.
    """
    scraper_assignment = unified.get('scraper_assignment', {})
    content_types = unified.get('content_types', {})
    accounts = unified.get('accounts', [])
    phrase_search = unified.get('phrase_search', {})
    scraper_settings = unified.get('scraper_settings', {})
    # For each scraper, determine which content types it's assigned to
    scraper_content_types = {key: [] for key in LEGACY_SCRAPER_KEYS}
    for ct, scraper_key in scraper_assignment.items():
        if scraper_key in scraper_content_types:
            scraper_content_types[scraper_key].append(ct)
    # For each scraper, collect usernames that have any of its assigned content types enabled
    scraper_usernames = {key: set() for key in LEGACY_SCRAPER_KEYS}
    for account in accounts:
        username = account.get('username', '')
        if not username:
            continue
        for ct, scraper_key in scraper_assignment.items():
            # Guard against unknown scraper keys (mirrors the guard in the
            # content-type loop above; previously this could raise KeyError)
            if scraper_key in scraper_usernames and account.get(ct, False):
                scraper_usernames[scraper_key].add(username)
    # Build legacy configs
    result = {}
    for scraper_key in LEGACY_SCRAPER_KEYS:
        assigned_cts = scraper_content_types[scraper_key]
        usernames = sorted(scraper_usernames[scraper_key])
        # A legacy config is enabled only when the unified system is on AND
        # this scraper has both assigned content types and accounts
        is_enabled = unified.get('enabled', False) and len(assigned_cts) > 0 and len(usernames) > 0
        extra_settings = scraper_settings.get(scraper_key, {})
        cfg = {
            'enabled': is_enabled,
            'check_interval_hours': unified.get('check_interval_hours', 8),
            'run_at_start': unified.get('run_at_start', False),
        }
        # Add user_delay_seconds for scrapers that support it
        if scraper_key in ('imginn_api', 'instagram_client'):
            cfg['user_delay_seconds'] = unified.get('user_delay_seconds', 20)
        # Add scraper-specific settings
        if scraper_key == 'fastdl':
            cfg['high_res'] = True  # Always use high resolution
        elif scraper_key == 'imginn':
            cfg['cookie_file'] = extra_settings.get('cookie_file', '')
        elif scraper_key == 'toolzu':
            cfg['email'] = extra_settings.get('email', '')
            cfg['password'] = extra_settings.get('password', '')
            cfg['cookie_file'] = extra_settings.get('cookie_file', '')
        # InstaLoader uses accounts format + auth fields at top level
        if scraper_key == 'instagram':
            ig_settings = extra_settings
            cfg['method'] = 'instaloader'
            cfg['username'] = ig_settings.get('username', '')
            cfg['password'] = ig_settings.get('password', '')
            cfg['totp_secret'] = ig_settings.get('totp_secret', '')
            cfg['session_file'] = ig_settings.get('session_file', '')
            cfg['accounts'] = [
                {'username': u, 'check_interval_hours': unified.get('check_interval_hours', 8), 'run_at_start': False}
                for u in usernames
            ]
        else:
            cfg['usernames'] = usernames
        # Content type sub-configs
        for ct in CONTENT_TYPES:
            ct_global = content_types.get(ct, {})
            if ct in assigned_cts:
                cfg[ct] = {
                    'enabled': ct_global.get('enabled', False),
                    'days_back': ct_global.get('days_back', 7),
                    'destination_path': ct_global.get('destination_path', DEFAULT_PATHS.get(ct, '')),
                }
                # Add temp_dir based on scraper key
                cfg[ct]['temp_dir'] = f'temp/{scraper_key}/{ct}'
            else:
                cfg[ct] = {'enabled': False}
        # Phrase search goes on the scraper assigned to posts
        posts_scraper = scraper_assignment.get('posts')
        if scraper_key == posts_scraper and phrase_search.get('enabled'):
            # Collect all usernames that have posts enabled for the phrase search usernames list
            ps_usernames = sorted(scraper_usernames.get(scraper_key, set()))
            cfg['phrase_search'] = {
                'enabled': phrase_search.get('enabled', False),
                'download_all': phrase_search.get('download_all', True),
                'usernames': ps_usernames,
                'phrases': phrase_search.get('phrases', []),
                'case_sensitive': phrase_search.get('case_sensitive', False),
                'match_all': phrase_search.get('match_all', False),
            }
        else:
            # Disabled phrase search. 'download_all' was previously omitted
            # here; include it (True, matching the enabled branch's default
            # and the migration default) so consumers see a consistent shape.
            cfg['phrase_search'] = {
                'enabled': False,
                'download_all': True,
                'usernames': [],
                'phrases': [],
                'case_sensitive': False,
                'match_all': False,
            }
        result[scraper_key] = cfg
    return result
# ============================================================================
# ENDPOINTS
# ============================================================================
@router.get("/config")
@handle_exceptions
async def get_config(request: Request, user=Depends(get_current_user)):
"""
Load unified config. Auto-migrates from legacy configs on first load.
Returns config (or generated migration preview), hidden_modules, and scraper capabilities.
"""
app_state = get_app_state()
existing = app_state.settings.get('instagram_unified')
hidden_modules = app_state.settings.get('hidden_modules') or []
if existing:
return {
'config': existing,
'migrated': False,
'hidden_modules': hidden_modules,
'scraper_capabilities': SCRAPER_CAPABILITIES,
'scraper_labels': SCRAPER_LABELS,
}
# No unified config yet — generate migration preview (not auto-saved)
migrated_config = _migrate_from_legacy(app_state)
return {
'config': migrated_config,
'migrated': True,
'hidden_modules': hidden_modules,
'scraper_capabilities': SCRAPER_CAPABILITIES,
'scraper_labels': SCRAPER_LABELS,
}
@router.put("/config")
@handle_exceptions
async def update_config(request: Request, body: UnifiedConfigUpdate, user=Depends(get_current_user)):
"""
Save unified config + generate 6 legacy configs.
"""
app_state = get_app_state()
config = body.config
# Validate scraper assignments against capability matrix
scraper_assignment = config.get('scraper_assignment', {})
for ct, scraper_key in scraper_assignment.items():
if ct not in CONTENT_TYPES:
raise ValidationError(f"Unknown content type: {ct}")
if scraper_key not in SCRAPER_CAPABILITIES:
raise ValidationError(f"Unknown scraper: {scraper_key}")
if not SCRAPER_CAPABILITIES[scraper_key].get(ct):
raise ValidationError(
f"Scraper {SCRAPER_LABELS.get(scraper_key, scraper_key)} does not support {ct}"
)
# Save unified config
app_state.settings.set(
key='instagram_unified',
value=config,
category='scrapers',
description='Unified Instagram configuration',
updated_by='api'
)
# Generate and save 6 legacy configs
legacy_configs = _generate_legacy_configs(config)
for scraper_key, legacy_cfg in legacy_configs.items():
app_state.settings.set(
key=scraper_key,
value=legacy_cfg,
category='scrapers',
description=f'{SCRAPER_LABELS.get(scraper_key, scraper_key)} configuration (auto-generated)',
updated_by='api'
)
# Refresh in-memory config
if hasattr(app_state, 'config') and app_state.config is not None:
app_state.config['instagram_unified'] = config
for scraper_key, legacy_cfg in legacy_configs.items():
app_state.config[scraper_key] = legacy_cfg
# Broadcast config update via WebSocket
try:
if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
import asyncio
asyncio.create_task(app_state.websocket_manager.broadcast({
'type': 'config_updated',
'data': {'source': 'instagram_unified'}
}))
except Exception as e:
logger.warning(f"Failed to broadcast config update: {e}", module="InstagramUnified")
return {
'success': True,
'message': 'Instagram configuration saved',
'legacy_configs_updated': list(legacy_configs.keys()),
}
@router.get("/capabilities")
@handle_exceptions
async def get_capabilities(request: Request, user=Depends(get_current_user)):
"""Return scraper capability matrix and hidden modules."""
app_state = get_app_state()
hidden_modules = app_state.settings.get('hidden_modules') or []
return {
'scraper_capabilities': SCRAPER_CAPABILITIES,
'scraper_labels': SCRAPER_LABELS,
'hidden_modules': hidden_modules,
'content_types': CONTENT_TYPES,
}

View File

@@ -0,0 +1,259 @@
"""
Maintenance Router
Handles database maintenance and cleanup operations:
- Scan and remove missing file references
- Database integrity checks
- Orphaned record cleanup
"""
import os
from pathlib import Path
from datetime import datetime
from typing import Dict, List
from fastapi import APIRouter, Depends, Request, BackgroundTasks
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, get_app_state
from ..core.config import settings
from ..core.responses import now_iso8601
from ..core.exceptions import handle_exceptions
from modules.universal_logger import get_logger
logger = get_logger('Maintenance')
router = APIRouter(prefix="/api/maintenance", tags=["Maintenance"])
limiter = Limiter(key_func=get_remote_address)
# Whitelist of allowed table/column combinations for cleanup operations
# This prevents SQL injection by only allowing known-safe identifiers
ALLOWED_CLEANUP_TABLES = {
"file_inventory": "file_path",
"downloads": "file_path",
"youtube_downloads": "file_path",
"video_downloads": "file_path",
"face_recognition_scans": "file_path",
"face_recognition_references": "reference_image_path",
"discovery_scan_queue": "file_path",
"recycle_bin": "recycle_path",
}
# Pre-built SQL queries for each allowed table (avoids any string interpolation)
# Uses 'id' instead of 'rowid' (PostgreSQL does not have rowid)
# Uses information_schema for table existence checks (PostgreSQL)
_CLEANUP_QUERIES = {
table: {
"check_exists": "SELECT table_name FROM information_schema.tables WHERE table_schema='public' AND table_name=?",
"select": f"SELECT id, {col} FROM {table} WHERE {col} IS NOT NULL AND {col} != ''",
"delete": f"DELETE FROM {table} WHERE id IN ",
}
for table, col in ALLOWED_CLEANUP_TABLES.items()
}
# Store last scan results
last_scan_result = None
@router.post("/cleanup/missing-files")
@limiter.limit("5/hour")
@handle_exceptions
async def cleanup_missing_files(
    request: Request,
    background_tasks: BackgroundTasks,
    dry_run: bool = True,
    current_user: Dict = Depends(get_current_user)
):
    """Kick off a background scan that prunes DB rows pointing at missing files.

    Args:
        dry_run: When True (the default) nothing is deleted; the scan only
            reports what would be removed.

    Returns:
        An acknowledgement payload; poll /api/maintenance/cleanup/status
        for progress and results.
    """
    state = get_app_state()
    actor = current_user.get('sub', 'unknown')
    logger.info(f"Database cleanup started by {actor} (dry_run={dry_run})", module="Maintenance")
    # The heavy filesystem/DB work happens outside the request cycle.
    background_tasks.add_task(_cleanup_missing_files_task, state, dry_run, actor)
    return {
        "status": "started",
        "dry_run": dry_run,
        "message": "Cleanup scan started in background. Check /api/maintenance/cleanup/status for progress.",
        "timestamp": now_iso8601()
    }
@router.get("/cleanup/status")
@limiter.limit("60/minute")
@handle_exceptions
async def get_cleanup_status(request: Request, current_user: Dict = Depends(get_current_user)):
    """Return the status/results of the most recent cleanup scan."""
    # Reading the module-level result needs no `global` declaration.
    if last_scan_result is None:
        return {
            "status": "no_scan",
            "message": "No cleanup scan has been run yet"
        }
    return last_scan_result
def _cleanup_missing_files_task(app_state, dry_run: bool, user_id: str):
    """Scan whitelisted tables for rows whose referenced files no longer exist.

    Runs as a FastAPI background task.  Deliberately a plain ``def`` rather
    than ``async def``: the body performs blocking database and filesystem
    work with no awaits, so as a coroutine it would monopolize the event
    loop for the entire scan.  Starlette executes sync background tasks in
    a threadpool, keeping the server responsive.

    Args:
        app_state: Application state holding the DB handle.
        dry_run: When True, only report what would be deleted.
        user_id: Identifier of the user who triggered the scan (audit trail).

    Side effects:
        Publishes progress/results via the module-level ``last_scan_result``
        so the /cleanup/status endpoint can report them.
    """
    global last_scan_result
    start_time = datetime.now()
    # Progress snapshot exposed to /cleanup/status while the scan runs.
    result = {
        "status": "running",
        "started_at": start_time.isoformat(),
        "dry_run": dry_run,
        "user": user_id,
        "tables_scanned": {},
        "total_checked": 0,
        "total_missing": 0,
        "total_removed": 0
    }
    try:
        with app_state.db.get_connection(for_write=True) as conn:
            cursor = conn.cursor()
            # Tables and their file-path columns to examine.
            # NOTE: instagram_perceptual_hashes is excluded because the hash data
            # is valuable for duplicate detection even if the original file is gone.
            tables_to_scan = [
                ("file_inventory", "file_path"),
                ("downloads", "file_path"),
                ("youtube_downloads", "file_path"),
                ("video_downloads", "file_path"),
                ("face_recognition_scans", "file_path"),
                ("face_recognition_references", "reference_image_path"),
                ("discovery_scan_queue", "file_path"),
                ("recycle_bin", "recycle_path"),
            ]
            for table_name, column_name in tables_to_scan:
                logger.info(f"Scanning {table_name}.{column_name}...", module="Maintenance")
                table_result = _scan_table(cursor, table_name, column_name, dry_run)
                result["tables_scanned"][table_name] = table_result
                result["total_checked"] += table_result["checked"]
                result["total_missing"] += table_result["missing"]
                result["total_removed"] += table_result["removed"]
            # Deletions only take effect outside dry-run mode.
            if not dry_run:
                conn.commit()
                logger.info(f"Cleanup completed: removed {result['total_removed']} records", module="Maintenance")
            else:
                logger.info(f"Dry run completed: {result['total_missing']} records would be removed", module="Maintenance")
        # Finalize the published result with timing information.
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()
        result.update({
            "status": "completed",
            "completed_at": end_time.isoformat(),
            "duration_seconds": round(duration, 2)
        })
    except Exception as e:
        logger.error(f"Cleanup failed: {e}", module="Maintenance", exc_info=True)
        result.update({
            "status": "failed",
            "error": str(e),
            "completed_at": datetime.now().isoformat()
        })
    last_scan_result = result
def _scan_table(cursor, table_name: str, column_name: str, dry_run: bool) -> Dict:
    """Check one whitelisted table for rows whose referenced file is gone.

    Only pairs present in ALLOWED_CLEANUP_TABLES are accepted, and all SQL
    comes from the pre-built _CLEANUP_QUERIES map, so no caller-supplied
    text ever reaches a query string.

    Returns a dict with counts (checked/missing/removed), up to 100 example
    paths, and an 'error' key when validation or the scan itself fails.
    """
    result = {
        "checked": 0,
        "missing": 0,
        "removed": 0,
        "missing_files": []
    }
    # Reject anything outside the whitelist before touching the database.
    expected_column = ALLOWED_CLEANUP_TABLES.get(table_name)
    if expected_column is None:
        logger.error(f"Table {table_name} not in allowed whitelist", module="Maintenance")
        result["error"] = f"Table {table_name} not allowed"
        return result
    if expected_column != column_name:
        logger.error(f"Column {column_name} not allowed for table {table_name}", module="Maintenance")
        result["error"] = f"Column {column_name} not allowed for table {table_name}"
        return result
    # SQL built at module load time from the whitelist, never from input.
    queries = _CLEANUP_QUERIES[table_name]
    try:
        # Confirm the table actually exists (parameterized lookup).
        cursor.execute(queries["check_exists"], (table_name,))
        if cursor.fetchone() is None:
            logger.warning(f"Table {table_name} does not exist", module="Maintenance")
            return result
        cursor.execute(queries["select"])
        rows = cursor.fetchall()
        result["checked"] = len(rows)
        missing_rowids = []
        for row_id, path in rows:
            if path and not os.path.exists(path):
                result["missing"] += 1
                missing_rowids.append(row_id)
                # Cap the example list at 100 entries.
                if len(result["missing_files"]) < 100:
                    result["missing_files"].append(path)
        if missing_rowids and not dry_run:
            delete_base = queries["delete"]
            # Chunked deletes keep each IN(...) parameter list bounded at 100.
            for start in range(0, len(missing_rowids), 100):
                chunk = missing_rowids[start:start + 100]
                marks = ','.join('?' * len(chunk))
                # delete_base comes from the whitelist; marks are just '?'s.
                cursor.execute(f"{delete_base}({marks})", chunk)
                result["removed"] += len(chunk)
        logger.info(
            f"  {table_name}: checked={result['checked']}, missing={result['missing']}, "
            f"{'would_remove' if dry_run else 'removed'}={result['missing']}",
            module="Maintenance"
        )
    except Exception as e:
        logger.error(f"Error scanning {table_name}: {e}", module="Maintenance", exc_info=True)
        result["error"] = str(e)
    return result

View File

@@ -0,0 +1,669 @@
"""
Manual Import Router
Handles manual file import operations:
- Service configuration
- File upload to temp directory
- Filename parsing
- Processing and moving to final destination (async background processing)
"""
import asyncio
import shutil
import uuid
from datetime import datetime
from pathlib import Path
from threading import Lock
from typing import Dict, List, Optional
from fastapi import APIRouter, BackgroundTasks, Depends, File, Form, Request, UploadFile
from pydantic import BaseModel
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, get_app_state
from ..core.exceptions import handle_exceptions, NotFoundError, ValidationError
from modules.filename_parser import FilenameParser, get_preset_patterns, parse_with_fallbacks, INSTAGRAM_PATTERNS
from modules.universal_logger import get_logger
logger = get_logger('API')
router = APIRouter(prefix="/api/manual-import", tags=["Manual Import"])
limiter = Limiter(key_func=get_remote_address)
# ============================================================================
# JOB TRACKING FOR BACKGROUND PROCESSING
# ============================================================================
# In-memory job registry; intentionally transient (wiped on process restart).
_import_jobs: Dict[str, Dict] = {}
_jobs_lock = Lock()


def get_job_status(job_id: str) -> Optional[Dict]:
    """Return the tracked state for *job_id*, or None if unknown."""
    with _jobs_lock:
        return _import_jobs.get(job_id)


def update_job_status(job_id: str, updates: Dict):
    """Merge *updates* into an existing job's state (no-op for unknown ids)."""
    with _jobs_lock:
        job = _import_jobs.get(job_id)
        if job is not None:
            job.update(updates)


def create_job(job_id: str, total_files: int, service_name: str):
    """Register a fresh job entry in the 'processing' state."""
    with _jobs_lock:
        _import_jobs[job_id] = {
            'id': job_id,
            'status': 'processing',
            'service_name': service_name,
            'total_files': total_files,
            'processed_files': 0,
            'success_count': 0,
            'failed_count': 0,
            'results': [],
            'current_file': None,
            'started_at': datetime.now().isoformat(),
            'completed_at': None
        }


def cleanup_old_jobs():
    """Drop finished jobs whose completion time is more than an hour old."""
    with _jobs_lock:
        now = datetime.now()
        stale = []
        for job_id, job in _import_jobs.items():
            completed_at = job.get('completed_at')
            if not completed_at:
                continue
            try:
                finished = datetime.fromisoformat(completed_at)
            except (ValueError, TypeError):
                continue
            if (now - finished).total_seconds() > 3600:  # 1 hour
                stale.append(job_id)
        for job_id in stale:
            del _import_jobs[job_id]
# ============================================================================
# PYDANTIC MODELS
# ============================================================================
class ParseFilenameRequest(BaseModel):
    # Request body for POST /parse: run one filename through one pattern.
    filename: str
    pattern: str
class FileInfo(BaseModel):
    # Describes a single uploaded file awaiting processing.
    # manual_datetime / manual_username override values parsed from the filename.
    filename: str
    temp_path: str
    manual_datetime: Optional[str] = None
    manual_username: Optional[str] = None
class ProcessFilesRequest(BaseModel):
    # Request body for POST /process.
    # NOTE(review): `files` is List[dict] rather than List[FileInfo]; the
    # background worker accesses entries by subscript, so dicts are expected.
    service_name: str
    files: List[dict]
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def extract_youtube_metadata(video_id: str) -> Optional[Dict]:
    """Fetch metadata for a YouTube video via yt-dlp without downloading it.

    Returns a trimmed metadata dict, or None when yt-dlp fails, times out,
    or its JSON output cannot be used.
    """
    import subprocess
    import json
    cmd = [
        '/opt/media-downloader/venv/bin/yt-dlp',
        '--dump-json',
        '--no-download',
        '--no-warnings',
        f'https://www.youtube.com/watch?v={video_id}'
    ]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        if proc.returncode != 0:
            return None
        info = json.loads(proc.stdout)
        # yt-dlp reports upload_date as YYYYMMDD; tolerate absence/bad format.
        parsed_date = None
        raw_date = info.get('upload_date')
        if raw_date:
            try:
                parsed_date = datetime.strptime(raw_date, '%Y%m%d')
            except ValueError:
                parsed_date = None
        description = info.get('description')
        return {
            'title': info.get('title', ''),
            'uploader': info.get('uploader', info.get('channel', '')),
            'channel': info.get('channel', info.get('uploader', '')),
            'upload_date': parsed_date,
            'duration': info.get('duration'),
            'view_count': info.get('view_count'),
            'description': description[:200] if description else None
        }
    except Exception as e:
        logger.warning(f"Failed to extract YouTube metadata for {video_id}: {e}", module="ManualImport")
        return None
def extract_video_id_from_filename(filename: str) -> Optional[str]:
    """Best-effort extraction of an 11-char YouTube video ID from a filename.

    Tries, in order: [ID] in brackets, a trailing _ID, a stem that is exactly
    the bare ID, then an ID delimited anywhere in the stem.  Returns None
    when nothing matches.
    """
    import re
    stem = Path(filename).stem
    candidates = (
        r'\[([A-Za-z0-9_-]{11})\]',                          # [ID] in brackets
        r'_([A-Za-z0-9_-]{11})$',                            # trailing _ID
        r'^([A-Za-z0-9_-]{11})$',                            # stem is the bare ID
        r'(?:^|[_\-\s])([A-Za-z0-9_-]{11})(?:[_\-\s.]|$)',   # ID delimited mid-stem
    )
    for pattern in candidates:
        match = re.search(pattern, stem)
        if match:
            return match.group(1)
    return None
# ============================================================================
# ENDPOINTS
# ============================================================================
@router.get("/services")
@limiter.limit("60/minute")
@handle_exceptions
async def get_manual_import_services(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Return the manual-import configuration plus preset filename patterns.

    Falls back to a disabled default configuration when the 'manual_import'
    settings key is absent.
    """
    app_state = get_app_state()
    config = app_state.settings.get('manual_import')
    if not config:
        return {
            "enabled": False,
            "temp_dir": "/opt/media-downloader/temp/manual_import",
            "services": [],
            "preset_patterns": get_preset_patterns()
        }
    # Return a shallow copy: writing into the dict owned by the settings
    # store would leak 'preset_patterns' into the persisted configuration.
    return {**config, 'preset_patterns': get_preset_patterns()}
@router.post("/parse")
@limiter.limit("100/minute")
@handle_exceptions
async def parse_filename(
    request: Request,
    body: ParseFilenameRequest,
    current_user: Dict = Depends(get_current_user)
):
    """Parse a filename with the given pattern and return extracted metadata.

    On parser failure a structured error payload is returned rather than an
    HTTP error, so the UI can show per-filename feedback.
    """
    try:
        parsed = FilenameParser(body.pattern).parse(body.filename)
        # Datetimes are serialized to ISO-8601 for the JSON response.
        if parsed['datetime']:
            parsed['datetime'] = parsed['datetime'].isoformat()
        return parsed
    except Exception as e:
        logger.error(f"Error parsing filename: {e}", module="ManualImport")
        return {
            "valid": False,
            "error": str(e),
            "username": None,
            "datetime": None,
            "media_id": None
        }
# File upload limits and accepted media extensions
MAX_FILE_SIZE = 5 * 1024 ** 3  # 5 GiB per uploaded file
MAX_FILENAME_LENGTH = 255  # typical filesystem limit
ALLOWED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.mp4', '.mov', '.avi', '.mkv', '.webm', '.webp', '.heic', '.heif'}
@router.post("/upload")
@limiter.limit("30/minute")
@handle_exceptions
async def upload_files_for_import(
    request: Request,
    files: List[UploadFile] = File(...),
    service_name: str = Form(...),
    current_user: Dict = Depends(get_current_user)
):
    """Upload files to a per-session temp directory for manual import.

    Validates filename length, extension, and size, then parses each filename
    with the service's configured pattern (Instagram services use a set of
    fallback patterns).  Uploads are streamed to disk in 1 MiB chunks so a
    file approaching MAX_FILE_SIZE (5 GB) never has to fit in memory.

    Raises:
        ValidationError: manual import disabled, or a file fails validation.
        NotFoundError: unknown or disabled service.
    """
    app_state = get_app_state()
    config = app_state.settings.get('manual_import')
    if not config or not config.get('enabled'):
        raise ValidationError("Manual import is not enabled")
    services = config.get('services', [])
    service = next((s for s in services if s['name'] == service_name and s.get('enabled', True)), None)
    if not service:
        raise NotFoundError(f"Service '{service_name}' not found or disabled")
    # Each upload session gets its own temp subdirectory.
    session_id = str(uuid.uuid4())[:8]
    temp_base = Path(config.get('temp_dir', '/opt/media-downloader/temp/manual_import'))
    temp_dir = temp_base / session_id
    temp_dir.mkdir(parents=True, exist_ok=True)
    pattern = service.get('filename_pattern', '{username}_{YYYYMMDD}_{HHMMSS}_{id}')
    platform = service.get('platform', 'unknown')
    # Use fallback patterns for Instagram (handles both underscore and dash formats)
    use_fallbacks = platform == 'instagram'
    parser = FilenameParser(pattern) if not use_fallbacks else None
    uploaded_files = []
    chunk_size = 1024 * 1024  # stream in 1 MiB chunks
    for file in files:
        # Sanitize filename - use only the basename to prevent path traversal
        safe_filename = Path(file.filename).name
        # Validate filename length
        if len(safe_filename) > MAX_FILENAME_LENGTH:
            raise ValidationError(f"Filename too long: {safe_filename[:50]}... (max {MAX_FILENAME_LENGTH} chars)")
        # Validate file extension
        file_ext = Path(safe_filename).suffix.lower()
        if file_ext not in ALLOWED_EXTENSIONS:
            raise ValidationError(f"File type not allowed: {file_ext}. Allowed: {', '.join(sorted(ALLOWED_EXTENSIONS))}")
        file_path = temp_dir / safe_filename
        # Stream to disk with incremental size enforcement instead of
        # `await file.read()`, which would buffer up to 5 GB in memory.
        total_size = 0
        too_large = False
        with open(file_path, 'wb') as f:
            while chunk := await file.read(chunk_size):
                total_size += len(chunk)
                if total_size > MAX_FILE_SIZE:
                    too_large = True
                    break
                f.write(chunk)
        if too_large:
            file_path.unlink(missing_ok=True)  # discard the partial write
            raise ValidationError(f"File too large: {safe_filename} ({total_size / (1024*1024*1024):.2f}GB, max {MAX_FILE_SIZE / (1024*1024*1024)}GB)")
        # Parse filename - use fallback patterns for Instagram
        if use_fallbacks:
            parse_result = parse_with_fallbacks(file.filename, INSTAGRAM_PATTERNS)
        else:
            parse_result = parser.parse(file.filename)
        parsed_datetime = None
        if parse_result['datetime']:
            parsed_datetime = parse_result['datetime'].isoformat()
        uploaded_files.append({
            "filename": file.filename,
            "temp_path": str(file_path),
            "size": total_size,
            "parsed": {
                "valid": parse_result['valid'],
                "username": parse_result['username'],
                "datetime": parsed_datetime,
                "media_id": parse_result['media_id'],
                "error": parse_result['error']
            }
        })
    logger.info(f"Uploaded {len(uploaded_files)} files for manual import (service: {service_name})", module="ManualImport")
    return {
        "session_id": session_id,
        "service_name": service_name,
        "files": uploaded_files,
        "temp_dir": str(temp_dir)
    }
def process_files_background(
    job_id: str,
    service_name: str,
    files: List[dict],
    service: dict,
    app_state
):
    """Background task to process imported files.

    For each uploaded file: determine username/timestamp (manual override,
    yt-dlp metadata for YouTube, or filename parsing), move it into the
    service's destination tree via MoveManager, and record the move in the
    ``downloads`` table.  Progress is published through the in-memory job
    registry (``update_job_status``) and scraper events are emitted so the
    UI can monitor the run like a normal scrape session.

    Args:
        job_id: Key into the job registry created by ``create_job``.
        service_name: Name of the configured manual-import service.
        files: Dicts with 'filename', 'temp_path' and optional
            'manual_datetime' / 'manual_username' overrides.
        service: The service's configuration dict (destination, pattern, ...).
        app_state: Application state (DB handle, scraper event emitter).
    """
    import hashlib
    from modules.move_module import MoveManager
    destination = Path(service['destination'])
    destination.mkdir(parents=True, exist_ok=True)
    pattern = service.get('filename_pattern', '{username}_{YYYYMMDD}_{HHMMSS}_{id}')
    platform = service.get('platform', 'unknown')
    content_type = service.get('content_type', 'videos')
    use_ytdlp = service.get('use_ytdlp', False)
    parse_filename = service.get('parse_filename', True)
    # Use fallback patterns for Instagram
    use_fallbacks = platform == 'instagram'
    parser = FilenameParser(pattern) if not use_fallbacks else None
    # Generate session ID for real-time monitoring
    session_id = f"manual_import_{service_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    # Emit scraper_started event
    if app_state.scraper_event_emitter:
        app_state.scraper_event_emitter.emit_scraper_started(
            session_id=session_id,
            platform=platform,
            account=service_name,
            content_type=content_type,
            estimated_count=len(files)
        )
    # Face recognition is skipped for manual imports; notifications disabled.
    move_manager = MoveManager(
        unified_db=app_state.db,
        face_recognition_enabled=False,
        notifier=None,
        event_emitter=app_state.scraper_event_emitter
    )
    move_manager.set_session_context(
        platform=platform,
        account=service_name,
        session_id=session_id
    )
    results = []
    success_count = 0
    failed_count = 0
    # Per-file loop: every outcome is appended to `results` and mirrored
    # into the job registry so the status endpoint reflects live progress.
    for idx, file_info in enumerate(files):
        temp_path = Path(file_info['temp_path'])
        filename = file_info['filename']
        manual_datetime_str = file_info.get('manual_datetime')
        manual_username = file_info.get('manual_username')
        # Update job status with current file
        update_job_status(job_id, {
            'current_file': filename,
            'processed_files': idx
        })
        if not temp_path.exists():
            result = {"filename": filename, "status": "error", "error": "File not found in temp directory"}
            results.append(result)
            failed_count += 1
            update_job_status(job_id, {'results': results.copy(), 'failed_count': failed_count})
            continue
        username = 'unknown'
        parsed_datetime = None
        final_filename = filename
        # Use manual values if provided (they take precedence over parsing).
        if not parse_filename or manual_datetime_str or manual_username:
            if manual_username:
                username = manual_username.strip().lower()
            if manual_datetime_str:
                # Accept the HTML datetime-local format first, then ISO-8601.
                try:
                    parsed_datetime = datetime.strptime(manual_datetime_str, '%Y-%m-%dT%H:%M')
                except ValueError:
                    try:
                        parsed_datetime = datetime.fromisoformat(manual_datetime_str)
                    except ValueError:
                        logger.warning(f"Could not parse manual datetime: {manual_datetime_str}", module="ManualImport")
        # Try yt-dlp for YouTube videos
        if use_ytdlp and platform == 'youtube':
            video_id = extract_video_id_from_filename(filename)
            if video_id:
                logger.info(f"Extracting YouTube metadata for video ID: {video_id}", module="ManualImport")
                yt_metadata = extract_youtube_metadata(video_id)
                if yt_metadata:
                    username = yt_metadata.get('channel') or yt_metadata.get('uploader') or 'unknown'
                    # Keep only filesystem-safe characters in the channel name.
                    username = "".join(c for c in username if c.isalnum() or c in ' _-').strip().replace(' ', '_')
                    parsed_datetime = yt_metadata.get('upload_date')
                    if yt_metadata.get('title'):
                        title = yt_metadata['title'][:50]
                        title = "".join(c for c in title if c.isalnum() or c in ' _-').strip().replace(' ', '_')
                        ext = Path(filename).suffix
                        final_filename = f"{username}_{parsed_datetime.strftime('%Y%m%d') if parsed_datetime else 'unknown'}_{title}_{video_id}{ext}"
        # Fall back to filename parsing (only when nothing above set a username)
        if parse_filename and username == 'unknown':
            if use_fallbacks:
                parse_result = parse_with_fallbacks(filename, INSTAGRAM_PATTERNS)
            else:
                parse_result = parser.parse(filename)
            if parse_result['valid']:
                username = parse_result['username'] or 'unknown'
                parsed_datetime = parse_result['datetime']
            elif not use_ytdlp:
                # With yt-dlp enabled a parse failure is tolerated; otherwise fail the file.
                result = {"filename": filename, "status": "error", "error": parse_result['error'] or "Failed to parse filename"}
                results.append(result)
                failed_count += 1
                update_job_status(job_id, {'results': results.copy(), 'failed_count': failed_count})
                continue
        # Files land under <destination>/<username>/.
        dest_subdir = destination / username
        dest_subdir.mkdir(parents=True, exist_ok=True)
        dest_path = dest_subdir / final_filename
        # NOTE(review): start_batch/end_batch are paired per file here — confirm
        # MoveManager tolerates single-file batches inside a loop.
        move_manager.start_batch(
            platform=platform,
            source=username,
            content_type=content_type
        )
        file_size = temp_path.stat().st_size if temp_path.exists() else 0
        try:
            success = move_manager.move_file(
                source=temp_path,
                destination=dest_path,
                timestamp=parsed_datetime,
                preserve_if_no_timestamp=True,
                content_type=content_type
            )
            move_manager.end_batch()
            if success:
                # Synthetic URL/hash so manual imports dedupe like real downloads.
                url_hash = hashlib.sha256(f"manual_import:{final_filename}".encode()).hexdigest()
                with app_state.db.get_connection(for_write=True) as conn:
                    cursor = conn.cursor()
                    # NOTE(review): INSERT OR REPLACE is SQLite dialect — confirm
                    # the unified DB backend, as other modules mention PostgreSQL.
                    cursor.execute("""
                        INSERT OR REPLACE INTO downloads
                        (url_hash, url, platform, source, content_type, filename, file_path,
                         file_size, file_hash, post_date, download_date, status, media_id)
                        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'completed', ?)
                    """, (
                        url_hash,
                        f"manual_import://{final_filename}",
                        platform,
                        username,
                        content_type,
                        final_filename,
                        str(dest_path),
                        file_size,
                        None,
                        parsed_datetime.isoformat() if parsed_datetime else None,
                        datetime.now().isoformat(),
                        None
                    ))
                    conn.commit()
                result = {
                    "filename": filename,
                    "status": "success",
                    "destination": str(dest_path),
                    "username": username,
                    "datetime": parsed_datetime.isoformat() if parsed_datetime else None
                }
                results.append(result)
                success_count += 1
            else:
                result = {"filename": filename, "status": "error", "error": "Failed to move file (possibly duplicate)"}
                results.append(result)
                failed_count += 1
        except Exception as e:
            # Close the batch even on failure so MoveManager state stays consistent.
            move_manager.end_batch()
            result = {"filename": filename, "status": "error", "error": str(e)}
            results.append(result)
            failed_count += 1
        # Update job status after each file
        update_job_status(job_id, {
            'results': results.copy(),
            'success_count': success_count,
            'failed_count': failed_count,
            'processed_files': idx + 1
        })
    # Clean up temp directory (best effort — all files share one session dir)
    try:
        temp_parent = Path(files[0]['temp_path']).parent if files else None
        if temp_parent and temp_parent.exists():
            for f in temp_parent.iterdir():
                f.unlink()
            temp_parent.rmdir()
    except Exception:
        pass
    # Emit scraper_completed event
    if app_state.scraper_event_emitter:
        app_state.scraper_event_emitter.emit_scraper_completed(
            session_id=session_id,
            stats={
                'total_downloaded': len(files),
                'moved': success_count,
                'review': 0,
                'duplicates': 0,
                'failed': failed_count
            }
        )
    # Mark job as complete
    update_job_status(job_id, {
        'status': 'completed',
        'completed_at': datetime.now().isoformat(),
        'current_file': None
    })
    logger.info(f"Manual import complete: {success_count} succeeded, {failed_count} failed", module="ManualImport")
    # Cleanup old jobs
    cleanup_old_jobs()
@router.post("/process")
@limiter.limit("10/minute")
@handle_exceptions
async def process_imported_files(
    request: Request,
    background_tasks: BackgroundTasks,
    body: ProcessFilesRequest,
    current_user: Dict = Depends(get_current_user)
):
    """Queue background processing of uploaded files; returns a job ID at once.

    Poll /api/manual-import/status/{job_id} for progress.
    """
    app_state = get_app_state()
    config = app_state.settings.get('manual_import')
    if not config or not config.get('enabled'):
        raise ValidationError("Manual import is not enabled")
    service = next(
        (s for s in config.get('services', [])
         if s['name'] == body.service_name and s.get('enabled', True)),
        None
    )
    if service is None:
        raise NotFoundError(f"Service '{body.service_name}' not found")
    # Register the job before queueing so an immediate status poll can find it.
    job_id = f"import_{uuid.uuid4().hex[:12]}"
    create_job(job_id, len(body.files), body.service_name)
    background_tasks.add_task(
        process_files_background,
        job_id,
        body.service_name,
        body.files,
        service,
        app_state
    )
    logger.info(f"Manual import job {job_id} queued: {len(body.files)} files for {body.service_name}", module="ManualImport")
    return {
        "job_id": job_id,
        "status": "processing",
        "total_files": len(body.files),
        "message": "Processing started in background"
    }
@router.get("/status/{job_id}")
@limiter.limit("120/minute")
@handle_exceptions
async def get_import_job_status(
    request: Request,
    job_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Return the live state of a manual import job from the in-memory registry."""
    job = get_job_status(job_id)
    if job is None:
        raise NotFoundError(f"Job '{job_id}' not found")
    return job
@router.delete("/temp")
@limiter.limit("10/minute")
@handle_exceptions
async def clear_temp_directory(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Remove every pending upload from the manual-import temp directory."""
    app_state = get_app_state()
    config = app_state.settings.get('manual_import')
    default_dir = '/opt/media-downloader/temp/manual_import'
    temp_dir = Path(config.get('temp_dir', default_dir)) if config else Path(default_dir)
    # Wipe and recreate so the directory always exists afterwards.
    if temp_dir.exists():
        shutil.rmtree(temp_dir)
    temp_dir.mkdir(parents=True, exist_ok=True)
    logger.info("Cleared manual import temp directory", module="ManualImport")
    return {"status": "success", "message": "Temp directory cleared"}
@router.get("/preset-patterns")
@limiter.limit("60/minute")
@handle_exceptions
async def get_preset_filename_patterns(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """List the preset filename patterns available for service configuration."""
    patterns = get_preset_patterns()
    return {"patterns": patterns}

1404
web/backend/routers/media.py Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

1098
web/backend/routers/press.py Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,602 @@
"""
Recycle Bin Router
Handles all recycle bin operations:
- List deleted files
- Recycle bin statistics
- Restore files
- Permanently delete files
- Empty recycle bin
- Serve files for preview
- Get file metadata
"""
import hashlib
import json
import mimetypes
import sqlite3
from typing import Dict, Optional
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, Body, Query, Request
from fastapi.responses import FileResponse, Response
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, get_current_user_media, require_admin, get_app_state
from ..core.config import settings
from ..core.exceptions import (
handle_exceptions,
DatabaseError,
RecordNotFoundError,
MediaFileNotFoundError as CustomFileNotFoundError,
FileOperationError
)
from ..core.responses import now_iso8601
from ..core.utils import ThumbnailLRUCache
from modules.universal_logger import get_logger
logger = get_logger('API')
router = APIRouter(prefix="/api/recycle", tags=["Recycle Bin"])
limiter = Limiter(key_func=get_remote_address)
# Global thumbnail memory cache for recycle bin (500 items or 100MB max)
# Using shared ThumbnailLRUCache from core/utils.py
# Module-level singleton shared by every request served by this router.
_thumbnail_cache = ThumbnailLRUCache(max_size=500, max_memory_mb=100)
@router.get("/list")
@limiter.limit("100/minute")
@handle_exceptions
async def list_recycle_bin(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    deleted_from: Optional[str] = None,
    platform: Optional[str] = None,
    source: Optional[str] = None,
    search: Optional[str] = None,
    media_type: Optional[str] = None,
    date_from: Optional[str] = None,
    date_to: Optional[str] = None,
    size_min: Optional[int] = None,
    size_max: Optional[int] = None,
    sort_by: str = Query('download_date', pattern='^(deleted_at|file_size|filename|deleted_from|download_date|post_date|confidence)$'),
    sort_order: str = Query('desc', pattern='^(asc|desc)$'),
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0)
):
    """List files in the recycle bin with filtering, sorting and paging.

    Filters: deleted_from (downloads/media/review), platform, source/username,
    filename search, media_type (image/video), deletion-date range
    (YYYY-MM-DD), and file-size range in bytes.  sort_by/sort_order control
    ordering; limit/offset page through results.
    """
    db = get_app_state().db
    if not db:
        raise DatabaseError("Database not initialized")
    # All filtering/sorting happens in the DB layer.
    page = db.list_recycle_bin(
        deleted_from=deleted_from,
        platform=platform,
        source=source,
        search=search,
        media_type=media_type,
        date_from=date_from,
        date_to=date_to,
        size_min=size_min,
        size_max=size_max,
        sort_by=sort_by,
        sort_order=sort_order,
        limit=limit,
        offset=offset
    )
    return {
        "success": True,
        "items": page['items'],
        "total": page['total']
    }
@router.get("/filters")
@limiter.limit("100/minute")
@handle_exceptions
async def get_recycle_filters(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    platform: Optional[str] = None
):
    """Return the filter options (platforms/sources) available for the bin.

    When *platform* is given, the source list is restricted to it.
    """
    db = get_app_state().db
    if not db:
        raise DatabaseError("Database not initialized")
    options = db.get_recycle_bin_filters(platform=platform)
    return {
        "success": True,
        "platforms": options['platforms'],
        "sources": options['sources']
    }
@router.get("/stats")
@limiter.limit("100/minute")
@handle_exceptions
async def get_recycle_bin_stats(request: Request, current_user: Dict = Depends(get_current_user)):
    """Return recycle-bin totals (count, size) and per-source breakdown."""
    db = get_app_state().db
    if not db:
        raise DatabaseError("Database not initialized")
    return {
        "success": True,
        "stats": db.get_recycle_bin_stats(),
        "timestamp": now_iso8601()
    }
@router.post("/restore")
@limiter.limit("20/minute")
@handle_exceptions
async def restore_from_recycle(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    recycle_id: str = Body(..., embed=True)
):
    """Move a recycled file back to its original path and re-register it.

    The DB layer restores the file to its original location and re-adds it
    to the file_inventory table.

    Raises:
        FileOperationError: when the restore fails.
    """
    app_state = get_app_state()
    db = app_state.db
    if not db:
        raise DatabaseError("Database not initialized")
    if not db.restore_from_recycle_bin(recycle_id):
        raise FileOperationError(
            "Failed to restore file",
            {"recycle_id": recycle_id}
        )
    # Best-effort websocket notification; failure must not fail the request.
    try:
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            await app_state.websocket_manager.broadcast({
                "type": "recycle_restore_completed",
                "recycle_id": recycle_id,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    logger.info(f"Restored file from recycle bin: {recycle_id}", module="Recycle")
    return {
        "success": True,
        "message": "File restored successfully",
        "recycle_id": recycle_id
    }
@router.delete("/delete/{recycle_id}")
@limiter.limit("20/minute")
@handle_exceptions
async def permanently_delete_from_recycle(
    request: Request,
    recycle_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Permanently remove one recycled file from disk.

    **Admin only** — irreversible.

    Raises:
        FileOperationError: when the deletion fails.
    """
    app_state = get_app_state()
    db = app_state.db
    if not db:
        raise DatabaseError("Database not initialized")
    if not db.permanently_delete_from_recycle_bin(recycle_id):
        raise FileOperationError(
            "Failed to delete file",
            {"recycle_id": recycle_id}
        )
    # Best-effort websocket notification.
    try:
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            await app_state.websocket_manager.broadcast({
                "type": "recycle_delete_completed",
                "recycle_id": recycle_id,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    logger.info(f"Permanently deleted file from recycle: {recycle_id}", module="Recycle")
    return {
        "success": True,
        "message": "File permanently deleted",
        "recycle_id": recycle_id
    }
@router.post("/empty")
@limiter.limit("5/minute")
@handle_exceptions
async def empty_recycle_bin(
    request: Request,
    current_user: Dict = Depends(require_admin),  # Require admin for destructive operation
    older_than_days: Optional[int] = Body(None, embed=True)
):
    """
    Empty recycle bin.
    Args:
        older_than_days: Only delete files older than X days.
            If not specified, all files are deleted.
    """
    state = get_app_state()
    database = state.db
    if not database:
        raise DatabaseError("Database not initialized")
    removed = database.empty_recycle_bin(older_than_days=older_than_days)
    # Best-effort websocket notification; failures are ignored.
    try:
        ws = getattr(state, 'websocket_manager', None)
        if ws:
            await ws.broadcast({
                "type": "recycle_emptied",
                "deleted_count": removed,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    logger.info(f"Emptied recycle bin: {removed} files deleted", module="Recycle")
    return {
        "success": True,
        "deleted_count": removed,
        "older_than_days": older_than_days
    }
@router.get("/file/{recycle_id}")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_recycle_file(
    request: Request,
    recycle_id: str,
    thumbnail: bool = False,
    type: Optional[str] = None,
    token: Optional[str] = None,
    current_user: Dict = Depends(get_current_user_media)
):
    """
    Serve a file from recycle bin for preview.

    Args:
        recycle_id: ID of the recycle bin record
        thumbnail: If True, return a thumbnail instead of the full file
        type: Media type hint (image/video)
        token: presumably consumed by the get_current_user_media
            dependency for query-string auth -- TODO confirm

    Returns:
        A JPEG ``Response`` when ``thumbnail`` is True, otherwise a
        ``FileResponse`` streaming the original file with its original
        filename.

    Raises:
        DatabaseError: database not initialized
        RecordNotFoundError: no recycle_bin row for this id
        CustomFileNotFoundError: row exists but file is gone from disk
        FileOperationError: thumbnail generation failed
    """
    app_state = get_app_state()
    db = app_state.db
    if not db:
        raise DatabaseError("Database not initialized")
    # Get recycle bin record
    with db.get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT recycle_path, original_path, original_filename, file_hash FROM recycle_bin WHERE id = ?',
            (recycle_id,)
        )
        row = cursor.fetchone()
    if not row:
        raise RecordNotFoundError(
            "File not found in recycle bin",
            {"recycle_id": recycle_id}
        )
    file_path = Path(row['recycle_path'])
    original_path = row['original_path']  # Path where thumbnail was originally cached
    if not file_path.exists():
        raise CustomFileNotFoundError(
            "Physical file not found",
            {"path": str(file_path)}
        )
    # If thumbnail requested, use 3-tier caching:
    # in-memory LRU -> thumbnail DB -> generate on demand.
    # Use content hash as cache key so thumbnails survive file moves;
    # fall back to the recycle path when no hash was recorded.
    if thumbnail:
        content_hash = row['file_hash']
        cache_key = content_hash if content_hash else str(file_path)
        # 1. Check in-memory LRU cache first (fastest)
        thumbnail_data = _thumbnail_cache.get(cache_key)
        if thumbnail_data:
            return Response(
                content=thumbnail_data,
                media_type="image/jpeg",
                headers={
                    "Cache-Control": "public, max-age=86400, immutable",
                    "Vary": "Accept-Encoding"
                }
            )
        # 2. Get from database cache or generate on-demand
        # Pass content hash and original_path for fallback lookup
        thumbnail_data = _get_or_create_thumbnail(file_path, type or 'image', content_hash, original_path)
        if not thumbnail_data:
            raise FileOperationError("Failed to generate thumbnail")
        # 3. Add to in-memory cache for faster subsequent requests
        _thumbnail_cache.put(cache_key, thumbnail_data)
        return Response(
            content=thumbnail_data,
            media_type="image/jpeg",
            headers={
                "Cache-Control": "public, max-age=86400, immutable",
                "Vary": "Accept-Encoding"
            }
        )
    # Otherwise serve full file; guess the MIME type from the extension.
    mime_type, _ = mimetypes.guess_type(str(file_path))
    if not mime_type:
        mime_type = "application/octet-stream"
    return FileResponse(
        path=str(file_path),
        media_type=mime_type,
        filename=row['original_filename']
    )
@router.get("/metadata/{recycle_id}")
@limiter.limit("5000/minute")
@handle_exceptions
async def get_recycle_metadata(
    request: Request,
    recycle_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """
    Get metadata for a recycle bin file.
    Returns dimensions, size, platform, source, and other metadata.
    This is fetched on-demand for performance.
    """
    state = get_app_state()
    database = state.db
    if not database:
        raise DatabaseError("Database not initialized")
    # Look up the recycle-bin record for this id.
    with database.get_connection() as conn:
        cur = conn.cursor()
        cur.execute('''
            SELECT recycle_path, original_filename, file_size, original_path, metadata
            FROM recycle_bin WHERE id = ?
        ''', (recycle_id,))
        record = cur.fetchone()
        if not record:
            raise RecordNotFoundError(
                "File not found in recycle bin",
                {"recycle_id": recycle_id}
            )
        recycle_path = Path(record['recycle_path'])
        if not recycle_path.exists():
            raise CustomFileNotFoundError(
                "Physical file not found",
                {"path": str(recycle_path)}
            )
        # Platform/source live inside the JSON metadata blob; tolerate
        # missing or malformed JSON.
        platform = None
        source = None
        try:
            meta = json.loads(record['metadata']) if record['metadata'] else {}
            platform = meta.get('platform')
            source = meta.get('source')
        except Exception:
            pass
        # Dimensions are probed from the file on demand, not stored.
        width, height, duration = _extract_dimensions(recycle_path)
        return {
            "success": True,
            "recycle_id": recycle_id,
            "filename": record['original_filename'],
            "file_size": record['file_size'],
            "platform": platform,
            "source": source,
            "width": width,
            "height": height,
            "duration": duration
        }
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def _get_or_create_thumbnail(file_path: Path, media_type: str, content_hash: Optional[str] = None, original_path: Optional[str] = None) -> Optional[bytes]:
    """
    Get or create a thumbnail for a file.
    Uses the same caching system as media.py for consistency.
    Uses a 2-step lookup for backwards compatibility:
    1. Try content hash (new method - survives file moves)
    2. Fall back to original_path lookup (legacy thumbnails cached before move)

    Args:
        file_path: Path to the file (current location in recycle bin)
        media_type: 'image' or 'video'
        content_hash: Optional content hash (SHA256 of file content) to use for cache lookup.
        original_path: Optional original file path before moving to recycle bin.

    Returns:
        JPEG thumbnail bytes, or None when generation fails.
    """
    from PIL import Image
    import io
    from datetime import datetime
    # Cache lookup is best-effort: any DB problem falls through to
    # on-demand generation below.
    # NOTE(review): 'thumbnails' is passed verbatim to sqlite3.connect, so it
    # resolves relative to the process CWD -- confirm this matches media.py.
    try:
        with sqlite3.connect('thumbnails', timeout=30.0) as conn:
            cursor = conn.cursor()
            # 1. Try content hash first (new method - survives file moves)
            if content_hash:
                cursor.execute("SELECT thumbnail_data FROM thumbnails WHERE file_hash = ?", (content_hash,))
                result = cursor.fetchone()
                if result:
                    return result[0]
            # 2. Fall back to original_path lookup (legacy thumbnails cached before move)
            if original_path:
                cursor.execute("SELECT thumbnail_data FROM thumbnails WHERE file_path = ?", (original_path,))
                result = cursor.fetchone()
                if result:
                    return result[0]
    except Exception:
        pass
    # Generate thumbnail
    thumbnail_data = None
    try:
        if media_type == 'video':
            # For videos, try to extract a frame (1s in, piped as MJPEG)
            import subprocess
            result = subprocess.run([
                'ffmpeg', '-i', str(file_path),
                '-ss', '00:00:01', '-vframes', '1',
                '-f', 'image2pipe', '-vcodec', 'mjpeg', '-'
            ], capture_output=True, timeout=10)
            if result.returncode == 0:
                img = Image.open(io.BytesIO(result.stdout))
            else:
                return None
        else:
            img = Image.open(file_path)
        # Convert to RGB if necessary (JPEG cannot store alpha/palette)
        if img.mode in ('RGBA', 'P'):
            img = img.convert('RGB')
        # Create thumbnail (bounded to 300x300, aspect ratio preserved)
        img.thumbnail((300, 300), Image.Resampling.LANCZOS)
        # Save to bytes
        output = io.BytesIO()
        img.save(output, format='JPEG', quality=85)
        thumbnail_data = output.getvalue()
        # Cache the generated thumbnail
        if thumbnail_data:
            try:
                file_mtime = file_path.stat().st_mtime if file_path.exists() else None
                # Compute file_hash if not provided.
                # NOTE(review): this fallback hashes the PATH STRING, not the
                # file content, so it is not a true content hash; such rows
                # are only found again via the file_path column.
                thumb_file_hash = content_hash if content_hash else hashlib.sha256(str(file_path).encode()).hexdigest()
                with sqlite3.connect('thumbnails') as conn:
                    conn.execute("""
                        INSERT OR REPLACE INTO thumbnails
                        (file_hash, file_path, thumbnail_data, created_at, file_mtime)
                        VALUES (?, ?, ?, ?, ?)
                    """, (thumb_file_hash, str(file_path), thumbnail_data, datetime.now().isoformat(), file_mtime))
                    conn.commit()
            except Exception:
                pass  # Caching is optional, don't fail if it doesn't work
        return thumbnail_data
    except Exception as e:
        logger.warning(f"Failed to generate thumbnail: {e}", module="Recycle")
        return None
def _extract_dimensions(file_path: Path) -> tuple:
    """
    Extract dimensions from a media file.
    Returns: (width, height, duration)
    """
    width = None
    height = None
    duration = None
    suffix = file_path.suffix.lower()
    image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.heic', '.heif'}
    video_exts = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v'}
    try:
        if suffix in image_exts:
            from PIL import Image
            with Image.open(file_path) as img:
                width, height = img.size
        elif suffix in video_exts:
            import subprocess
            probe = subprocess.run([
                'ffprobe', '-v', 'quiet', '-print_format', 'json',
                '-show_streams', str(file_path)
            ], capture_output=True, text=True, timeout=10)
            if probe.returncode == 0:
                info = json.loads(probe.stdout)
                # Only the first video stream matters.
                streams = (s for s in info.get('streams', [])
                           if s.get('codec_type') == 'video')
                stream = next(streams, None)
                if stream is not None:
                    width = stream.get('width')
                    height = stream.get('height')
                    raw_duration = stream.get('duration')
                    if raw_duration:
                        duration = float(raw_duration)
    except Exception as e:
        logger.warning(f"Failed to extract dimensions: {e}", module="Recycle")
    return width, height, duration

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,758 @@
"""
Scheduler Router
Handles all scheduler and service management operations:
- Scheduler status and task management
- Current activity monitoring
- Task pause/resume/skip operations
- Service start/stop/restart
- Cache builder service management
- Dependency updates
"""
import json
import os
import re
import signal
import sqlite3
import subprocess
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict
from fastapi import APIRouter, Depends, HTTPException, Request
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, require_admin, get_app_state
from ..core.config import settings
from ..core.exceptions import (
handle_exceptions,
RecordNotFoundError,
ServiceError
)
from ..core.responses import now_iso8601
from modules.universal_logger import get_logger
logger = get_logger('API')
router = APIRouter(prefix="/api/scheduler", tags=["Scheduler"])
limiter = Limiter(key_func=get_remote_address)
# Service names
SCHEDULER_SERVICE = 'media-downloader.service'
CACHE_BUILDER_SERVICE = 'media-cache-builder.service'
# Valid platform names for subprocess operations (defense in depth)
VALID_PLATFORMS = frozenset(['fastdl', 'imginn', 'imginn_api', 'toolzu', 'snapchat', 'tiktok', 'forums', 'coppermine', 'instagram', 'youtube'])
# Display name mapping for scheduler task_id prefixes
PLATFORM_DISPLAY_NAMES = {
'fastdl': 'FastDL',
'imginn': 'ImgInn',
'imginn_api': 'ImgInn API',
'toolzu': 'Toolzu',
'snapchat': 'Snapchat',
'tiktok': 'TikTok',
'forums': 'Forums',
'forum': 'Forum',
'monitor': 'Forum Monitor',
'instagram': 'Instagram',
'youtube': 'YouTube',
'youtube_channel_monitor': 'YouTube Channels',
'youtube_monitor': 'YouTube Monitor',
'coppermine': 'Coppermine',
'paid_content': 'Paid Content',
'appearances': 'Appearances',
'easynews_monitor': 'Easynews Monitor',
'press_monitor': 'Press Monitor',
}
# ============================================================================
# SCHEDULER STATUS ENDPOINTS
# ============================================================================
@router.get("/status")
@limiter.limit("100/minute")
@handle_exceptions
async def get_scheduler_status(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get detailed scheduler status including all tasks.

    NOTE(review): this GET has a write side effect -- it deletes stale rows
    (tasks for disabled forums and legacy platform entries lacking a
    ':username' suffix) from scheduler_state while reading it.

    Returns:
        dict with a running flag, the cleaned task list, total/active
        counts, and the earliest next_run among active tasks.
    """
    app_state = get_app_state()
    # Get enabled forums from config to filter scheduler tasks
    enabled_forums = set()
    forums_config = app_state.settings.get('forums')
    if forums_config and isinstance(forums_config, dict):
        for forum_cfg in forums_config.get('configs', []):
            if forum_cfg.get('enabled', False):
                enabled_forums.add(forum_cfg.get('name'))
    with sqlite3.connect('scheduler_state') as sched_conn:
        cursor = sched_conn.cursor()
        # Get all tasks
        cursor.execute("""
            SELECT task_id, last_run, next_run, run_count, status, last_download_count
            FROM scheduler_state
            ORDER BY next_run ASC
        """)
        tasks_raw = cursor.fetchall()
        # Clean up stale forum/monitor entries
        stale_task_ids = []
        # Platforms that should always have :username suffix
        platforms_requiring_username = {'tiktok', 'instagram', 'imginn', 'imginn_api', 'toolzu', 'snapchat', 'fastdl'}
        for row in tasks_raw:
            task_id = row[0]
            if task_id.startswith('forum:') or task_id.startswith('monitor:'):
                forum_name = task_id.split(':', 1)[1]
                if forum_name not in enabled_forums:
                    stale_task_ids.append(task_id)
            # Clean up legacy platform entries without :username suffix
            elif task_id in platforms_requiring_username:
                stale_task_ids.append(task_id)
        # Delete stale entries
        if stale_task_ids:
            for stale_id in stale_task_ids:
                cursor.execute("DELETE FROM scheduler_state WHERE task_id = ?", (stale_id,))
            sched_conn.commit()
        # Build the response list from the rows fetched above, excluding
        # the rows just deleted and internal maintenance tasks.
        tasks = []
        for row in tasks_raw:
            task_id = row[0]
            # Skip stale and maintenance tasks
            if task_id in stale_task_ids:
                continue
            if task_id.startswith('maintenance:'):
                continue
            tasks.append({
                "task_id": task_id,
                "last_run": row[1],
                "next_run": row[2],
                "run_count": row[3],
                "status": row[4],
                "last_download_count": row[5]
            })
        # Count active tasks
        active_count = sum(1 for t in tasks if t['status'] == 'active')
        # Get next run time: earliest next_run among active tasks
        next_run = None
        for task in sorted(tasks, key=lambda t: t['next_run'] or ''):
            if task['status'] == 'active' and task['next_run']:
                next_run = task['next_run']
                break
        return {
            "running": active_count > 0,
            "tasks": tasks,
            "total_tasks": len(tasks),
            "active_tasks": active_count,
            "next_run": next_run
        }
@router.get("/current-activity")
@limiter.limit("100/minute")
@handle_exceptions
async def get_current_activity(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get current scheduler activity for real-time status."""
    app_state = get_app_state()
    # Ask systemd whether the scheduler unit is currently active.
    probe = subprocess.run(
        ['systemctl', 'is-active', SCHEDULER_SERVICE],
        capture_output=True,
        text=True
    )
    if probe.stdout.strip() != 'active':
        # Scheduler service is down: report an inactive placeholder.
        return {
            "active": False,
            "scheduler_running": False,
            "task_id": None,
            "platform": None,
            "account": None,
            "start_time": None,
            "status": None
        }
    # Get current activity from database
    from modules.activity_status import get_activity_manager
    manager = get_activity_manager(app_state.db)
    current = manager.get_current_activity()
    current["scheduler_running"] = True
    return current
@router.get("/background-tasks")
@limiter.limit("100/minute")
@handle_exceptions
async def get_background_tasks(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get all active background tasks (YouTube monitor, etc.) for real-time status."""
    from modules.activity_status import get_activity_manager
    manager = get_activity_manager(get_app_state().db)
    return {"tasks": manager.get_active_background_tasks()}
@router.get("/background-tasks/{task_id}")
@limiter.limit("100/minute")
@handle_exceptions
async def get_background_task(
    task_id: str,
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get a specific background task status."""
    from modules.activity_status import get_activity_manager
    manager = get_activity_manager(get_app_state().db)
    found = manager.get_background_task(task_id)
    # Unknown or finished tasks are reported as inactive rather than 404.
    if not found:
        return {"active": False, "task_id": task_id}
    return found
@router.post("/current-activity/stop")
@limiter.limit("20/minute")
@handle_exceptions
async def stop_current_activity(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Stop the currently running download task.

    Reads database/current_activity.json, validates the recorded platform
    against VALID_PLATFORMS, SIGTERMs the matching media-downloader.py
    process(es), rewrites the activity file as inactive, and broadcasts a
    download_stopped event.

    Raises:
        RecordNotFoundError: when no activity file exists or it is
            marked inactive.
    """
    app_state = get_app_state()
    activity_file = settings.PROJECT_ROOT / 'database' / 'current_activity.json'
    if not activity_file.exists():
        raise RecordNotFoundError("No active task running")
    with open(activity_file, 'r') as f:
        activity_data = json.load(f)
    if not activity_data.get('active'):
        raise RecordNotFoundError("No active task running")
    task_id = activity_data.get('task_id')
    platform = activity_data.get('platform')
    # Security: Validate platform before using in subprocess (defense in depth)
    if platform and platform not in VALID_PLATFORMS:
        logger.warning(f"Invalid platform in activity file: {platform}", module="Security")
        platform = None
    # Find and kill the process
    if platform:
        # re.escape keeps the platform value inert inside the pgrep regex.
        result = subprocess.run(
            ['pgrep', '-f', f'media-downloader\\.py.*--platform.*{re.escape(platform)}'],
            capture_output=True,
            text=True
        )
    else:
        # Fallback: find any media-downloader process
        result = subprocess.run(
            ['pgrep', '-f', 'media-downloader\\.py'],
            capture_output=True,
            text=True
        )
    if result.stdout.strip():
        pids = [p.strip() for p in result.stdout.strip().split('\n') if p.strip()]
        for pid in pids:
            try:
                # SIGTERM only -- polite shutdown, no SIGKILL escalation.
                os.kill(int(pid), signal.SIGTERM)
                logger.info(f"Stopped process {pid} for platform {platform}")
            except (ProcessLookupError, ValueError):
                pass
    # Clear the current activity file so pollers see the stopped state.
    inactive_state = {
        "active": False,
        "task_id": None,
        "platform": None,
        "account": None,
        "start_time": None,
        "status": "stopped"
    }
    with open(activity_file, 'w') as f:
        json.dump(inactive_state, f)
    # Broadcast stop event (best effort)
    try:
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            await app_state.websocket_manager.broadcast({
                "type": "download_stopped",
                "task_id": task_id,
                "platform": platform,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    return {
        "success": True,
        "message": f"Stopped {platform} download",
        "task_id": task_id
    }
# ============================================================================
# TASK MANAGEMENT ENDPOINTS
# ============================================================================
@router.post("/tasks/{task_id}/pause")
@limiter.limit("20/minute")
@handle_exceptions
async def pause_scheduler_task(
    request: Request,
    task_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Pause a specific scheduler task."""
    app_state = get_app_state()
    with sqlite3.connect('scheduler_state') as sched_conn:
        cur = sched_conn.cursor()
        cur.execute("""
            UPDATE scheduler_state
            SET status = 'paused'
            WHERE task_id = ?
        """, (task_id,))
        sched_conn.commit()
        affected = cur.rowcount
    # rowcount of zero means no such task exists.
    if affected == 0:
        raise RecordNotFoundError("Task not found", {"task_id": task_id})
    # Best-effort websocket notification.
    try:
        ws = getattr(app_state, 'websocket_manager', None)
        if ws:
            await ws.broadcast({
                "type": "scheduler_task_paused",
                "task_id": task_id,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    return {"success": True, "task_id": task_id, "status": "paused"}
@router.post("/tasks/{task_id}/resume")
@limiter.limit("20/minute")
@handle_exceptions
async def resume_scheduler_task(
    request: Request,
    task_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Resume a paused scheduler task."""
    app_state = get_app_state()
    with sqlite3.connect('scheduler_state') as sched_conn:
        cur = sched_conn.cursor()
        cur.execute("""
            UPDATE scheduler_state
            SET status = 'active'
            WHERE task_id = ?
        """, (task_id,))
        sched_conn.commit()
        affected = cur.rowcount
    # rowcount of zero means no such task exists.
    if affected == 0:
        raise RecordNotFoundError("Task not found", {"task_id": task_id})
    # Best-effort websocket notification.
    try:
        ws = getattr(app_state, 'websocket_manager', None)
        if ws:
            await ws.broadcast({
                "type": "scheduler_task_resumed",
                "task_id": task_id,
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    return {"success": True, "task_id": task_id, "status": "active"}
@router.post("/tasks/{task_id}/skip")
@limiter.limit("20/minute")
@handle_exceptions
async def skip_next_run(
    request: Request,
    task_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Skip the next scheduled run by advancing next_run time.

    Raises:
        RecordNotFoundError: task_id does not exist in scheduler_state.
        HTTPException(400): the task has no interval_hours configured, so
            there is nothing to advance by.
    """
    app_state = get_app_state()
    with sqlite3.connect('scheduler_state') as sched_conn:
        cursor = sched_conn.cursor()
        # Get current task info
        cursor.execute("""
            SELECT next_run, interval_hours
            FROM scheduler_state
            WHERE task_id = ?
        """, (task_id,))
        result = cursor.fetchone()
        if not result:
            raise RecordNotFoundError("Task not found", {"task_id": task_id})
        current_next_run, interval_hours = result
        # Fix: previously a NULL next_run or interval_hours crashed with a
        # TypeError (HTTP 500). Reject tasks without an interval and treat
        # a missing next_run as "now".
        if interval_hours is None:
            raise HTTPException(status_code=400, detail="Task has no interval configured")
        if current_next_run:
            base_time = datetime.fromisoformat(current_next_run)
        else:
            base_time = datetime.now()
        # Calculate new next_run time
        new_next_run = base_time + timedelta(hours=interval_hours)
        # Update the next_run time
        cursor.execute("""
            UPDATE scheduler_state
            SET next_run = ?
            WHERE task_id = ?
        """, (new_next_run.isoformat(), task_id))
        sched_conn.commit()
    # Broadcast event (best effort)
    try:
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            await app_state.websocket_manager.broadcast({
                "type": "scheduler_run_skipped",
                "task_id": task_id,
                "new_next_run": new_next_run.isoformat(),
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    return {
        "success": True,
        "task_id": task_id,
        "skipped_run": current_next_run,
        "new_next_run": new_next_run.isoformat()
    }
@router.post("/tasks/{task_id}/reschedule")
@limiter.limit("20/minute")
@handle_exceptions
async def reschedule_task(
    request: Request,
    task_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Reschedule a task to a new next_run time."""
    payload = await request.json()
    requested = payload.get('next_run')
    if not requested:
        raise HTTPException(status_code=400, detail="next_run is required")
    try:
        parsed = datetime.fromisoformat(requested)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid datetime format")
    with sqlite3.connect('scheduler_state') as sched_conn:
        cur = sched_conn.cursor()
        cur.execute(
            "UPDATE scheduler_state SET next_run = ? WHERE task_id = ?",
            (parsed.isoformat(), task_id)
        )
        sched_conn.commit()
        if cur.rowcount == 0:
            raise RecordNotFoundError("Task not found", {"task_id": task_id})
    # Best-effort websocket notification.
    try:
        state = get_app_state()
        ws = getattr(state, 'websocket_manager', None)
        if ws:
            await ws.broadcast({
                "type": "scheduler_task_rescheduled",
                "task_id": task_id,
                "new_next_run": parsed.isoformat(),
                "timestamp": now_iso8601()
            })
    except Exception:
        pass
    return {"success": True, "task_id": task_id, "new_next_run": parsed.isoformat()}
# ============================================================================
# CONFIG RELOAD ENDPOINT
# ============================================================================
@router.post("/reload-config")
@limiter.limit("10/minute")
@handle_exceptions
async def reload_scheduler_config(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Reload scheduler config — picks up new/removed accounts and interval changes."""
    state = get_app_state()
    scheduler = getattr(state, 'scheduler', None)
    if scheduler is None:
        raise ServiceError("Scheduler is not running", {"service": SCHEDULER_SERVICE})
    outcome = scheduler.reload_scheduled_tasks()
    added = outcome['added']
    removed = outcome['removed']
    modified = outcome['modified']
    return {
        "success": True,
        "added": added,
        "removed": removed,
        "modified": modified,
        "message": (
            f"Reload complete: {len(added)} added, "
            f"{len(removed)} removed, "
            f"{len(modified)} modified"
        )
    }
# ============================================================================
# SERVICE MANAGEMENT ENDPOINTS
# ============================================================================
@router.get("/service/status")
@limiter.limit("100/minute")
@handle_exceptions
async def get_scheduler_service_status(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Check if scheduler service is running."""
    # systemd prints the unit state ('active', 'inactive', ...) on stdout.
    probe = subprocess.run(
        ['systemctl', 'is-active', SCHEDULER_SERVICE],
        capture_output=True,
        text=True
    )
    state_text = probe.stdout.strip()
    return {
        "running": state_text == 'active',
        "status": state_text
    }
@router.post("/service/start")
@limiter.limit("20/minute")
@handle_exceptions
async def start_scheduler_service(
    request: Request,
    current_user: Dict = Depends(require_admin)  # Require admin for service operations
):
    """Start the scheduler service. Requires admin privileges."""
    proc = subprocess.run(
        ['sudo', 'systemctl', 'start', SCHEDULER_SERVICE],
        capture_output=True,
        text=True
    )
    # Non-zero exit means systemd refused the request; surface stderr.
    if proc.returncode != 0:
        raise ServiceError(
            f"Failed to start service: {proc.stderr}",
            {"service": SCHEDULER_SERVICE}
        )
    return {"success": True, "message": "Scheduler service started"}
@router.post("/service/stop")
@limiter.limit("20/minute")
@handle_exceptions
async def stop_scheduler_service(
    request: Request,
    current_user: Dict = Depends(require_admin)  # Require admin for service operations
):
    """Stop the scheduler service. Requires admin privileges."""
    proc = subprocess.run(
        ['sudo', 'systemctl', 'stop', SCHEDULER_SERVICE],
        capture_output=True,
        text=True
    )
    # Non-zero exit means systemd refused the request; surface stderr.
    if proc.returncode != 0:
        raise ServiceError(
            f"Failed to stop service: {proc.stderr}",
            {"service": SCHEDULER_SERVICE}
        )
    return {"success": True, "message": "Scheduler service stopped"}
@router.post("/service/restart")
@limiter.limit("20/minute")
@handle_exceptions
async def restart_scheduler_service(
    request: Request,
    current_user: Dict = Depends(require_admin)
):
    """Restart the scheduler service. Requires admin privileges."""
    proc = subprocess.run(
        ['sudo', 'systemctl', 'restart', SCHEDULER_SERVICE],
        capture_output=True,
        text=True
    )
    # Non-zero exit means systemd refused the request; surface stderr.
    if proc.returncode != 0:
        raise ServiceError(
            f"Failed to restart service: {proc.stderr}",
            {"service": SCHEDULER_SERVICE}
        )
    return {"success": True, "message": "Scheduler service restarted"}
# ============================================================================
# DEPENDENCY MANAGEMENT ENDPOINTS
# ============================================================================
@router.get("/dependencies/status")
@limiter.limit("100/minute")
@handle_exceptions
async def get_dependencies_status(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get dependency update status."""
    from modules.dependency_updater import DependencyUpdater
    # Read-only status query: no scheduler side effects wanted here.
    return DependencyUpdater(scheduler_mode=False).get_update_status()
@router.post("/dependencies/check")
@limiter.limit("20/minute")
@handle_exceptions
async def check_dependencies(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Force check and update all dependencies."""
    state = get_app_state()
    from modules.dependency_updater import DependencyUpdater
    from modules.pushover_notifier import create_notifier_from_config
    cfg = state.settings.get_all()
    # Build a Pushover notifier only when notifications are enabled.
    notifier = None
    if cfg.get('pushover', {}).get('enabled'):
        notifier = create_notifier_from_config(cfg, unified_db=state.db)
    updater = DependencyUpdater(
        config=cfg.get('dependency_updater', {}),
        pushover_notifier=notifier,
        scheduler_mode=True
    )
    return {
        "success": True,
        "results": updater.force_update_check(),
        "message": "Dependency check completed"
    }
# ============================================================================
# CACHE BUILDER SERVICE ENDPOINTS
# ============================================================================
@router.post("/cache-builder/trigger")
@limiter.limit("10/minute")
@handle_exceptions
async def trigger_cache_builder(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Manually trigger the thumbnail cache builder service."""
    proc = subprocess.run(
        ['sudo', 'systemctl', 'start', CACHE_BUILDER_SERVICE],
        capture_output=True,
        text=True,
        timeout=5
    )
    # Guard clause: surface systemd's stderr on failure.
    if proc.returncode != 0:
        raise ServiceError(
            f"Failed to start cache builder: {proc.stderr}",
            {"service": CACHE_BUILDER_SERVICE}
        )
    return {"success": True, "message": "Cache builder started successfully"}
@router.get("/cache-builder/status")
@limiter.limit("30/minute")
@handle_exceptions
async def get_cache_builder_status(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get detailed cache builder service status.

    Returns:
        dict with running/inactive flags, a truncated `systemctl status`
        excerpt, and last_run/next_run placeholders.
    """
    # Get service status from systemd.
    result = subprocess.run(
        ['systemctl', 'status', CACHE_BUILDER_SERVICE, '--no-pager'],
        capture_output=True,
        text=True
    )
    status_output = result.stdout
    # Parse the human-readable status output.
    is_running = 'Active: active (running)' in status_output
    is_inactive = 'Active: inactive' in status_output
    # Fix: the previous implementation also ran `systemctl list-timers` and
    # scanned its output, but the loop body never extracted anything, so
    # last_run/next_run were always None while an extra subprocess was
    # spawned per request. The dead work is removed; the keys are kept for
    # API compatibility until timer parsing is actually implemented.
    return {
        "running": is_running,
        "inactive": is_inactive,
        "status_output": status_output[:500],  # Truncate for brevity
        "last_run": None,
        "next_run": None
    }

View File

@@ -0,0 +1,819 @@
"""
Scrapers Router
Handles scraper management and error monitoring:
- Scraper configuration (list, get, update)
- Cookie management (test connection, upload, clear)
- Error tracking (recent, count, dismiss, mark viewed)
"""
import json
import re
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional
import requests
from fastapi import APIRouter, Body, Depends, Query, Request
from pydantic import BaseModel
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, require_admin, get_app_state
from ..core.exceptions import handle_exceptions, NotFoundError, ValidationError
from modules.universal_logger import get_logger
logger = get_logger('API')
router = APIRouter(prefix="/api", tags=["Scrapers"])
limiter = Limiter(key_func=get_remote_address)
# ============================================================================
# PYDANTIC MODELS
# ============================================================================
class ScraperUpdate(BaseModel):
    """Partial update payload for a scraper; fields left as None are unchanged."""
    enabled: Optional[bool] = None
    proxy_enabled: Optional[bool] = None
    proxy_url: Optional[str] = None
    flaresolverr_required: Optional[bool] = None
    base_url: Optional[str] = None
class CookieUpload(BaseModel):
    """Cookie upload payload: a list of cookie dicts, merged into the
    existing set by default (merge=False replaces them)."""
    cookies: List[dict]
    merge: bool = True
    user_agent: Optional[str] = None
class DismissErrors(BaseModel):
    """Dismiss specific error records by id, or all of them when dismiss_all is set."""
    error_ids: Optional[List[int]] = None
    dismiss_all: bool = False
class MarkErrorsViewed(BaseModel):
    """Mark specific error records as viewed by id, or all when mark_all is set."""
    error_ids: Optional[List[int]] = None
    mark_all: bool = False
# ============================================================================
# SCRAPER ENDPOINTS
# ============================================================================
@router.get("/scrapers")
@limiter.limit("60/minute")
@handle_exceptions
async def get_scrapers(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    type_filter: Optional[str] = Query(None, alias="type", description="Filter by type")
):
    """Get all scrapers with optional type filter.

    Scrapers whose related UI modules are ALL hidden are filtered out,
    and the bulky cookies_json payload is stripped from every record
    before it is returned to the frontend.
    """
    app_state = get_app_state()
    scrapers = app_state.db.get_all_scrapers(type_filter=type_filter)
    # Filter out scrapers whose related modules are all hidden
    hidden_modules = app_state.config.get('hidden_modules', [])
    if hidden_modules:
        # Map scraper IDs to the modules that use them.
        # A scraper is only hidden if ALL related modules are hidden.
        scraper_to_modules = {
            'instagram': ['instagram', 'instagram_client'],
            'snapchat': ['snapchat', 'snapchat_client'],
            'fastdl': ['fastdl'],
            'imginn': ['imginn'],
            'toolzu': ['toolzu'],
            'tiktok': ['tiktok'],
            'coppermine': ['coppermine'],
        }
        # Forum scrapers map to the 'forums' module
        filtered = []
        for scraper in scrapers:
            sid = scraper.get('id', '')
            if sid.startswith('forum_'):
                related = ['forums']
            else:
                # Unknown scraper ids get an empty mapping and are kept.
                related = scraper_to_modules.get(sid, [])
            # Only hide if ALL related modules are hidden
            if related and all(m in hidden_modules for m in related):
                continue
            filtered.append(scraper)
        scrapers = filtered
    # Don't send cookies_json to frontend (too large)
    for scraper in scrapers:
        if 'cookies_json' in scraper:
            del scraper['cookies_json']
    return {"scrapers": scrapers}
# ============================================================================
# PLATFORM CREDENTIALS (UNIFIED COOKIE MANAGEMENT)
# ============================================================================
# Platform definitions for the unified credentials view
_SCRAPER_PLATFORMS = [
{'id': 'instagram', 'name': 'Instagram', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
{'id': 'tiktok', 'name': 'TikTok', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
{'id': 'snapchat', 'name': 'Snapchat', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
{'id': 'ytdlp', 'name': 'YouTube', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
{'id': 'pornhub', 'name': 'PornHub', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
{'id': 'xhamster', 'name': 'xHamster', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
]
_PAID_CONTENT_PLATFORMS = [
{'id': 'onlyfans_direct', 'name': 'OnlyFans', 'type': 'token', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://onlyfans.com'},
{'id': 'fansly_direct', 'name': 'Fansly', 'type': 'token', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://fansly.com'},
{'id': 'coomer', 'name': 'Coomer', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://coomer.su'},
{'id': 'kemono', 'name': 'Kemono', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://kemono.su'},
{'id': 'twitch', 'name': 'Twitch', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://twitch.tv'},
{'id': 'bellazon', 'name': 'Bellazon', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://www.bellazon.com'},
]
@router.get("/scrapers/platform-credentials")
@limiter.limit("30/minute")
@handle_exceptions
async def get_platform_credentials(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get aggregated credential status for all platforms + monitoring preferences.

    Combines three credential sources into a single list:
      1. Scraper platforms (cookie blobs stored on scraper rows)
      2. Paid-content services (session/token values via PaidContentDBAdapter)
      3. Reddit (encrypted cookies in the private-gallery store)

    Returns:
        {'platforms': [...], 'global_monitoring_enabled': bool}, where each
        platform entry carries a credential count, last-update timestamp and
        its per-platform monitoring flag.
    """
    app_state = get_app_state()
    db = app_state.db
    platforms = []
    def _get_monitoring_flag(platform_id: str) -> bool:
        """Read monitoring preference from settings."""
        # Absent or unreadable settings default to monitoring enabled.
        try:
            val = app_state.settings.get(f"cookie_monitoring:{platform_id}")
            if val is not None:
                return str(val).lower() not in ('false', '0', 'no')
        except Exception:
            pass
        return True
    # 1. Scraper platforms
    for platform_def in _SCRAPER_PLATFORMS:
        scraper = db.get_scraper(platform_def['id'])
        cookies_count = 0
        updated_at = None
        if scraper:
            raw = scraper.get('cookies_json')
            if raw:
                # cookies_json may be either a bare list or {"cookies": [...]};
                # unparseable blobs are treated as zero cookies.
                try:
                    data = json.loads(raw)
                    if isinstance(data, list):
                        cookies_count = len(data)
                    elif isinstance(data, dict):
                        c = data.get('cookies', [])
                        cookies_count = len(c) if isinstance(c, list) else 0
                except (json.JSONDecodeError, TypeError):
                    pass
            updated_at = scraper.get('cookies_updated_at')
        monitoring_enabled = _get_monitoring_flag(platform_def['id'])
        platforms.append({
            'id': platform_def['id'],
            'name': platform_def['name'],
            'type': platform_def['type'],
            'source': platform_def['source'],
            'cookies_count': cookies_count,
            'has_credentials': cookies_count > 0,
            'updated_at': updated_at,
            'used_by': platform_def['used_by'],
            'monitoring_enabled': monitoring_enabled,
        })
    # 2. Paid content platforms
    # A failure to load the adapter degrades gracefully to "no credentials".
    try:
        from modules.paid_content import PaidContentDBAdapter
        paid_db = PaidContentDBAdapter(db)
        paid_services = {svc['id']: svc for svc in paid_db.get_services()}
    except Exception:
        paid_services = {}
    for platform_def in _PAID_CONTENT_PLATFORMS:
        svc = paid_services.get(platform_def['id'], {})
        session_val = svc.get('session_cookie') or ''
        has_creds = bool(session_val)
        updated_at = svc.get('session_updated_at')
        # Count credentials: for JSON objects count keys, for JSON arrays count items, otherwise 1 if set
        cookies_count = 0
        if has_creds:
            try:
                parsed = json.loads(session_val)
                if isinstance(parsed, dict):
                    cookies_count = len(parsed)
                elif isinstance(parsed, list):
                    cookies_count = len(parsed)
                else:
                    cookies_count = 1
            except (json.JSONDecodeError, TypeError):
                # Plain (non-JSON) token/session string counts as one credential.
                cookies_count = 1
        platforms.append({
            'id': platform_def['id'],
            'name': platform_def['name'],
            'type': platform_def['type'],
            'source': platform_def['source'],
            'base_url': platform_def.get('base_url'),
            'cookies_count': cookies_count,
            'has_credentials': has_creds,
            'updated_at': updated_at,
            'used_by': platform_def['used_by'],
            'monitoring_enabled': _get_monitoring_flag(platform_def['id']),
        })
    # 3. Reddit (private gallery)
    # Reddit cookies are encrypted at rest; any failure below leaves the
    # defaults (locked, no credentials) in place.
    reddit_has_creds = False
    reddit_cookies_count = 0
    reddit_locked = True
    try:
        from modules.reddit_community_monitor import RedditCommunityMonitor, REDDIT_MONITOR_KEY_FILE
        from modules.private_gallery_crypto import get_private_gallery_crypto, load_key_from_file
        db_path = str(Path(__file__).parent.parent.parent.parent / 'database' / 'media_downloader.db')
        reddit_monitor = RedditCommunityMonitor(db_path)
        crypto = get_private_gallery_crypto()
        reddit_locked = not crypto.is_initialized()
        # If gallery is locked, try loading crypto from key file (exported on unlock)
        active_crypto = crypto if not reddit_locked else load_key_from_file(REDDIT_MONITOR_KEY_FILE)
        if active_crypto and active_crypto.is_initialized():
            reddit_has_creds = reddit_monitor.has_cookies(active_crypto)
            if reddit_has_creds:
                try:
                    conn = reddit_monitor._get_connection()
                    cursor = conn.cursor()
                    cursor.execute("SELECT value FROM private_media_config WHERE key = 'reddit_monitor_encrypted_cookies'")
                    row = cursor.fetchone()
                    conn.close()
                    if row and row['value']:
                        decrypted = active_crypto.decrypt_field(row['value'])
                        parsed = json.loads(decrypted)
                        if isinstance(parsed, list):
                            reddit_cookies_count = len(parsed)
                except Exception:
                    # Decryption/parse failure: report "some credentials" without a count.
                    reddit_cookies_count = 1 if reddit_has_creds else 0
    except Exception:
        pass
    platforms.append({
        'id': 'reddit',
        'name': 'Reddit',
        'type': 'cookies',
        'source': 'private_gallery',
        'base_url': 'https://reddit.com',
        'cookies_count': reddit_cookies_count,
        'has_credentials': reddit_has_creds,
        'gallery_locked': reddit_locked,
        'updated_at': None,
        'used_by': ['Private Gallery'],
        'monitoring_enabled': _get_monitoring_flag('reddit'),
    })
    return {
        'platforms': platforms,
        'global_monitoring_enabled': _get_monitoring_flag('global'),
    }
@router.put("/scrapers/platform-credentials/{platform_id}/monitoring")
@limiter.limit("30/minute")
@handle_exceptions
async def toggle_platform_monitoring(
    request: Request,
    platform_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Enable or disable health monitoring for one platform.

    Persists the flag under the ``cookie_monitoring:{platform_id}`` settings
    key; a missing ``enabled`` field in the body defaults to enabled.
    """
    payload = await request.json()
    is_enabled = payload.get('enabled', True)
    state = get_app_state()
    state.settings.set(
        key=f"cookie_monitoring:{platform_id}",
        value=str(is_enabled).lower(),
        category="cookie_monitoring",
        updated_by=current_user.get('username', 'user')
    )
    action = 'enabled' if is_enabled else 'disabled'
    return {
        'success': True,
        'message': f"Monitoring {action} for {platform_id}",
    }
@router.put("/scrapers/platform-credentials/monitoring")
@limiter.limit("30/minute")
@handle_exceptions
async def toggle_global_monitoring(
    request: Request,
    current_user: Dict = Depends(require_admin)
):
    """Enable or disable cookie health monitoring globally.

    Stored under the ``cookie_monitoring:global`` settings key; a missing
    ``enabled`` field in the body defaults to enabled.
    """
    payload = await request.json()
    is_enabled = payload.get('enabled', True)
    state = get_app_state()
    state.settings.set(
        key="cookie_monitoring:global",
        value=str(is_enabled).lower(),
        category="cookie_monitoring",
        updated_by=current_user.get('username', 'user')
    )
    action = 'enabled' if is_enabled else 'disabled'
    return {
        'success': True,
        'message': f"Global cookie monitoring {action}",
    }
@router.get("/scrapers/{scraper_id}")
@limiter.limit("60/minute")
@handle_exceptions
async def get_scraper(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Return one scraper's configuration with the cookie blob stripped.

    Raises NotFoundError when the scraper id is unknown; the response carries
    a ``cookies_count`` field instead of the raw cookies.
    """
    state = get_app_state()
    record = state.db.get_scraper(scraper_id)
    if not record:
        raise NotFoundError(f"Scraper '{scraper_id}' not found")
    # Never ship the raw cookie payload to the frontend; expose a count only.
    record.pop('cookies_json', None)
    stored = state.db.get_scraper_cookies(scraper_id)
    record['cookies_count'] = len(stored) if stored else 0
    return record
@router.put("/scrapers/{scraper_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def update_scraper(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Update scraper settings (proxy, enabled, base_url).

    Raises NotFoundError for unknown scrapers and ValidationError when the
    request body contains no updatable fields.
    """
    state = get_app_state()
    payload = await request.json()
    if not state.db.get_scraper(scraper_id):
        raise NotFoundError(f"Scraper '{scraper_id}' not found")
    if not state.db.update_scraper(scraper_id, payload):
        raise ValidationError("No valid fields to update")
    return {"success": True, "message": f"Scraper '{scraper_id}' updated"}
@router.post("/scrapers/{scraper_id}/test")
@limiter.limit("10/minute")
@handle_exceptions
async def test_scraper_connection(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """
    Test scraper connection via FlareSolverr (if required).
    On success, saves cookies to database.
    For CLI tools (yt-dlp, gallery-dl), tests that the tool is installed and working.

    Every outcome is recorded via update_scraper_test_status() before the
    response is returned, so the last test result survives in the database.
    """
    import subprocess
    from modules.cloudflare_handler import CloudflareHandler
    app_state = get_app_state()
    scraper = app_state.db.get_scraper(scraper_id)
    if not scraper:
        raise NotFoundError(f"Scraper '{scraper_id}' not found")
    # Handle CLI tools specially - test that they're installed and working
    if scraper.get('type') == 'cli_tool':
        # NOTE(review): tool paths are hard-coded to this deployment's venv —
        # confirm they match the install location on other hosts.
        cli_tests = {
            'ytdlp': {
                'cmd': ['/opt/media-downloader/venv/bin/yt-dlp', '--version'],
                'name': 'yt-dlp'
            },
            'gallerydl': {
                'cmd': ['/opt/media-downloader/venv/bin/gallery-dl', '--version'],
                'name': 'gallery-dl'
            }
        }
        test_config = cli_tests.get(scraper_id)
        if test_config:
            try:
                # List-form argv with shell=False default: no shell injection risk.
                result = subprocess.run(
                    test_config['cmd'],
                    capture_output=True,
                    text=True,
                    timeout=10
                )
                if result.returncode == 0:
                    # First stdout line is the tool's version string.
                    version = result.stdout.strip().split('\n')[0]
                    cookies_count = 0
                    # Check if cookies are configured
                    if scraper.get('cookies_json'):
                        try:
                            import json
                            data = json.loads(scraper['cookies_json'])
                            # Support both {"cookies": [...]} and [...] formats
                            if isinstance(data, dict) and 'cookies' in data:
                                cookies = data['cookies']
                            elif isinstance(data, list):
                                cookies = data
                            else:
                                cookies = []
                            cookies_count = len(cookies) if cookies else 0
                        except (json.JSONDecodeError, TypeError, KeyError) as e:
                            # Bad cookie JSON is non-fatal for a version check.
                            logger.debug(f"Failed to parse cookies for {scraper_id}: {e}")
                    app_state.db.update_scraper_test_status(scraper_id, 'success')
                    msg = f"{test_config['name']} v{version} installed"
                    if cookies_count > 0:
                        msg += f", {cookies_count} cookies configured"
                    return {
                        "success": True,
                        "message": msg
                    }
                else:
                    error_msg = result.stderr.strip() or "Command failed"
                    app_state.db.update_scraper_test_status(scraper_id, 'failed', error_msg)
                    return {
                        "success": False,
                        "message": f"{test_config['name']} error: {error_msg}"
                    }
            except subprocess.TimeoutExpired:
                app_state.db.update_scraper_test_status(scraper_id, 'failed', "Command timed out")
                return {"success": False, "message": "Command timed out"}
            except FileNotFoundError:
                app_state.db.update_scraper_test_status(scraper_id, 'failed', "Tool not installed")
                return {"success": False, "message": f"{test_config['name']} not installed"}
        else:
            # Unknown CLI tool
            # No runnable check available; mark it as registered and succeed.
            app_state.db.update_scraper_test_status(scraper_id, 'success')
            return {"success": True, "message": "CLI tool registered"}
    # HTTP-based scrapers require a target URL to probe.
    base_url = scraper.get('base_url')
    if not base_url:
        raise ValidationError(f"Scraper '{scraper_id}' has no base_url configured")
    proxy_url = None
    if scraper.get('proxy_enabled') and scraper.get('proxy_url'):
        proxy_url = scraper['proxy_url']
    cf_handler = CloudflareHandler(
        module_name=scraper_id,
        cookie_file=None,
        proxy_url=proxy_url if proxy_url else None,
        flaresolverr_enabled=scraper.get('flaresolverr_required', False)
    )
    if scraper.get('flaresolverr_required'):
        # Cloudflare-protected site: solve the challenge via FlareSolverr and
        # persist the resulting cookies for later scraper runs.
        success = cf_handler.get_cookies_via_flaresolverr(base_url, max_retries=2)
        if success:
            cookies = cf_handler.get_cookies_list()
            user_agent = cf_handler.get_user_agent()
            app_state.db.save_scraper_cookies(scraper_id, cookies, user_agent=user_agent)
            app_state.db.update_scraper_test_status(scraper_id, 'success')
            return {
                "success": True,
                "message": f"Connection successful, {len(cookies)} cookies saved",
                "cookies_count": len(cookies)
            }
        else:
            error_msg = "FlareSolverr returned no cookies"
            if proxy_url:
                error_msg += " (check proxy connection)"
            app_state.db.update_scraper_test_status(scraper_id, 'failed', error_msg)
            return {
                "success": False,
                "message": error_msg
            }
    else:
        # Plain HTTP probe: any status below 400 counts as reachable.
        try:
            proxies = {"http": proxy_url, "https": proxy_url} if proxy_url else None
            response = requests.get(
                base_url,
                timeout=10,
                proxies=proxies,
                headers={'User-Agent': cf_handler.user_agent}
            )
            if response.status_code < 400:
                app_state.db.update_scraper_test_status(scraper_id, 'success')
                return {
                    "success": True,
                    "message": f"Connection successful (HTTP {response.status_code})"
                }
            else:
                app_state.db.update_scraper_test_status(
                    scraper_id, 'failed',
                    f"HTTP {response.status_code}"
                )
                return {
                    "success": False,
                    "message": f"Connection failed with HTTP {response.status_code}"
                }
        except requests.exceptions.Timeout:
            app_state.db.update_scraper_test_status(scraper_id, 'timeout', 'Request timed out')
            return {"success": False, "message": "Connection timed out"}
        except Exception as e:
            app_state.db.update_scraper_test_status(scraper_id, 'failed', str(e))
            return {"success": False, "message": str(e)}
@router.post("/scrapers/{scraper_id}/cookies")
@limiter.limit("20/minute")
@handle_exceptions
async def upload_scraper_cookies(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Store cookies for a scraper (browser-extension export format).

    The body is either a bare JSON array of cookie objects, or an object
    ``{"cookies": [...], "merge": bool, "user_agent": str}``. Each cookie
    must be an object with at least ``name`` and ``value``.
    """
    state = get_app_state()
    if not state.db.get_scraper(scraper_id):
        raise NotFoundError(f"Scraper '{scraper_id}' not found")
    payload = await request.json()
    # Bare-array uploads imply merge semantics and carry no user agent.
    if isinstance(payload, list):
        cookies, merge, user_agent = payload, True, None
    else:
        cookies = payload.get('cookies', [])
        merge = payload.get('merge', True)
        user_agent = payload.get('user_agent')
    if not isinstance(cookies, list) or not cookies:
        raise ValidationError("Invalid cookies format. Expected {cookies: [...]}")
    for idx, entry in enumerate(cookies):
        if not isinstance(entry, dict):
            raise ValidationError(f"Cookie {idx} is not an object")
        if 'name' not in entry or 'value' not in entry:
            raise ValidationError(f"Cookie {idx} missing 'name' or 'value'")
    saved = state.db.save_scraper_cookies(
        scraper_id, cookies,
        user_agent=user_agent,
        merge=merge
    )
    if not saved:
        raise ValidationError("Failed to save cookies")
    stored = state.db.get_scraper_cookies(scraper_id)
    total = len(stored) if stored else 0
    return {
        "success": True,
        "message": f"{'Merged' if merge else 'Replaced'} {len(cookies)} cookies (total: {total})",
        "cookies_count": total
    }
@router.delete("/scrapers/{scraper_id}/cookies")
@limiter.limit("20/minute")
@handle_exceptions
async def clear_scraper_cookies(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Delete every stored cookie for the given scraper.

    Raises NotFoundError for unknown scrapers; the response's ``success``
    flag mirrors the database operation's result.
    """
    state = get_app_state()
    if not state.db.get_scraper(scraper_id):
        raise NotFoundError(f"Scraper '{scraper_id}' not found")
    cleared = state.db.clear_scraper_cookies(scraper_id)
    return {
        "success": cleared,
        "message": f"Cookies cleared for '{scraper_id}'" if cleared else "Failed to clear cookies"
    }
# ============================================================================
# ERROR MONITORING ENDPOINTS
# ============================================================================
@router.get("/errors/recent")
@limiter.limit("30/minute")
@handle_exceptions
async def get_recent_errors(
    request: Request,
    limit: int = Query(50, ge=1, le=500, description="Maximum number of errors to return"),
    since_visit: bool = Query(False, description="Only show errors since last dashboard visit (default: show ALL unviewed)"),
    include_dismissed: bool = Query(False, description="Include dismissed errors"),
    current_user: Dict = Depends(get_current_user)
):
    """Fetch recent errors recorded in real time by universal_logger.

    With the defaults, every unviewed/undismissed error is returned no matter
    how old it is, so nothing is missed just because the dashboard was opened.
    Pass ``since_visit=true`` to restrict results to errors newer than the
    last dashboard visit (24-hour window when no visit was ever recorded).
    """
    state = get_app_state()
    since = None
    if since_visit:
        # Fall back to a 24h window when no visit has been recorded yet.
        since = state.db.get_last_dashboard_visit() or (datetime.now() - timedelta(hours=24))
    rows = state.db.get_recent_errors(since=since, include_dismissed=include_dismissed, limit=limit)
    return {
        "errors": rows,
        "total_count": len(rows),
        "since": since.isoformat() if since else None,
        # The badge count always reflects ALL unviewed errors, not this page.
        "unviewed_count": state.db.get_unviewed_error_count(since=None)
    }
@router.get("/errors/count")
@limiter.limit("60/minute")
@handle_exceptions
async def get_error_count(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Return unviewed/undismissed error counts.

    Reports both the all-time unviewed total and the count of errors newer
    than the last recorded dashboard visit (equal to the total when no visit
    exists). Errors are recorded in real time by universal_logger.
    """
    state = get_app_state()
    total = state.db.get_unviewed_error_count(since=None)
    last_visit = state.db.get_last_dashboard_visit()
    recent = state.db.get_unviewed_error_count(since=last_visit) if last_visit else total
    return {
        "unviewed_count": total,
        "total_recent": total,
        "since_last_visit": recent
    }
@router.post("/errors/dismiss")
@limiter.limit("20/minute")
@handle_exceptions
async def dismiss_errors(
    request: Request,
    body: Dict = Body(...),
    current_user: Dict = Depends(get_current_user)
):
    """Dismiss errors, either a list of ids or everything at once."""
    state = get_app_state()
    ids = body.get("error_ids", [])
    if body.get("dismiss_all", False):
        count = state.db.dismiss_errors(dismiss_all=True)
    elif ids:
        count = state.db.dismiss_errors(error_ids=ids)
    else:
        return {"success": False, "dismissed": 0, "message": "No errors specified"}
    return {
        "success": True,
        "dismissed": count,
        "message": f"Dismissed {count} error(s)"
    }
@router.post("/errors/mark-viewed")
@limiter.limit("20/minute")
@handle_exceptions
async def mark_errors_viewed(
    request: Request,
    body: Dict = Body(...),
    current_user: Dict = Depends(get_current_user)
):
    """Mark errors as viewed, either a list of ids or everything at once."""
    state = get_app_state()
    ids = body.get("error_ids", [])
    if body.get("mark_all", False):
        count = state.db.mark_errors_viewed(mark_all=True)
    elif ids:
        count = state.db.mark_errors_viewed(error_ids=ids)
    else:
        return {"success": False, "marked": 0}
    return {
        "success": True,
        "marked": count
    }
@router.post("/errors/update-visit")
@limiter.limit("30/minute")
@handle_exceptions
async def update_dashboard_visit(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Record the current time as the last dashboard visit."""
    state = get_app_state()
    return {"success": state.db.update_dashboard_visit()}
@router.get("/logs/context")
@limiter.limit("30/minute")
@handle_exceptions
async def get_log_context(
    request: Request,
    timestamp: str = Query(..., description="ISO timestamp of the error"),
    module: Optional[str] = Query(None, description="Module name to filter"),
    minutes_before: int = Query(1, description="Minutes of context before error"),
    minutes_after: int = Query(1, description="Minutes of context after error"),
    current_user: Dict = Depends(get_current_user)
):
    """Get log lines around a specific timestamp for debugging context.

    Scans the day's log files for lines whose timestamp falls within the
    requested window, parsing them with the logger's known line format.
    Lines within 2 seconds of the target get ``is_target: true``. Unparseable
    lines and unreadable files are skipped silently.
    """
    target_time = datetime.fromisoformat(timestamp)
    start_time = target_time - timedelta(minutes=minutes_before)
    end_time = target_time + timedelta(minutes=minutes_after)
    # NOTE(review): log directory is hard-coded to this deployment's path.
    log_dir = Path('/opt/media-downloader/logs')
    # Matches: "<ts> [MediaDownloader.<module>] [<level>] [<tag>] <message>"
    log_pattern = re.compile(
        r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) '
        r'\[MediaDownloader\.(\w+)\] '
        r'\[(\w+)\] '
        r'\[(\w+)\] '
        r'(.+)$'
    )
    # Log files are named <YYYYMMDD>_<module>.log, so only the target day's
    # files need scanning.
    date_str = target_time.strftime('%Y%m%d')
    matching_lines = []
    for log_file in log_dir.glob(f'{date_str}_*.log'):
        # Optional case-insensitive module filter against the filename stem.
        if module and module.lower() not in log_file.stem.lower():
            continue
        try:
            lines = log_file.read_text(errors='replace').splitlines()
            for line in lines:
                match = log_pattern.match(line)
                if match:
                    timestamp_str, _, log_module, level, message = match.groups()
                    try:
                        line_time = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
                        if start_time <= line_time <= end_time:
                            matching_lines.append({
                                'timestamp': timestamp_str,
                                'module': log_module,
                                'level': level,
                                'message': message,
                                # Flag lines within 2s of the error itself.
                                'is_target': abs((line_time - target_time).total_seconds()) < 2
                            })
                    except ValueError:
                        continue
        except Exception:
            continue
    # Lexicographic sort is chronological for this fixed timestamp format.
    matching_lines.sort(key=lambda x: x['timestamp'])
    return {
        "context": matching_lines,
        "target_timestamp": timestamp,
        "range": {
            "start": start_time.isoformat(),
            "end": end_time.isoformat()
        }
    }

View File

@@ -0,0 +1,366 @@
"""
Semantic Search Router
Handles CLIP-based semantic search operations:
- Text-based image/video search
- Similar file search
- Embedding generation and management
- Model settings
"""
import asyncio
import time
from typing import Dict, Optional
from fastapi import APIRouter, BackgroundTasks, Body, Depends, Query, Request
from pydantic import BaseModel
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, require_admin, get_app_state
from ..core.exceptions import handle_exceptions, ValidationError
from modules.semantic_search import get_semantic_search
from modules.universal_logger import get_logger
# Shared module logger (routed through the project's universal logger).
logger = get_logger('API')
router = APIRouter(prefix="/api/semantic", tags=["Semantic Search"])
limiter = Limiter(key_func=get_remote_address)
# Batch limit for embedding generation
# (upper bound on files processed by a full reindex / model-change rebuild)
EMBEDDING_BATCH_LIMIT = 10000
# ============================================================================
# PYDANTIC MODELS
# ============================================================================
class SemanticSearchRequest(BaseModel):
    """Body schema for text-based semantic search.

    NOTE(review): the /search endpoint in this file binds its body via
    ``Body(...)`` parameters rather than this model — confirm it is still
    referenced elsewhere before relying on it.
    """
    # Natural-language query to embed and match against stored vectors.
    query: str
    # Maximum number of results to return.
    limit: int = 50
    # Optional platform filter.
    platform: Optional[str] = None
    # Optional source filter.
    source: Optional[str] = None
    # Minimum similarity score (0..1) for a match to be included.
    threshold: float = 0.2
class GenerateEmbeddingsRequest(BaseModel):
    """Body schema for batch embedding generation.

    NOTE(review): the /generate endpoint binds via ``Body(...)`` parameters
    rather than this model — confirm it is still referenced elsewhere.
    """
    # Maximum number of files to embed in one batch.
    limit: int = 100
    # Optional platform filter for file selection.
    platform: Optional[str] = None
class SemanticSettingsUpdate(BaseModel):
    """Body schema for updating semantic-search settings.

    NOTE(review): the /settings endpoint binds via ``Body(...)`` parameters
    rather than this model — confirm it is still referenced elsewhere.
    """
    # CLIP model name (validated against a whitelist by the endpoint).
    model: Optional[str] = None
    # Default similarity threshold (0..1).
    threshold: Optional[float] = None
# ============================================================================
# SEARCH ENDPOINTS
# ============================================================================
@router.get("/stats")
@limiter.limit("120/minute")
@handle_exceptions
async def get_semantic_stats(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Return embedding statistics for the semantic search index."""
    state = get_app_state()
    engine = get_semantic_search(state.db)
    return engine.get_embedding_stats()
@router.post("/search")
@limiter.limit("30/minute")
@handle_exceptions
async def semantic_search(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    query: str = Body(..., embed=True),
    limit: int = Body(50, ge=1, le=200),
    platform: Optional[str] = Body(None),
    source: Optional[str] = Body(None),
    threshold: float = Body(0.2, ge=0.0, le=1.0)
):
    """Find images/videos matching a natural-language query via CLIP."""
    state = get_app_state()
    matches = get_semantic_search(state.db).search_by_text(
        query=query,
        limit=limit,
        platform=platform,
        source=source,
        threshold=threshold
    )
    return {"results": matches, "count": len(matches), "query": query}
@router.post("/similar/{file_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def find_similar_files(
    request: Request,
    file_id: int,
    current_user: Dict = Depends(get_current_user),
    limit: int = Query(50, ge=1, le=200),
    platform: Optional[str] = Query(None),
    source: Optional[str] = Query(None),
    threshold: float = Query(0.5, ge=0.0, le=1.0)
):
    """Return files whose embeddings are close to the given file's."""
    state = get_app_state()
    matches = get_semantic_search(state.db).search_by_file_id(
        file_id=file_id,
        limit=limit,
        platform=platform,
        source=source,
        threshold=threshold
    )
    return {"results": matches, "count": len(matches), "source_file_id": file_id}
# ============================================================================
# EMBEDDING GENERATION ENDPOINTS
# ============================================================================
@router.post("/generate")
@limiter.limit("10/minute")
@handle_exceptions
async def generate_embeddings(
    request: Request,
    background_tasks: BackgroundTasks,
    current_user: Dict = Depends(get_current_user),
    limit: int = Body(100, ge=1, le=1000),
    platform: Optional[str] = Body(None)
):
    """Generate CLIP embeddings for files that don't have them yet.

    Kicks off a background batch job and returns immediately. Progress and
    completion are pushed over the websocket manager (when one is configured)
    as ``embedding_progress`` / ``embedding_complete`` messages.

    Args:
        limit: Maximum number of files to embed in this batch (1-1000).
        platform: Optional platform filter passed through to the generator.

    Returns:
        ``{"status": "processing", ...}`` when a batch was started, or an
        ``already_running`` payload when one is already in flight.
    """
    app_state = get_app_state()
    if app_state.indexing_running:
        return {
            "success": False,
            "message": "Indexing already in progress",
            "already_running": True,
            "status": "already_running"
        }
    search = get_semantic_search(app_state.db)
    app_state.indexing_running = True
    app_state.indexing_start_time = time.time()
    # Fix: asyncio.get_event_loop() is deprecated inside coroutines (3.10+);
    # get_running_loop() is the correct way to capture the loop that
    # run_coroutine_threadsafe() will target from the worker thread.
    loop = asyncio.get_running_loop()
    manager = getattr(app_state, 'websocket_manager', None)

    def progress_callback(processed: int, total: int, current_file: str):
        # Invoked from the generator's worker thread; throttled to every
        # 10th file plus the final one. Best-effort only — a reporting
        # failure must never abort the batch.
        try:
            if manager and (processed % 10 == 0 or processed == total):
                # Only query stats when a manager exists to broadcast to.
                stats = search.get_embedding_stats()
                asyncio.run_coroutine_threadsafe(
                    manager.broadcast({
                        "type": "embedding_progress",
                        "processed": processed,
                        "total": total,
                        "current_file": current_file,
                        "total_embeddings": stats.get('total_embeddings', 0),
                        "coverage_percent": stats.get('coverage_percent', 0)
                    }),
                    loop
                )
        except Exception:
            pass

    def run_generation():
        # Runs in FastAPI's background-task context; the flags are reset in
        # `finally` so a crash can never leave indexing_running stuck True.
        try:
            results = search.generate_embeddings_batch(
                limit=limit,
                platform=platform,
                progress_callback=progress_callback
            )
            logger.info(f"Embedding generation complete: {results}", module="SemanticSearch")
            try:
                if manager:
                    stats = search.get_embedding_stats()
                    asyncio.run_coroutine_threadsafe(
                        manager.broadcast({
                            "type": "embedding_complete",
                            "results": results,
                            "total_embeddings": stats.get('total_embeddings', 0),
                            "coverage_percent": stats.get('coverage_percent', 0)
                        }),
                        loop
                    )
            except Exception:
                pass
        except Exception as e:
            logger.error(f"Embedding generation failed: {e}", module="SemanticSearch")
        finally:
            app_state.indexing_running = False
            app_state.indexing_start_time = None

    background_tasks.add_task(run_generation)
    return {
        "message": f"Started embedding generation for up to {limit} files",
        "status": "processing"
    }
@router.get("/status")
@limiter.limit("60/minute")
@handle_exceptions
async def get_semantic_indexing_status(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Report whether an embedding batch is running and for how long."""
    state = get_app_state()
    running = state.indexing_running
    started = state.indexing_start_time
    # Elapsed time is only meaningful while a batch is actually running.
    elapsed = int(time.time() - started) if running and started else None
    return {
        "indexing_running": running,
        "elapsed_seconds": elapsed
    }
# ============================================================================
# SETTINGS ENDPOINTS
# ============================================================================
@router.get("/settings")
@limiter.limit("30/minute")
@handle_exceptions
async def get_semantic_settings(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get semantic search settings.

    Returns the configured CLIP model name and similarity threshold, falling
    back to defaults when nothing (or something malformed) is stored.
    """
    app_state = get_app_state()
    settings = app_state.settings.get('semantic_search', {})
    # Fix: the stored value may be a non-dict (the update endpoint guards
    # against exactly this); calling .get() on it would raise AttributeError.
    if not isinstance(settings, dict):
        settings = {}
    return {
        "model": settings.get('model', 'clip-ViT-B-32'),
        "threshold": settings.get('threshold', 0.2)
    }
@router.post("/settings")
@limiter.limit("10/minute")
@handle_exceptions
async def update_semantic_settings(
    request: Request,
    background_tasks: BackgroundTasks,
    current_user: Dict = Depends(require_admin),
    model: str = Body(None, embed=True),
    threshold: float = Body(None, embed=True)
):
    """Update semantic search settings.

    Validates and persists the CLIP model name and/or similarity threshold.
    Changing the model invalidates every stored vector: all rows in
    content_embeddings are deleted and a full background re-index starts.

    Raises:
        ValidationError: unknown model name, or threshold outside [0, 1].
    """
    app_state = get_app_state()
    current_settings = app_state.settings.get('semantic_search', {}) or {}
    # The stored value may be malformed (non-dict); fall back to defaults.
    old_model = current_settings.get('model', 'clip-ViT-B-32') if isinstance(current_settings, dict) else 'clip-ViT-B-32'
    model_changed = False
    new_settings = dict(current_settings) if isinstance(current_settings, dict) else {}
    if model:
        valid_models = ['clip-ViT-B-32', 'clip-ViT-B-16', 'clip-ViT-L-14']
        if model not in valid_models:
            raise ValidationError(f"Invalid model. Must be one of: {valid_models}")
        if model != old_model:
            model_changed = True
        new_settings['model'] = model
    if threshold is not None:
        if threshold < 0 or threshold > 1:
            raise ValidationError("Threshold must be between 0 and 1")
        new_settings['threshold'] = threshold
    app_state.settings.set('semantic_search', new_settings, category='ai')
    if model_changed:
        # Embeddings produced by different CLIP models are not comparable,
        # so the existing vectors must be dropped before re-indexing.
        logger.info(f"Model changed from {old_model} to {model}, clearing embeddings", module="SemanticSearch")
        with app_state.db.get_connection(for_write=True) as conn:
            cursor = conn.cursor()
            cursor.execute('DELETE FROM content_embeddings')
            deleted = cursor.rowcount
            logger.info(f"Cleared {deleted} embeddings for model change", module="SemanticSearch")
        def run_reindex_for_model_change():
            # force_reload makes the shared engine pick up the new model.
            try:
                search = get_semantic_search(app_state.db, force_reload=True)
                results = search.generate_embeddings_batch(limit=EMBEDDING_BATCH_LIMIT)
                logger.info(f"Model change re-index complete: {results}", module="SemanticSearch")
            except Exception as e:
                logger.error(f"Model change re-index failed: {e}", module="SemanticSearch")
        background_tasks.add_task(run_reindex_for_model_change)
        logger.info(f"Semantic search settings updated: {new_settings} (re-indexing started)", module="SemanticSearch")
        return {"success": True, "settings": new_settings, "reindexing": True, "message": f"Model changed to {model}, re-indexing started"}
    logger.info(f"Semantic search settings updated: {new_settings}", module="SemanticSearch")
    return {"success": True, "settings": new_settings, "reindexing": False}
@router.post("/reindex")
@limiter.limit("2/minute")
@handle_exceptions
async def reindex_embeddings(
    request: Request,
    background_tasks: BackgroundTasks,
    current_user: Dict = Depends(require_admin)
):
    """Drop every stored embedding and rebuild the index in the background."""
    app_state = get_app_state()
    if app_state.indexing_running:
        return {"success": False, "message": "Indexing already in progress", "already_running": True}
    search = get_semantic_search(app_state.db)
    # Wipe existing vectors first so the rebuild starts from a clean slate.
    with app_state.db.get_connection(for_write=True) as conn:
        removed = conn.cursor().execute('DELETE FROM content_embeddings').rowcount
        logger.info(f"Cleared {removed} embeddings for reindexing", module="SemanticSearch")
    app_state.indexing_running = True
    app_state.indexing_start_time = time.time()

    def run_reindex():
        # Flags are reset in `finally` so a crash never leaves them stuck.
        try:
            outcome = search.generate_embeddings_batch(limit=EMBEDDING_BATCH_LIMIT)
            logger.info(f"Reindex complete: {outcome}", module="SemanticSearch")
        except Exception as exc:
            logger.error(f"Reindex failed: {exc}", module="SemanticSearch")
        finally:
            app_state.indexing_running = False
            app_state.indexing_start_time = None

    background_tasks.add_task(run_reindex)
    return {"success": True, "message": "Re-indexing started in background"}
@router.post("/clear")
@limiter.limit("2/minute")
@handle_exceptions
async def clear_embeddings(
    request: Request,
    current_user: Dict = Depends(require_admin)
):
    """Delete every stored embedding and report how many were removed."""
    app_state = get_app_state()
    with app_state.db.get_connection(for_write=True) as conn:
        removed = conn.cursor().execute('DELETE FROM content_embeddings').rowcount
        logger.info(f"Cleared {removed} embeddings", module="SemanticSearch")
    return {"success": True, "deleted": removed}

View File

@@ -0,0 +1,535 @@
"""
Stats Router
Handles statistics, monitoring, settings, and integrations:
- Dashboard statistics
- Downloader monitoring
- Settings management
- Immich integration
"""
import json
import sqlite3
import time
from typing import Dict, Optional
import requests
from fastapi import APIRouter, Depends, Request
from pydantic import BaseModel
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, require_admin, get_app_state
from ..core.exceptions import handle_exceptions, NotFoundError, ValidationError
from modules.universal_logger import get_logger
# Shared module logger (routed through the project's universal logger).
logger = get_logger('API')
router = APIRouter(prefix="/api", tags=["Stats & Monitoring"])
limiter = Limiter(key_func=get_remote_address)
# ============================================================================
# PYDANTIC MODELS
# ============================================================================
class SettingUpdate(BaseModel):
    """Request body for updating a persisted application setting."""
    # New value; any JSON-serializable scalar or container is accepted.
    value: dict | list | str | int | float | bool
    # Optional grouping category for the settings store.
    category: Optional[str] = None
    # Optional human-readable description of the setting.
    description: Optional[str] = None
# ============================================================================
# DASHBOARD STATISTICS
# ============================================================================
@router.get("/stats/dashboard")
@limiter.limit("60/minute")
@handle_exceptions
async def get_dashboard_stats(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get comprehensive dashboard statistics.

    Aggregates in one read-only connection:
      - per-platform download counts and on-disk sizes,
      - downloads per day for the last 30 days,
      - content-type breakdown and top sources,
      - overall totals (files, bytes, sources, platforms, recycle/review),
      - week-over-week growth rate of downloads.
    """
    app_state = get_app_state()
    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        # Get download counts per platform (combine downloads and video_downloads)
        cursor.execute("""
            SELECT platform, SUM(cnt) as count FROM (
                SELECT platform, COUNT(*) as cnt FROM downloads GROUP BY platform
                UNION ALL
                SELECT platform, COUNT(*) as cnt FROM video_downloads GROUP BY platform
            ) GROUP BY platform
        """)
        platform_data = {}
        for row in cursor.fetchall():
            platform = row[0]
            if platform not in platform_data:
                platform_data[platform] = {
                    'count': 0,
                    'size_bytes': 0
                }
            platform_data[platform]['count'] += row[1]
        # Calculate storage sizes from file_inventory (final + review)
        cursor.execute("""
            SELECT platform, COALESCE(SUM(file_size), 0) as total_size
            FROM file_inventory
            WHERE location IN ('final', 'review')
            GROUP BY platform
        """)
        for row in cursor.fetchall():
            platform = row[0]
            if platform not in platform_data:
                platform_data[platform] = {'count': 0, 'size_bytes': 0}
            platform_data[platform]['size_bytes'] += row[1]
        # Only show platforms with actual files, largest first.
        storage_by_platform = []
        for platform in sorted(platform_data.keys(), key=lambda p: platform_data[p]['size_bytes'], reverse=True):
            if platform_data[platform]['size_bytes'] > 0:
                storage_by_platform.append({
                    'platform': platform,
                    'count': platform_data[platform]['count'],
                    'size_bytes': platform_data[platform]['size_bytes'],
                    'size_mb': round(platform_data[platform]['size_bytes'] / 1024 / 1024, 2)
                })
        # Downloads per day (last 30 days) - combine downloads and video_downloads
        cursor.execute("""
            SELECT date, SUM(count) as count FROM (
                SELECT DATE(download_date) as date, COUNT(*) as count
                FROM downloads
                WHERE download_date >= DATE('now', '-30 days')
                GROUP BY DATE(download_date)
                UNION ALL
                SELECT DATE(download_date) as date, COUNT(*) as count
                FROM video_downloads
                WHERE download_date >= DATE('now', '-30 days')
                GROUP BY DATE(download_date)
            ) GROUP BY date ORDER BY date
        """)
        downloads_per_day = [{'date': row[0], 'count': row[1]} for row in cursor.fetchall()]
        # Content type breakdown
        cursor.execute("""
            SELECT
                content_type,
                COUNT(*) as count
            FROM downloads
            WHERE content_type IS NOT NULL
            GROUP BY content_type
            ORDER BY count DESC
        """)
        content_types = {row[0]: row[1] for row in cursor.fetchall()}
        # Top sources
        cursor.execute("""
            SELECT
                source,
                platform,
                COUNT(*) as count
            FROM downloads
            WHERE source IS NOT NULL
            GROUP BY source, platform
            ORDER BY count DESC
            LIMIT 10
        """)
        top_sources = [{'source': row[0], 'platform': row[1], 'count': row[2]} for row in cursor.fetchall()]
        # Total statistics - use file_inventory for accurate file counts
        cursor.execute("""
            SELECT
                (SELECT COUNT(*) FROM file_inventory WHERE location IN ('final', 'review')) as total_downloads,
                (SELECT COALESCE(SUM(file_size), 0) FROM file_inventory WHERE location IN ('final', 'review')) as total_size,
                (SELECT COUNT(DISTINCT source) FROM downloads) +
                (SELECT COUNT(DISTINCT uploader) FROM video_downloads) as unique_sources,
                (SELECT COUNT(DISTINCT platform) FROM file_inventory) as platforms_used
        """)
        totals = cursor.fetchone()
        # Get recycle bin and review counts separately
        cursor.execute("SELECT COUNT(*) FROM recycle_bin")
        recycle_count = cursor.fetchone()[0] or 0
        cursor.execute("SELECT COUNT(*) FROM file_inventory WHERE location = 'review'")
        review_count = cursor.fetchone()[0] or 0
        # Growth rate - combine downloads and video_downloads
        cursor.execute("""
            SELECT
                (SELECT SUM(CASE WHEN download_date >= DATE('now', '-7 days') THEN 1 ELSE 0 END) FROM downloads) +
                (SELECT SUM(CASE WHEN download_date >= DATE('now', '-7 days') THEN 1 ELSE 0 END) FROM video_downloads) as this_week,
                (SELECT SUM(CASE WHEN download_date >= DATE('now', '-14 days') AND download_date < DATE('now', '-7 days') THEN 1 ELSE 0 END) FROM downloads) +
                (SELECT SUM(CASE WHEN download_date >= DATE('now', '-14 days') AND download_date < DATE('now', '-7 days') THEN 1 ELSE 0 END) FROM video_downloads) as last_week
        """)
        growth_row = cursor.fetchone()
        # BUG FIX: SUM() over an empty table yields NULL and NULL + NULL is
        # NULL, so either column can come back as None. Coerce to 0 before
        # comparing/dividing to avoid a TypeError on a fresh database.
        this_week = (growth_row[0] or 0) if growth_row else 0
        last_week = (growth_row[1] or 0) if growth_row else 0
        growth_rate = 0
        if last_week > 0:
            growth_rate = round(((this_week - last_week) / last_week) * 100, 1)
        return {
            'storage_by_platform': storage_by_platform,
            'downloads_per_day': downloads_per_day,
            'content_types': content_types,
            'top_sources': top_sources,
            'totals': {
                'total_downloads': totals[0] or 0,
                'total_size_bytes': totals[1] or 0,
                'total_size_gb': round((totals[1] or 0) / 1024 / 1024 / 1024, 2),
                'unique_sources': totals[2] or 0,
                'platforms_used': totals[3] or 0,
                'recycle_bin_count': recycle_count,
                'review_count': review_count
            },
            'growth_rate': growth_rate
        }
# ============================================================================
# FLARESOLVERR HEALTH CHECK
# ============================================================================
@router.get("/health/flaresolverr")
@limiter.limit("60/minute")
@handle_exceptions
async def check_flaresolverr_health(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Probe the FlareSolverr service and report its status.

    The endpoint URL defaults to localhost but can be overridden by the
    'flaresolverr' entry in the settings table.
    """
    app_state = get_app_state()
    flaresolverr_url = "http://localhost:8191/v1"
    try:
        with app_state.db.get_connection() as conn:
            row = conn.cursor().execute(
                "SELECT value FROM settings WHERE key='flaresolverr'"
            ).fetchone()
        if row:
            cfg = json.loads(row[0])
            if 'url' in cfg:
                flaresolverr_url = cfg['url']
    except (sqlite3.Error, json.JSONDecodeError, KeyError):
        # Missing or malformed setting: silently keep the default URL.
        pass
    started = time.time()
    try:
        reply = requests.post(
            flaresolverr_url,
            json={"cmd": "sessions.list"},
            timeout=5
        )
        elapsed_ms = round((time.time() - started) * 1000, 2)
        if reply.status_code != 200:
            return {
                'status': 'unhealthy',
                'url': flaresolverr_url,
                'response_time_ms': elapsed_ms,
                'last_check': time.time(),
                'error': f"HTTP {reply.status_code}: {reply.text}"
            }
        return {
            'status': 'healthy',
            'url': flaresolverr_url,
            'response_time_ms': elapsed_ms,
            'last_check': time.time(),
            'sessions': reply.json().get('sessions', [])
        }
    except requests.exceptions.ConnectionError:
        return {
            'status': 'offline',
            'url': flaresolverr_url,
            'last_check': time.time(),
            'error': 'Connection refused - FlareSolverr may not be running'
        }
    except requests.exceptions.Timeout:
        return {
            'status': 'timeout',
            'url': flaresolverr_url,
            'last_check': time.time(),
            'error': 'Request timed out after 5 seconds'
        }
    except Exception as e:
        return {
            'status': 'error',
            'url': flaresolverr_url,
            'last_check': time.time(),
            'error': str(e)
        }
# ============================================================================
# MONITORING ENDPOINTS
# ============================================================================
@router.get("/monitoring/status")
@limiter.limit("100/minute")
@handle_exceptions
async def get_monitoring_status(
    request: Request,
    hours: int = 24,
    current_user: Dict = Depends(get_current_user)
):
    """Report per-downloader health over the last `hours` hours."""
    from modules.downloader_monitor import get_monitor
    state = get_app_state()
    downloaders = get_monitor(state.db, state.settings).get_downloader_status(hours=hours)
    return {
        "success": True,
        "downloaders": downloaders,
        "window_hours": hours
    }
@router.get("/monitoring/history")
@limiter.limit("100/minute")
@handle_exceptions
async def get_monitoring_history(
    request: Request,
    downloader: str = None,
    limit: int = 100,
    current_user: Dict = Depends(get_current_user)
):
    """Get download monitoring history, newest first, optionally filtered by downloader."""
    app_state = get_app_state()
    with app_state.db.get_connection() as conn:
        cursor = conn.cursor()
        if downloader:
            cursor.execute("""
                SELECT
                    id, downloader, username, timestamp, success,
                    file_count, error_message, alert_sent
                FROM download_monitor
                WHERE downloader = ?
                ORDER BY timestamp DESC
                LIMIT ?
            """, (downloader, limit))
        else:
            cursor.execute("""
                SELECT
                    id, downloader, username, timestamp, success,
                    file_count, error_message, alert_sent
                FROM download_monitor
                ORDER BY timestamp DESC
                LIMIT ?
            """, (limit,))
        # Rows support key access; SQLite stores booleans as ints, so coerce.
        history = [
            {
                'id': entry['id'],
                'downloader': entry['downloader'],
                'username': entry['username'],
                'timestamp': entry['timestamp'],
                'success': bool(entry['success']),
                'file_count': entry['file_count'],
                'error_message': entry['error_message'],
                'alert_sent': bool(entry['alert_sent'])
            }
            for entry in cursor.fetchall()
        ]
        return {
            "success": True,
            "history": history
        }
@router.delete("/monitoring/history")
@limiter.limit("10/minute")
@handle_exceptions
async def clear_monitoring_history(
    request: Request,
    days: int = 30,
    current_user: Dict = Depends(require_admin)
):
    """Delete monitoring log entries older than `days` days (admin only)."""
    from modules.downloader_monitor import get_monitor
    state = get_app_state()
    get_monitor(state.db, state.settings).clear_old_logs(days=days)
    return {
        "success": True,
        "message": f"Cleared logs older than {days} days"
    }
# ============================================================================
# SETTINGS ENDPOINTS
# ============================================================================
@router.get("/settings/{key}")
@limiter.limit("60/minute")
@handle_exceptions
async def get_setting(
    request: Request,
    key: str,
    current_user: Dict = Depends(get_current_user)
):
    """Return the stored value for `key`; 404 if the setting is absent."""
    stored = get_app_state().settings.get(key)
    if stored is None:
        raise NotFoundError(f"Setting '{key}' not found")
    return stored
@router.put("/settings/{key}")
@limiter.limit("30/minute")
@handle_exceptions
async def update_setting(
    request: Request,
    key: str,
    body: Dict,
    current_user: Dict = Depends(get_current_user)
):
    """Persist a new value for `key` from the request body.

    NOTE(review): accepts a raw dict rather than the SettingUpdate model
    defined above — confirm whether model validation was intended here.
    """
    # Reject bodies with no usable value before touching app state.
    if body.get('value') is None:
        raise ValidationError("Missing 'value' in request body")
    app_state = get_app_state()
    app_state.settings.set(
        key=key,
        value=body['value'],
        category=body.get('category'),
        description=body.get('description'),
        updated_by=current_user.get('username', 'user')
    )
    return {
        "success": True,
        "message": f"Setting '{key}' updated successfully"
    }
# ============================================================================
# IMMICH INTEGRATION
# ============================================================================
@router.post("/immich/scan")
@limiter.limit("10/minute")
@handle_exceptions
async def trigger_immich_scan(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Ask Immich to rescan the configured external library.

    Reads api_url/api_key/library_id from the 'immich' setting; returns a
    success flag rather than raising on integration failures.
    """
    cfg = get_app_state().settings.get('immich', {})
    if not cfg.get('enabled'):
        return {
            "success": False,
            "message": "Immich integration is not enabled"
        }
    api_url = cfg.get('api_url')
    api_key = cfg.get('api_key')
    library_id = cfg.get('library_id')
    if not (api_url and api_key and library_id):
        return {
            "success": False,
            "message": "Immich configuration incomplete (missing api_url, api_key, or library_id)"
        }
    try:
        reply = requests.post(
            f"{api_url}/libraries/{library_id}/scan",
            headers={'X-API-KEY': api_key},
            timeout=10
        )
    except requests.exceptions.RequestException as e:
        return {
            "success": False,
            "message": f"Failed to connect to Immich: {str(e)}"
        }
    if reply.status_code in [200, 201, 204]:
        return {
            "success": True,
            "message": f"Successfully triggered Immich scan for library {library_id}"
        }
    return {
        "success": False,
        "message": f"Immich scan request failed with status {reply.status_code}: {reply.text}"
    }
# ============================================================================
# ERROR MONITORING SETTINGS
# ============================================================================
class ErrorMonitoringSettings(BaseModel):
    """Settings payload for error monitoring and alerting."""
    # Master switch for error monitoring.
    enabled: bool = True
    # Whether to send push alerts for detected errors.
    push_alert_enabled: bool = True
    # Hours to wait before a push alert is sent.
    push_alert_delay_hours: int = 24
    # Whether to show an error banner on the dashboard.
    dashboard_banner_enabled: bool = True
    # Days to keep error records before pruning.
    retention_days: int = 7
@router.get("/error-monitoring/settings")
@limiter.limit("60/minute")
@handle_exceptions
async def get_error_monitoring_settings(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get error monitoring settings.

    Returns the stored 'error_monitoring' setting, or the defaults declared
    on ErrorMonitoringSettings when nothing has been saved yet.
    """
    app_state = get_app_state()
    # CONSISTENCY FIX: derive the fallback from the model instead of a
    # hand-written dict, so the defaults cannot drift out of sync with
    # ErrorMonitoringSettings.
    return app_state.settings.get(
        'error_monitoring',
        ErrorMonitoringSettings().model_dump()
    )
@router.put("/error-monitoring/settings")
@limiter.limit("30/minute")
@handle_exceptions
async def update_error_monitoring_settings(
    request: Request,
    settings: ErrorMonitoringSettings,
    current_user: Dict = Depends(get_current_user)
):
    """Validate and persist the error monitoring settings."""
    payload = settings.model_dump()
    app_state = get_app_state()
    app_state.settings.set(
        key='error_monitoring',
        value=payload,
        category='monitoring',
        description='Error monitoring and alert settings',
        updated_by=current_user.get('username', 'user')
    )
    return {
        "success": True,
        "message": "Error monitoring settings updated",
        "settings": payload
    }

1617
web/backend/routers/video.py Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff