Files
media-downloader/web/backend/routers/scheduler.py
Todd 523f91788e Fix DB paths, add auth to sensitive endpoints, misc bug fixes
- scheduler.py: Use full path for scheduler_state.db instead of relative name
- recycle.py: Use full path for thumbnails.db instead of relative name
- cloud_backup.py, maintenance.py, stats.py: Require admin for config/cleanup/settings endpoints
- press.py: Add auth to press image serving endpoint
- private_gallery.py: Fix _create_pg_job call and add missing secrets import
- appearances.py: Use sync httpx instead of asyncio.run for background thread HTTP call

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-30 08:25:00 -04:00

759 lines
23 KiB
Python

"""
Scheduler Router
Handles all scheduler and service management operations:
- Scheduler status and task management
- Current activity monitoring
- Task pause/resume/skip/reschedule operations
- Scheduler config reload
- Service start/stop/restart
- Cache builder service management
- Dependency updates
"""
import json
import os
import re
import signal
import sqlite3
import subprocess
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict
from fastapi import APIRouter, Depends, HTTPException, Request
from slowapi import Limiter
from slowapi.util import get_remote_address
from ..core.dependencies import get_current_user, require_admin, get_app_state
from ..core.config import settings
from ..core.exceptions import (
handle_exceptions,
RecordNotFoundError,
ServiceError
)
from ..core.responses import now_iso8601
from modules.universal_logger import get_logger
# Module-level logger obtained from the project's universal logger under the name 'API'.
logger = get_logger('API')
# Every route declared in this module is registered under the /api/scheduler prefix.
router = APIRouter(prefix="/api/scheduler", tags=["Scheduler"])
# Rate limiter behind the @limiter.limit(...) decorators; keyed by slowapi's get_remote_address.
limiter = Limiter(key_func=get_remote_address)
# systemd unit names passed to systemctl by the service endpoints
SCHEDULER_SERVICE = 'media-downloader.service'
CACHE_BUILDER_SERVICE = 'media-cache-builder.service'

# Whitelist of platform names accepted before a platform value is used in a
# subprocess argument (defense in depth)
VALID_PLATFORMS = frozenset({
    'coppermine',
    'fastdl',
    'forums',
    'imginn',
    'imginn_api',
    'instagram',
    'snapchat',
    'tiktok',
    'toolzu',
    'youtube',
})

# Human-readable labels keyed by scheduler task_id prefix
PLATFORM_DISPLAY_NAMES = {
    'fastdl': 'FastDL',
    'imginn': 'ImgInn',
    'imginn_api': 'ImgInn API',
    'toolzu': 'Toolzu',
    'snapchat': 'Snapchat',
    'tiktok': 'TikTok',
    'forums': 'Forums',
    'forum': 'Forum',
    'monitor': 'Forum Monitor',
    'instagram': 'Instagram',
    'youtube': 'YouTube',
    'youtube_channel_monitor': 'YouTube Channels',
    'youtube_monitor': 'YouTube Monitor',
    'coppermine': 'Coppermine',
    'paid_content': 'Paid Content',
    'appearances': 'Appearances',
    'easynews_monitor': 'Easynews Monitor',
    'press_monitor': 'Press Monitor',
}
# ============================================================================
# SCHEDULER STATUS ENDPOINTS
# ============================================================================
@router.get("/status")
@limiter.limit("100/minute")
@handle_exceptions
async def get_scheduler_status(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get detailed scheduler status including all tasks.

    Reads every row of ``scheduler_state`` and, as a side effect, deletes
    rows considered stale: ``forum:``/``monitor:`` tasks whose forum is not
    enabled in the settings, and bare platform task ids that lack the
    ``:username`` suffix. Stale rows and ``maintenance:`` tasks are left out
    of the response.

    Returns:
        A dict with ``running`` (True when at least one task is active),
        ``tasks``, ``total_tasks``, ``active_tasks`` and ``next_run`` (the
        earliest ``next_run`` among active tasks, or None).
    """
    app_state = get_app_state()

    # Names of the forums that are enabled in the config; used to spot stale tasks
    enabled_forums = set()
    forums_config = app_state.settings.get('forums')
    if forums_config and isinstance(forums_config, dict):
        for forum_cfg in forums_config.get('configs', []):
            if forum_cfg.get('enabled', False):
                enabled_forums.add(forum_cfg.get('name'))

    # Platforms that should always have a :username suffix in their task_id
    platforms_requiring_username = {
        'tiktok', 'instagram', 'imginn', 'imginn_api', 'toolzu', 'snapchat', 'fastdl'
    }

    # sqlite3's connection context manager only commits/rolls back; it does
    # not close the connection, so close it explicitly.
    sched_conn = sqlite3.connect(str(settings.PROJECT_ROOT / 'database' / 'scheduler_state.db'))
    try:
        cursor = sched_conn.cursor()
        cursor.execute("""
            SELECT task_id, last_run, next_run, run_count, status, last_download_count
            FROM scheduler_state
            ORDER BY next_run ASC
        """)
        tasks_raw = cursor.fetchall()

        # Collect stale forum/monitor entries and legacy platform entries
        stale_task_ids = set()
        for row in tasks_raw:
            task_id = row[0]
            if task_id.startswith(('forum:', 'monitor:')):
                forum_name = task_id.split(':', 1)[1]
                if forum_name not in enabled_forums:
                    stale_task_ids.add(task_id)
            elif task_id in platforms_requiring_username:
                stale_task_ids.add(task_id)

        if stale_task_ids:
            cursor.executemany(
                "DELETE FROM scheduler_state WHERE task_id = ?",
                [(stale_id,) for stale_id in stale_task_ids]
            )
            sched_conn.commit()
    finally:
        sched_conn.close()

    # Skip stale and maintenance tasks in the response
    tasks = [
        {
            "task_id": row[0],
            "last_run": row[1],
            "next_run": row[2],
            "run_count": row[3],
            "status": row[4],
            "last_download_count": row[5]
        }
        for row in tasks_raw
        if row[0] not in stale_task_ids and not row[0].startswith('maintenance:')
    ]

    active_count = sum(1 for t in tasks if t['status'] == 'active')

    # Earliest next_run among active tasks that actually have one scheduled
    next_run = min(
        (t['next_run'] for t in tasks if t['status'] == 'active' and t['next_run']),
        default=None
    )

    return {
        "running": active_count > 0,
        "tasks": tasks,
        "total_tasks": len(tasks),
        "active_tasks": active_count,
        "next_run": next_run
    }
@router.get("/current-activity")
@limiter.limit("100/minute")
@handle_exceptions
async def get_current_activity(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Report what the scheduler is doing right now.

    Asks ``systemctl is-active`` about the scheduler unit first. When the
    unit is not active an all-empty payload with ``scheduler_running`` False
    is returned; otherwise the activity manager's current activity is
    returned with ``scheduler_running`` set to True.
    """
    app_state = get_app_state()

    probe = subprocess.run(
        ['systemctl', 'is-active', SCHEDULER_SERVICE],
        capture_output=True,
        text=True
    )
    if probe.stdout.strip() != 'active':
        # Scheduler unit is down: nothing can be running
        return {
            "active": False,
            "scheduler_running": False,
            **dict.fromkeys(("task_id", "platform", "account", "start_time", "status")),
        }

    # Scheduler is up: read the live activity record
    from modules.activity_status import get_activity_manager
    activity = get_activity_manager(app_state.db).get_current_activity()
    activity["scheduler_running"] = True
    return activity
@router.get("/background-tasks")
@limiter.limit("100/minute")
@handle_exceptions
async def get_background_tasks(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """List the active background tasks reported by the activity manager.

    Returns:
        ``{"tasks": ...}`` wrapping whatever
        ``get_active_background_tasks()`` yields.
    """
    app_state = get_app_state()
    from modules.activity_status import get_activity_manager
    manager = get_activity_manager(app_state.db)
    return {"tasks": manager.get_active_background_tasks()}
@router.get("/background-tasks/{task_id}")
@limiter.limit("100/minute")
@handle_exceptions
async def get_background_task(
    task_id: str,
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Look up one background task by id.

    Returns:
        The activity manager's record for ``task_id`` when it is truthy,
        otherwise ``{"active": False, "task_id": task_id}``.
    """
    app_state = get_app_state()
    from modules.activity_status import get_activity_manager
    record = get_activity_manager(app_state.db).get_background_task(task_id)
    # An empty/missing record is reported as an inactive task, not an error
    return record if record else {"active": False, "task_id": task_id}
@router.post("/current-activity/stop")
@limiter.limit("20/minute")
@handle_exceptions
async def stop_current_activity(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Stop the currently running download task.

    Reads ``database/current_activity.json``, sends SIGTERM to the
    ``media-downloader.py`` processes found by ``pgrep`` (narrowed to the
    recorded platform when that platform is in ``VALID_PLATFORMS``), rewrites
    the activity file to an inactive state and broadcasts a
    ``download_stopped`` websocket event.

    Returns:
        ``{"success": True, "message": ..., "task_id": ...}``.

    Raises:
        RecordNotFoundError: the activity file is missing or records no
            active task.
    """
    app_state = get_app_state()
    activity_file = settings.PROJECT_ROOT / 'database' / 'current_activity.json'

    # Open directly instead of exists()+open(): the file may vanish in between.
    try:
        with open(activity_file, 'r') as f:
            activity_data = json.load(f)
    except FileNotFoundError:
        raise RecordNotFoundError("No active task running") from None

    if not activity_data.get('active'):
        raise RecordNotFoundError("No active task running")

    task_id = activity_data.get('task_id')
    platform = activity_data.get('platform')

    # Security: Validate platform before using in subprocess (defense in depth)
    if platform and platform not in VALID_PLATFORMS:
        logger.warning(f"Invalid platform in activity file: {platform}", module="Security")
        platform = None

    # Find the process(es); without a trusted platform fall back to any
    # media-downloader process.
    if platform:
        pattern = f'media-downloader\\.py.*--platform.*{re.escape(platform)}'
    else:
        pattern = 'media-downloader\\.py'
    result = subprocess.run(
        ['pgrep', '-f', pattern],
        capture_output=True,
        text=True
    )

    for pid in (p.strip() for p in result.stdout.splitlines() if p.strip()):
        try:
            os.kill(int(pid), signal.SIGTERM)
            logger.info(f"Stopped process {pid} for platform {platform}")
        except (ProcessLookupError, ValueError):
            # Process already exited, or pgrep printed something that is not a pid
            pass
        except PermissionError:
            # Keep going so the activity state below is still cleared
            logger.warning(
                f"Not permitted to stop process {pid} for platform {platform}",
                module="Scheduler"
            )

    # Clear the current activity
    inactive_state = {
        "active": False,
        "task_id": None,
        "platform": None,
        "account": None,
        "start_time": None,
        "status": "stopped"
    }
    with open(activity_file, 'w') as f:
        json.dump(inactive_state, f)

    # Broadcast stop event (best effort: a failed broadcast must not fail the stop)
    try:
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            await app_state.websocket_manager.broadcast({
                "type": "download_stopped",
                "task_id": task_id,
                "platform": platform,
                "timestamp": now_iso8601()
            })
    except Exception as exc:
        logger.warning(f"Failed to broadcast download_stopped: {exc}", module="Scheduler")

    # Avoid "Stopped None download" when the platform is unknown or was rejected
    return {
        "success": True,
        "message": f"Stopped {platform or 'active'} download",
        "task_id": task_id
    }
# ============================================================================
# TASK MANAGEMENT ENDPOINTS
# ============================================================================
@router.post("/tasks/{task_id}/pause")
@limiter.limit("20/minute")
@handle_exceptions
async def pause_scheduler_task(
    request: Request,
    task_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Pause a specific scheduler task.

    Sets ``status = 'paused'`` on the matching ``scheduler_state`` row and
    broadcasts a ``scheduler_task_paused`` websocket event.

    Returns:
        ``{"success": True, "task_id": task_id, "status": "paused"}``.

    Raises:
        RecordNotFoundError: no row matches ``task_id``.
    """
    app_state = get_app_state()

    # sqlite3's connection context manager does not close the connection,
    # so manage its lifetime explicitly.
    sched_conn = sqlite3.connect(str(settings.PROJECT_ROOT / 'database' / 'scheduler_state.db'))
    try:
        cursor = sched_conn.cursor()
        cursor.execute("""
            UPDATE scheduler_state
            SET status = 'paused'
            WHERE task_id = ?
        """, (task_id,))
        sched_conn.commit()
        row_count = cursor.rowcount
    finally:
        sched_conn.close()

    if row_count == 0:
        raise RecordNotFoundError("Task not found", {"task_id": task_id})

    # Broadcast event (best effort: the pause itself has already been committed)
    try:
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            await app_state.websocket_manager.broadcast({
                "type": "scheduler_task_paused",
                "task_id": task_id,
                "timestamp": now_iso8601()
            })
    except Exception as exc:
        logger.warning(f"Failed to broadcast scheduler_task_paused: {exc}", module="Scheduler")

    return {"success": True, "task_id": task_id, "status": "paused"}
@router.post("/tasks/{task_id}/resume")
@limiter.limit("20/minute")
@handle_exceptions
async def resume_scheduler_task(
    request: Request,
    task_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Resume a paused scheduler task.

    Sets ``status = 'active'`` on the matching ``scheduler_state`` row and
    broadcasts a ``scheduler_task_resumed`` websocket event.

    Returns:
        ``{"success": True, "task_id": task_id, "status": "active"}``.

    Raises:
        RecordNotFoundError: no row matches ``task_id``.
    """
    app_state = get_app_state()

    # sqlite3's connection context manager does not close the connection,
    # so manage its lifetime explicitly.
    sched_conn = sqlite3.connect(str(settings.PROJECT_ROOT / 'database' / 'scheduler_state.db'))
    try:
        cursor = sched_conn.cursor()
        cursor.execute("""
            UPDATE scheduler_state
            SET status = 'active'
            WHERE task_id = ?
        """, (task_id,))
        sched_conn.commit()
        row_count = cursor.rowcount
    finally:
        sched_conn.close()

    if row_count == 0:
        raise RecordNotFoundError("Task not found", {"task_id": task_id})

    # Broadcast event (best effort: the resume itself has already been committed)
    try:
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            await app_state.websocket_manager.broadcast({
                "type": "scheduler_task_resumed",
                "task_id": task_id,
                "timestamp": now_iso8601()
            })
    except Exception as exc:
        logger.warning(f"Failed to broadcast scheduler_task_resumed: {exc}", module="Scheduler")

    return {"success": True, "task_id": task_id, "status": "active"}
@router.post("/tasks/{task_id}/skip")
@limiter.limit("20/minute")
@handle_exceptions
async def skip_next_run(
    request: Request,
    task_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Skip the next scheduled run by advancing next_run time.

    The new ``next_run`` is the stored ``next_run`` plus the task's
    ``interval_hours``. A ``scheduler_run_skipped`` websocket event is
    broadcast afterwards.

    Returns:
        A dict with ``success``, ``task_id``, ``skipped_run`` (the previous
        ``next_run``) and ``new_next_run`` (ISO 8601 string).

    Raises:
        RecordNotFoundError: no row matches ``task_id``.
        HTTPException: 409 when the task has no ``next_run`` or no
            ``interval_hours`` stored, so there is nothing to advance.
    """
    app_state = get_app_state()

    # sqlite3's connection context manager does not close the connection,
    # so manage its lifetime explicitly.
    sched_conn = sqlite3.connect(str(settings.PROJECT_ROOT / 'database' / 'scheduler_state.db'))
    try:
        cursor = sched_conn.cursor()

        # Get current task info
        cursor.execute("""
            SELECT next_run, interval_hours
            FROM scheduler_state
            WHERE task_id = ?
        """, (task_id,))
        result = cursor.fetchone()
        if not result:
            raise RecordNotFoundError("Task not found", {"task_id": task_id})

        current_next_run, interval_hours = result

        # Either column may be NULL; fail with a clear client error instead
        # of a TypeError from fromisoformat()/timedelta().
        if not current_next_run or interval_hours is None:
            raise HTTPException(status_code=409, detail="Task has no scheduled run to skip")

        # Calculate and store the new next_run time
        new_next_run = datetime.fromisoformat(current_next_run) + timedelta(hours=interval_hours)
        cursor.execute("""
            UPDATE scheduler_state
            SET next_run = ?
            WHERE task_id = ?
        """, (new_next_run.isoformat(), task_id))
        sched_conn.commit()
    finally:
        sched_conn.close()

    # Broadcast event (best effort: the update has already been committed)
    try:
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            await app_state.websocket_manager.broadcast({
                "type": "scheduler_run_skipped",
                "task_id": task_id,
                "new_next_run": new_next_run.isoformat(),
                "timestamp": now_iso8601()
            })
    except Exception as exc:
        logger.warning(f"Failed to broadcast scheduler_run_skipped: {exc}", module="Scheduler")

    return {
        "success": True,
        "task_id": task_id,
        "skipped_run": current_next_run,
        "new_next_run": new_next_run.isoformat()
    }
@router.post("/tasks/{task_id}/reschedule")
@limiter.limit("20/minute")
@handle_exceptions
async def reschedule_task(
    request: Request,
    task_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Reschedule a task to a new next_run time.

    Expects a JSON object body with a ``next_run`` ISO 8601 datetime string.
    On success a ``scheduler_task_rescheduled`` websocket event is broadcast.

    Returns:
        ``{"success": True, "task_id": task_id, "new_next_run": <iso string>}``.

    Raises:
        HTTPException: 400 when the body is not valid JSON, ``next_run`` is
            missing, or ``next_run`` is not a parseable ISO datetime string.
        RecordNotFoundError: no row matches ``task_id``.
    """
    # A malformed body raises a ValueError subclass from the JSON decoder
    try:
        body = await request.json()
    except ValueError:
        raise HTTPException(status_code=400, detail="Request body must be valid JSON") from None

    # A non-object body (list, string, number) cannot carry next_run
    new_next_run = body.get('next_run') if isinstance(body, dict) else None
    if not new_next_run:
        raise HTTPException(status_code=400, detail="next_run is required")

    # fromisoformat() raises TypeError for non-string input (e.g. a number)
    try:
        parsed = datetime.fromisoformat(new_next_run)
    except (TypeError, ValueError):
        raise HTTPException(status_code=400, detail="Invalid datetime format") from None

    # sqlite3's connection context manager does not close the connection,
    # so manage its lifetime explicitly.
    sched_conn = sqlite3.connect(str(settings.PROJECT_ROOT / 'database' / 'scheduler_state.db'))
    try:
        cursor = sched_conn.cursor()
        cursor.execute(
            "UPDATE scheduler_state SET next_run = ? WHERE task_id = ?",
            (parsed.isoformat(), task_id)
        )
        sched_conn.commit()
        row_count = cursor.rowcount
    finally:
        sched_conn.close()

    if row_count == 0:
        raise RecordNotFoundError("Task not found", {"task_id": task_id})

    # Broadcast event (best effort: the update has already been committed)
    try:
        app_state = get_app_state()
        if hasattr(app_state, 'websocket_manager') and app_state.websocket_manager:
            await app_state.websocket_manager.broadcast({
                "type": "scheduler_task_rescheduled",
                "task_id": task_id,
                "new_next_run": parsed.isoformat(),
                "timestamp": now_iso8601()
            })
    except Exception as exc:
        logger.warning(f"Failed to broadcast scheduler_task_rescheduled: {exc}", module="Scheduler")

    return {"success": True, "task_id": task_id, "new_next_run": parsed.isoformat()}
# ============================================================================
# CONFIG RELOAD ENDPOINT
# ============================================================================
@router.post("/reload-config")
@limiter.limit("10/minute")
@handle_exceptions
async def reload_scheduler_config(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Ask the in-process scheduler to reload its scheduled tasks.

    Returns:
        A dict echoing the ``added``, ``removed`` and ``modified`` entries
        from ``reload_scheduled_tasks()`` plus a summary ``message``.

    Raises:
        ServiceError: ``app_state`` has no scheduler attached.
    """
    app_state = get_app_state()
    scheduler = getattr(app_state, 'scheduler', None)
    if scheduler is None:
        raise ServiceError("Scheduler is not running", {"service": SCHEDULER_SERVICE})

    outcome = scheduler.reload_scheduled_tasks()
    added, removed, modified = outcome['added'], outcome['removed'], outcome['modified']
    summary = (
        f"Reload complete: {len(added)} added, "
        f"{len(removed)} removed, "
        f"{len(modified)} modified"
    )
    return {
        "success": True,
        "added": added,
        "removed": removed,
        "modified": modified,
        "message": summary
    }
# ============================================================================
# SERVICE MANAGEMENT ENDPOINTS
# ============================================================================
@router.get("/service/status")
@limiter.limit("100/minute")
@handle_exceptions
async def get_scheduler_service_status(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Report the scheduler unit's state as printed by ``systemctl is-active``.

    Returns:
        ``{"running": bool, "status": str}`` where ``status`` is the stripped
        stdout of the command and ``running`` is True only for ``'active'``.
    """
    state = subprocess.run(
        ['systemctl', 'is-active', SCHEDULER_SERVICE],
        capture_output=True,
        text=True
    ).stdout.strip()
    return {"running": state == 'active', "status": state}
@router.post("/service/start")
@limiter.limit("20/minute")
@handle_exceptions
async def start_scheduler_service(
    request: Request,
    current_user: Dict = Depends(require_admin)  # admin-only: service operation
):
    """Start the scheduler systemd unit via ``sudo systemctl start``. Admin only.

    Raises:
        ServiceError: the command exited with a non-zero return code; the
            message carries its stderr.
    """
    command = ['sudo', 'systemctl', 'start', SCHEDULER_SERVICE]
    completed = subprocess.run(command, capture_output=True, text=True)
    if completed.returncode == 0:
        return {"success": True, "message": "Scheduler service started"}
    raise ServiceError(
        f"Failed to start service: {completed.stderr}",
        {"service": SCHEDULER_SERVICE}
    )
@router.post("/service/stop")
@limiter.limit("20/minute")
@handle_exceptions
async def stop_scheduler_service(
    request: Request,
    current_user: Dict = Depends(require_admin)  # admin-only: service operation
):
    """Stop the scheduler systemd unit via ``sudo systemctl stop``. Admin only.

    Raises:
        ServiceError: the command exited with a non-zero return code; the
            message carries its stderr.
    """
    command = ['sudo', 'systemctl', 'stop', SCHEDULER_SERVICE]
    completed = subprocess.run(command, capture_output=True, text=True)
    if completed.returncode == 0:
        return {"success": True, "message": "Scheduler service stopped"}
    raise ServiceError(
        f"Failed to stop service: {completed.stderr}",
        {"service": SCHEDULER_SERVICE}
    )
@router.post("/service/restart")
@limiter.limit("20/minute")
@handle_exceptions
async def restart_scheduler_service(
    request: Request,
    current_user: Dict = Depends(require_admin)
):
    """Restart the scheduler systemd unit via ``sudo systemctl restart``. Admin only.

    Raises:
        ServiceError: the command exited with a non-zero return code; the
            message carries its stderr.
    """
    command = ['sudo', 'systemctl', 'restart', SCHEDULER_SERVICE]
    completed = subprocess.run(command, capture_output=True, text=True)
    if completed.returncode == 0:
        return {"success": True, "message": "Scheduler service restarted"}
    raise ServiceError(
        f"Failed to restart service: {completed.stderr}",
        {"service": SCHEDULER_SERVICE}
    )
# ============================================================================
# DEPENDENCY MANAGEMENT ENDPOINTS
# ============================================================================
@router.get("/dependencies/status")
@limiter.limit("100/minute")
@handle_exceptions
async def get_dependencies_status(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Return the update status reported by a ``DependencyUpdater``.

    The updater is constructed with ``scheduler_mode=False`` and only its
    ``get_update_status()`` result is returned.
    """
    from modules.dependency_updater import DependencyUpdater
    return DependencyUpdater(scheduler_mode=False).get_update_status()
@router.post("/dependencies/check")
@limiter.limit("20/minute")
@handle_exceptions
async def check_dependencies(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Run ``DependencyUpdater.force_update_check()`` and return its results.

    A Pushover notifier is built from the config and handed to the updater
    only when ``pushover.enabled`` is truthy in the settings.

    Returns:
        ``{"success": True, "results": ..., "message": "Dependency check completed"}``.
    """
    app_state = get_app_state()
    from modules.dependency_updater import DependencyUpdater
    from modules.pushover_notifier import create_notifier_from_config

    config = app_state.settings.get_all()

    # Notifications are optional: only wire up Pushover when it is enabled
    notifier = (
        create_notifier_from_config(config, unified_db=app_state.db)
        if config.get('pushover', {}).get('enabled')
        else None
    )

    results = DependencyUpdater(
        config=config.get('dependency_updater', {}),
        pushover_notifier=notifier,
        scheduler_mode=True
    ).force_update_check()

    return {
        "success": True,
        "results": results,
        "message": "Dependency check completed"
    }
# ============================================================================
# CACHE BUILDER SERVICE ENDPOINTS
# ============================================================================
@router.post("/cache-builder/trigger")
@limiter.limit("10/minute")
@handle_exceptions
async def trigger_cache_builder(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Manually trigger the thumbnail cache builder service.

    Runs ``sudo systemctl start`` on the cache builder unit with a 5 second
    timeout.

    Returns:
        ``{"success": True, "message": "Cache builder started successfully"}``.

    Raises:
        ServiceError: the command timed out or exited with a non-zero
            return code.
    """
    # NOTE(review): start/stop/restart of the scheduler service in this file
    # depend on require_admin, while this endpoint only needs an
    # authenticated user — confirm that is intended.
    try:
        result = subprocess.run(
            ['sudo', 'systemctl', 'start', CACHE_BUILDER_SERVICE],
            capture_output=True,
            text=True,
            timeout=5
        )
    except subprocess.TimeoutExpired:
        # Surface a timeout through the same error type as a failed start
        raise ServiceError(
            "Failed to start cache builder: timed out after 5 seconds",
            {"service": CACHE_BUILDER_SERVICE}
        ) from None

    if result.returncode != 0:
        raise ServiceError(
            f"Failed to start cache builder: {result.stderr}",
            {"service": CACHE_BUILDER_SERVICE}
        )
    return {"success": True, "message": "Cache builder started successfully"}
@router.get("/cache-builder/status")
@limiter.limit("30/minute")
@handle_exceptions
async def get_cache_builder_status(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get detailed cache builder service status.

    Parses the output of ``systemctl status`` for the cache builder unit.

    Returns:
        A dict with ``running``, ``inactive``, ``status_output`` (first 500
        characters of the command output), and ``last_run``/``next_run``,
        which are currently always None.
    """
    result = subprocess.run(
        ['systemctl', 'status', CACHE_BUILDER_SERVICE, '--no-pager'],
        capture_output=True,
        text=True
    )
    status_output = result.stdout

    # Derive the state from the "Active:" line of the status output
    is_running = 'Active: active (running)' in status_output
    is_inactive = 'Active: inactive' in status_output

    # TODO: populate last_run/next_run from the unit's timer. The previous
    # code ran `systemctl list-timers` on every request but discarded the
    # output, so the keys are kept (as None) without spawning that process.
    last_run = None
    next_run = None

    return {
        "running": is_running,
        "inactive": is_inactive,
        "status_output": status_output[:500],  # Truncate for brevity
        "last_run": last_run,
        "next_run": next_run
    }