"""
|
|
Scrapers Router
|
|
|
|
Handles scraper management and error monitoring:
|
|
- Scraper configuration (list, get, update)
|
|
- Cookie management (test connection, upload, clear)
|
|
- Error tracking (recent, count, dismiss, mark viewed)
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional
|
|
|
|
import requests
|
|
from fastapi import APIRouter, Body, Depends, Query, Request
|
|
from pydantic import BaseModel
|
|
from slowapi import Limiter
|
|
from slowapi.util import get_remote_address
|
|
|
|
from ..core.dependencies import get_current_user, require_admin, get_app_state
|
|
from ..core.exceptions import handle_exceptions, NotFoundError, ValidationError
|
|
from modules.universal_logger import get_logger
|
|
|
|
logger = get_logger('API')
|
|
|
|
router = APIRouter(prefix="/api", tags=["Scrapers"])
|
|
limiter = Limiter(key_func=get_remote_address)
|
|
|
|
|
|
# ============================================================================
|
|
# PYDANTIC MODELS
|
|
# ============================================================================
|
|
|
|
class ScraperUpdate(BaseModel):
    """Partial-update payload for a scraper; every field is optional so
    callers can PUT only the settings they want to change."""

    enabled: Optional[bool] = None
    # Proxy routing for this scraper's requests
    proxy_enabled: Optional[bool] = None
    proxy_url: Optional[str] = None
    # Whether the site needs FlareSolverr to pass Cloudflare challenges
    flaresolverr_required: Optional[bool] = None
    base_url: Optional[str] = None
|
|
|
|
|
|
class CookieUpload(BaseModel):
    """Cookie upload payload (browser-extension export format)."""

    # List of cookie objects; each must carry at least 'name' and 'value'
    cookies: List[dict]
    # merge=True combines with stored cookies; False replaces them
    merge: bool = True
    # Optional User-Agent captured alongside the cookies
    user_agent: Optional[str] = None
|
|
|
|
|
|
class DismissErrors(BaseModel):
    """Request body for dismissing errors: specific IDs or everything."""

    error_ids: Optional[List[int]] = None
    dismiss_all: bool = False
|
|
|
|
|
|
class MarkErrorsViewed(BaseModel):
    """Request body for marking errors viewed: specific IDs or everything."""

    error_ids: Optional[List[int]] = None
    mark_all: bool = False
|
|
|
|
|
|
# ============================================================================
|
|
# SCRAPER ENDPOINTS
|
|
# ============================================================================
|
|
|
|
@router.get("/scrapers")
@limiter.limit("60/minute")
@handle_exceptions
async def get_scrapers(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    type_filter: Optional[str] = Query(None, alias="type", description="Filter by type")
):
    """Get all scrapers with optional type filter."""
    app_state = get_app_state()
    scrapers = app_state.db.get_all_scrapers(type_filter=type_filter)

    # Hide scrapers whose related modules have ALL been hidden in config.
    hidden_modules = app_state.config.get('hidden_modules', [])
    if hidden_modules:
        # Which modules consume each scraper. A scraper with no mapping is
        # never hidden; forum_* scrapers all map to the 'forums' module.
        module_map = {
            'instagram': ['instagram', 'instagram_client'],
            'snapchat': ['snapchat', 'snapchat_client'],
            'fastdl': ['fastdl'],
            'imginn': ['imginn'],
            'toolzu': ['toolzu'],
            'tiktok': ['tiktok'],
            'coppermine': ['coppermine'],
        }

        def _fully_hidden(entry: dict) -> bool:
            entry_id = entry.get('id', '')
            if entry_id.startswith('forum_'):
                related = ['forums']
            else:
                related = module_map.get(entry_id, [])
            return bool(related) and all(m in hidden_modules for m in related)

        scrapers = [s for s in scrapers if not _fully_hidden(s)]

    # Strip the cookie blob — it is too large to ship to the frontend.
    for entry in scrapers:
        entry.pop('cookies_json', None)

    return {"scrapers": scrapers}
|
|
|
|
|
|
# ============================================================================
|
|
# PLATFORM CREDENTIALS (UNIFIED COOKIE MANAGEMENT)
|
|
# ============================================================================
|
|
|
|
# Platform definitions for the unified credentials view.
# 'source' identifies the subsystem that owns the credential; scraper-backed
# platforms keep their cookies in the scrapers table (cookies_json column).
_SCRAPER_PLATFORMS = [
    {'id': 'instagram', 'name': 'Instagram', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
    {'id': 'tiktok', 'name': 'TikTok', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
    {'id': 'snapchat', 'name': 'Snapchat', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
    {'id': 'ytdlp', 'name': 'YouTube', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
    {'id': 'pornhub', 'name': 'PornHub', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
    {'id': 'xhamster', 'name': 'xHamster', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
]

# Paid-content platforms store a single credential string per service
# (see PaidContentDBAdapter / 'session_cookie'); 'type' distinguishes
# bearer tokens from session cookies for the UI.
_PAID_CONTENT_PLATFORMS = [
    {'id': 'onlyfans_direct', 'name': 'OnlyFans', 'type': 'token', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://onlyfans.com'},
    {'id': 'fansly_direct', 'name': 'Fansly', 'type': 'token', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://fansly.com'},
    {'id': 'coomer', 'name': 'Coomer', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://coomer.su'},
    {'id': 'kemono', 'name': 'Kemono', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://kemono.su'},
    {'id': 'twitch', 'name': 'Twitch', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://twitch.tv'},
    {'id': 'bellazon', 'name': 'Bellazon', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://www.bellazon.com'},
]
|
|
|
|
|
|
@router.get("/scrapers/platform-credentials")
@limiter.limit("30/minute")
@handle_exceptions
async def get_platform_credentials(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get aggregated credential status for all platforms + monitoring preferences.

    Aggregates three credential sources into one list:
      1. Scraper platforms (cookies stored in the scrapers table),
      2. Paid-content services (single session/token string per service),
      3. Reddit (encrypted cookies in the private-gallery database).

    Returns:
        dict with 'platforms' (per-platform status entries) and
        'global_monitoring_enabled' (the 'global' monitoring flag).
    """
    app_state = get_app_state()
    db = app_state.db
    platforms = []

    def _get_monitoring_flag(platform_id: str) -> bool:
        """Read monitoring preference from settings."""
        try:
            val = app_state.settings.get(f"cookie_monitoring:{platform_id}")
            if val is not None:
                # Stored as a string; anything except explicit false-y words is on
                return str(val).lower() not in ('false', '0', 'no')
        except Exception:
            # Settings backend failure: fall through to the default below
            pass
        # Default: monitoring enabled when no preference is stored
        return True

    # 1. Scraper platforms
    for platform_def in _SCRAPER_PLATFORMS:
        scraper = db.get_scraper(platform_def['id'])
        cookies_count = 0
        updated_at = None
        if scraper:
            raw = scraper.get('cookies_json')
            if raw:
                try:
                    # cookies_json may be a bare list or {"cookies": [...]}
                    data = json.loads(raw)
                    if isinstance(data, list):
                        cookies_count = len(data)
                    elif isinstance(data, dict):
                        c = data.get('cookies', [])
                        cookies_count = len(c) if isinstance(c, list) else 0
                except (json.JSONDecodeError, TypeError):
                    # Unparseable blob counts as zero cookies
                    pass
            updated_at = scraper.get('cookies_updated_at')

        monitoring_enabled = _get_monitoring_flag(platform_def['id'])

        platforms.append({
            'id': platform_def['id'],
            'name': platform_def['name'],
            'type': platform_def['type'],
            'source': platform_def['source'],
            'cookies_count': cookies_count,
            'has_credentials': cookies_count > 0,
            'updated_at': updated_at,
            'used_by': platform_def['used_by'],
            'monitoring_enabled': monitoring_enabled,
        })

    # 2. Paid content platforms
    try:
        # Imported lazily; a missing/broken paid_content module must not
        # break the rest of the credentials view.
        from modules.paid_content import PaidContentDBAdapter
        paid_db = PaidContentDBAdapter(db)
        paid_services = {svc['id']: svc for svc in paid_db.get_services()}
    except Exception:
        paid_services = {}

    for platform_def in _PAID_CONTENT_PLATFORMS:
        svc = paid_services.get(platform_def['id'], {})
        session_val = svc.get('session_cookie') or ''
        has_creds = bool(session_val)
        updated_at = svc.get('session_updated_at')

        # Count credentials: for JSON objects count keys, for JSON arrays count items, otherwise 1 if set
        cookies_count = 0
        if has_creds:
            try:
                parsed = json.loads(session_val)
                if isinstance(parsed, dict):
                    cookies_count = len(parsed)
                elif isinstance(parsed, list):
                    cookies_count = len(parsed)
                else:
                    # Valid JSON scalar (e.g. a quoted token string)
                    cookies_count = 1
            except (json.JSONDecodeError, TypeError):
                # Plain non-JSON token string: one credential
                cookies_count = 1

        platforms.append({
            'id': platform_def['id'],
            'name': platform_def['name'],
            'type': platform_def['type'],
            'source': platform_def['source'],
            'base_url': platform_def.get('base_url'),
            'cookies_count': cookies_count,
            'has_credentials': has_creds,
            'updated_at': updated_at,
            'used_by': platform_def['used_by'],
            'monitoring_enabled': _get_monitoring_flag(platform_def['id']),
        })

    # 3. Reddit (private gallery)
    reddit_has_creds = False
    reddit_cookies_count = 0
    reddit_locked = True
    try:
        from modules.reddit_community_monitor import RedditCommunityMonitor, REDDIT_MONITOR_KEY_FILE
        from modules.private_gallery_crypto import get_private_gallery_crypto, load_key_from_file
        # NOTE(review): path assumes this file lives four levels below the
        # project root next to database/media_downloader.db — verify on layout changes
        db_path = str(Path(__file__).parent.parent.parent.parent / 'database' / 'media_downloader.db')
        reddit_monitor = RedditCommunityMonitor(db_path)
        crypto = get_private_gallery_crypto()
        reddit_locked = not crypto.is_initialized()

        # If gallery is locked, try loading crypto from key file (exported on unlock)
        active_crypto = crypto if not reddit_locked else load_key_from_file(REDDIT_MONITOR_KEY_FILE)

        if active_crypto and active_crypto.is_initialized():
            reddit_has_creds = reddit_monitor.has_cookies(active_crypto)
            if reddit_has_creds:
                try:
                    # Decrypt the stored blob just to report a cookie count
                    conn = reddit_monitor._get_connection()
                    cursor = conn.cursor()
                    cursor.execute("SELECT value FROM private_media_config WHERE key = 'reddit_monitor_encrypted_cookies'")
                    row = cursor.fetchone()
                    conn.close()
                    if row and row['value']:
                        decrypted = active_crypto.decrypt_field(row['value'])
                        parsed = json.loads(decrypted)
                        if isinstance(parsed, list):
                            reddit_cookies_count = len(parsed)
                except Exception:
                    # Cookies exist but can't be counted; report at least 1
                    reddit_cookies_count = 1 if reddit_has_creds else 0
    except Exception:
        # Reddit modules unavailable or DB unreadable: report defaults
        pass

    platforms.append({
        'id': 'reddit',
        'name': 'Reddit',
        'type': 'cookies',
        'source': 'private_gallery',
        'base_url': 'https://reddit.com',
        'cookies_count': reddit_cookies_count,
        'has_credentials': reddit_has_creds,
        'gallery_locked': reddit_locked,
        'updated_at': None,
        'used_by': ['Private Gallery'],
        'monitoring_enabled': _get_monitoring_flag('reddit'),
    })

    return {
        'platforms': platforms,
        'global_monitoring_enabled': _get_monitoring_flag('global'),
    }
|
|
|
|
|
|
@router.put("/scrapers/platform-credentials/{platform_id}/monitoring")
@limiter.limit("30/minute")
@handle_exceptions
async def toggle_platform_monitoring(
    request: Request,
    platform_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Toggle health monitoring for a single platform."""
    app_state = get_app_state()
    payload = await request.json()
    enabled = payload.get('enabled', True)
    state_word = 'enabled' if enabled else 'disabled'

    # Persist as a lowercase string under the per-platform monitoring key
    app_state.settings.set(
        key=f"cookie_monitoring:{platform_id}",
        value=str(enabled).lower(),
        category="cookie_monitoring",
        updated_by=current_user.get('username', 'user')
    )

    return {
        'success': True,
        'message': f"Monitoring {state_word} for {platform_id}",
    }
|
|
|
|
|
|
@router.put("/scrapers/platform-credentials/monitoring")
@limiter.limit("30/minute")
@handle_exceptions
async def toggle_global_monitoring(
    request: Request,
    current_user: Dict = Depends(require_admin)
):
    """Toggle global cookie health monitoring."""
    app_state = get_app_state()
    payload = await request.json()
    enabled = payload.get('enabled', True)
    state_word = 'enabled' if enabled else 'disabled'

    # The 'global' key gates monitoring for all platforms at once
    app_state.settings.set(
        key="cookie_monitoring:global",
        value=str(enabled).lower(),
        category="cookie_monitoring",
        updated_by=current_user.get('username', 'user')
    )

    return {
        'success': True,
        'message': f"Global cookie monitoring {state_word}",
    }
|
|
|
|
|
|
@router.get("/scrapers/{scraper_id}")
@limiter.limit("60/minute")
@handle_exceptions
async def get_scraper(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Get a single scraper configuration."""
    app_state = get_app_state()
    record = app_state.db.get_scraper(scraper_id)
    if not record:
        raise NotFoundError(f"Scraper '{scraper_id}' not found")

    # Replace the (large) raw cookie blob with just a count
    record.pop('cookies_json', None)
    stored_cookies = app_state.db.get_scraper_cookies(scraper_id)
    record['cookies_count'] = len(stored_cookies) if stored_cookies else 0

    return record
|
|
|
|
|
|
@router.put("/scrapers/{scraper_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def update_scraper(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Update scraper settings (proxy, enabled, base_url)."""
    app_state = get_app_state()
    payload = await request.json()

    # 404 before attempting the update so the client gets a precise error
    if not app_state.db.get_scraper(scraper_id):
        raise NotFoundError(f"Scraper '{scraper_id}' not found")

    if not app_state.db.update_scraper(scraper_id, payload):
        raise ValidationError("No valid fields to update")

    return {"success": True, "message": f"Scraper '{scraper_id}' updated"}
|
|
|
|
|
|
@router.post("/scrapers/{scraper_id}/test")
@limiter.limit("10/minute")
@handle_exceptions
async def test_scraper_connection(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """
    Test scraper connection via FlareSolverr (if required).
    On success, saves cookies to database.
    For CLI tools (yt-dlp, gallery-dl), tests that the tool is installed and working.

    Returns:
        dict with 'success' and a human-readable 'message'; FlareSolverr
        successes also include 'cookies_count'.
    Raises:
        NotFoundError: unknown scraper_id.
        ValidationError: non-CLI scraper with no base_url configured.
    """
    import subprocess
    from modules.cloudflare_handler import CloudflareHandler

    app_state = get_app_state()
    scraper = app_state.db.get_scraper(scraper_id)
    if not scraper:
        raise NotFoundError(f"Scraper '{scraper_id}' not found")

    # --- CLI tools: verify the binary runs and report any configured cookies ---
    if scraper.get('type') == 'cli_tool':
        cli_tests = {
            'ytdlp': {
                'cmd': ['/opt/media-downloader/venv/bin/yt-dlp', '--version'],
                'name': 'yt-dlp'
            },
            'gallerydl': {
                'cmd': ['/opt/media-downloader/venv/bin/gallery-dl', '--version'],
                'name': 'gallery-dl'
            }
        }

        test_config = cli_tests.get(scraper_id)
        if test_config:
            try:
                result = subprocess.run(
                    test_config['cmd'],
                    capture_output=True,
                    text=True,
                    timeout=10
                )
                if result.returncode == 0:
                    version = result.stdout.strip().split('\n')[0]
                    cookies_count = 0
                    # Check if cookies are configured (uses the module-level
                    # json import; the previous redundant local import is gone)
                    if scraper.get('cookies_json'):
                        try:
                            data = json.loads(scraper['cookies_json'])
                            # Support both {"cookies": [...]} and [...] formats
                            if isinstance(data, dict) and 'cookies' in data:
                                cookies = data['cookies']
                            elif isinstance(data, list):
                                cookies = data
                            else:
                                cookies = []
                            cookies_count = len(cookies) if cookies else 0
                        except (json.JSONDecodeError, TypeError, KeyError) as e:
                            logger.debug(f"Failed to parse cookies for {scraper_id}: {e}")

                    app_state.db.update_scraper_test_status(scraper_id, 'success')
                    msg = f"{test_config['name']} v{version} installed"
                    if cookies_count > 0:
                        msg += f", {cookies_count} cookies configured"
                    return {
                        "success": True,
                        "message": msg
                    }
                else:
                    error_msg = result.stderr.strip() or "Command failed"
                    app_state.db.update_scraper_test_status(scraper_id, 'failed', error_msg)
                    return {
                        "success": False,
                        "message": f"{test_config['name']} error: {error_msg}"
                    }
            except subprocess.TimeoutExpired:
                app_state.db.update_scraper_test_status(scraper_id, 'failed', "Command timed out")
                return {"success": False, "message": "Command timed out"}
            except FileNotFoundError:
                app_state.db.update_scraper_test_status(scraper_id, 'failed', "Tool not installed")
                return {"success": False, "message": f"{test_config['name']} not installed"}
        else:
            # Unknown CLI tool: nothing to execute, just mark it registered
            app_state.db.update_scraper_test_status(scraper_id, 'success')
            return {"success": True, "message": "CLI tool registered"}

    base_url = scraper.get('base_url')
    if not base_url:
        raise ValidationError(f"Scraper '{scraper_id}' has no base_url configured")

    proxy_url = None
    if scraper.get('proxy_enabled') and scraper.get('proxy_url'):
        proxy_url = scraper['proxy_url']

    cf_handler = CloudflareHandler(
        module_name=scraper_id,
        cookie_file=None,
        proxy_url=proxy_url,  # already None when no proxy is configured
        flaresolverr_enabled=scraper.get('flaresolverr_required', False)
    )

    if scraper.get('flaresolverr_required'):
        # --- FlareSolverr path: solve the challenge, persist resulting cookies ---
        success = cf_handler.get_cookies_via_flaresolverr(base_url, max_retries=2)

        if success:
            cookies = cf_handler.get_cookies_list()
            user_agent = cf_handler.get_user_agent()

            app_state.db.save_scraper_cookies(scraper_id, cookies, user_agent=user_agent)
            app_state.db.update_scraper_test_status(scraper_id, 'success')

            return {
                "success": True,
                "message": f"Connection successful, {len(cookies)} cookies saved",
                "cookies_count": len(cookies)
            }
        else:
            error_msg = "FlareSolverr returned no cookies"
            if proxy_url:
                error_msg += " (check proxy connection)"
            app_state.db.update_scraper_test_status(scraper_id, 'failed', error_msg)
            return {
                "success": False,
                "message": error_msg
            }
    else:
        # --- Plain HTTP reachability check (any status < 400 counts as success) ---
        try:
            proxies = {"http": proxy_url, "https": proxy_url} if proxy_url else None
            response = requests.get(
                base_url,
                timeout=10,
                proxies=proxies,
                headers={'User-Agent': cf_handler.user_agent}
            )

            if response.status_code < 400:
                app_state.db.update_scraper_test_status(scraper_id, 'success')
                return {
                    "success": True,
                    "message": f"Connection successful (HTTP {response.status_code})"
                }
            else:
                app_state.db.update_scraper_test_status(
                    scraper_id, 'failed',
                    f"HTTP {response.status_code}"
                )
                return {
                    "success": False,
                    "message": f"Connection failed with HTTP {response.status_code}"
                }
        except requests.exceptions.Timeout:
            app_state.db.update_scraper_test_status(scraper_id, 'timeout', 'Request timed out')
            return {"success": False, "message": "Connection timed out"}
        except Exception as e:
            app_state.db.update_scraper_test_status(scraper_id, 'failed', str(e))
            return {"success": False, "message": str(e)}
|
|
|
|
|
|
@router.post("/scrapers/{scraper_id}/cookies")
@limiter.limit("20/minute")
@handle_exceptions
async def upload_scraper_cookies(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Upload cookies for a scraper (from browser extension export)."""
    app_state = get_app_state()

    if not app_state.db.get_scraper(scraper_id):
        raise NotFoundError(f"Scraper '{scraper_id}' not found")

    payload = await request.json()

    # Support both {cookies: [...]} and bare [...] formats
    if isinstance(payload, list):
        cookies, merge, user_agent = payload, True, None
    else:
        cookies = payload.get('cookies', [])
        merge = payload.get('merge', True)
        user_agent = payload.get('user_agent')

    if not isinstance(cookies, list) or not cookies:
        raise ValidationError("Invalid cookies format. Expected {cookies: [...]}")

    # Every entry must be an object carrying at least name and value
    for idx, entry in enumerate(cookies):
        if not isinstance(entry, dict):
            raise ValidationError(f"Cookie {idx} is not an object")
        if 'name' not in entry or 'value' not in entry:
            raise ValidationError(f"Cookie {idx} missing 'name' or 'value'")

    saved = app_state.db.save_scraper_cookies(
        scraper_id, cookies,
        user_agent=user_agent,
        merge=merge
    )
    if not saved:
        raise ValidationError("Failed to save cookies")

    stored = app_state.db.get_scraper_cookies(scraper_id)
    total = len(stored) if stored else 0
    action = 'Merged' if merge else 'Replaced'

    return {
        "success": True,
        "message": f"{action} {len(cookies)} cookies (total: {total})",
        "cookies_count": total
    }
|
|
|
|
|
|
@router.delete("/scrapers/{scraper_id}/cookies")
@limiter.limit("20/minute")
@handle_exceptions
async def clear_scraper_cookies(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Clear all cookies for a scraper."""
    app_state = get_app_state()

    if not app_state.db.get_scraper(scraper_id):
        raise NotFoundError(f"Scraper '{scraper_id}' not found")

    cleared = app_state.db.clear_scraper_cookies(scraper_id)
    if cleared:
        outcome = f"Cookies cleared for '{scraper_id}'"
    else:
        outcome = "Failed to clear cookies"

    return {
        "success": cleared,
        "message": outcome
    }
|
|
|
|
|
|
# ============================================================================
|
|
# ERROR MONITORING ENDPOINTS
|
|
# ============================================================================
|
|
|
|
@router.get("/errors/recent")
@limiter.limit("30/minute")
@handle_exceptions
async def get_recent_errors(
    request: Request,
    limit: int = Query(50, ge=1, le=500, description="Maximum number of errors to return"),
    since_visit: bool = Query(False, description="Only show errors since last dashboard visit (default: show ALL unviewed)"),
    include_dismissed: bool = Query(False, description="Include dismissed errors"),
    current_user: Dict = Depends(get_current_user)
):
    """Get recent errors from database.

    By default, shows ALL unviewed/undismissed errors regardless of when they occurred.
    This ensures errors are not missed just because the user visited the dashboard.
    Errors are recorded in real-time by universal_logger.py.
    """
    app_state = get_app_state()

    # since=None means "all unviewed"; only narrow the window when asked to.
    # With no recorded visit, fall back to a 24-hour window.
    since = None
    if since_visit:
        since = app_state.db.get_last_dashboard_visit() or (datetime.now() - timedelta(hours=24))

    errors = app_state.db.get_recent_errors(since=since, include_dismissed=include_dismissed, limit=limit)

    return {
        "errors": errors,
        "total_count": len(errors),
        "since": since.isoformat() if since else None,
        "unviewed_count": app_state.db.get_unviewed_error_count(since=None)  # Always count ALL unviewed
    }
|
|
|
|
|
|
@router.get("/errors/count")
@limiter.limit("60/minute")
@handle_exceptions
async def get_error_count(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get count of ALL unviewed/undismissed errors.

    Errors are recorded in real-time by universal_logger.py.
    """
    app_state = get_app_state()
    db = app_state.db

    # Every unviewed error, regardless of age
    total_unviewed = db.get_unviewed_error_count(since=None)

    # Narrower count: only errors newer than the last dashboard visit.
    # Without a recorded visit, the narrow count equals the full count.
    last_visit = db.get_last_dashboard_visit()
    if last_visit:
        since_last_visit = db.get_unviewed_error_count(since=last_visit)
    else:
        since_last_visit = total_unviewed

    return {
        "unviewed_count": total_unviewed,
        "total_recent": total_unviewed,
        "since_last_visit": since_last_visit
    }
|
|
|
|
|
|
@router.post("/errors/dismiss")
@limiter.limit("20/minute")
@handle_exceptions
async def dismiss_errors(
    request: Request,
    body: Dict = Body(...),
    current_user: Dict = Depends(get_current_user)
):
    """Dismiss errors by ID or all."""
    app_state = get_app_state()

    ids = body.get("error_ids", [])

    if body.get("dismiss_all", False):
        count = app_state.db.dismiss_errors(dismiss_all=True)
    elif ids:
        count = app_state.db.dismiss_errors(error_ids=ids)
    else:
        # Neither a bulk flag nor explicit IDs: nothing to do
        return {"success": False, "dismissed": 0, "message": "No errors specified"}

    return {
        "success": True,
        "dismissed": count,
        "message": f"Dismissed {count} error(s)"
    }
|
|
|
|
|
|
@router.post("/errors/mark-viewed")
@limiter.limit("20/minute")
@handle_exceptions
async def mark_errors_viewed(
    request: Request,
    body: Dict = Body(...),
    current_user: Dict = Depends(get_current_user)
):
    """Mark errors as viewed."""
    app_state = get_app_state()

    ids = body.get("error_ids", [])

    if body.get("mark_all", False):
        count = app_state.db.mark_errors_viewed(mark_all=True)
    elif ids:
        count = app_state.db.mark_errors_viewed(error_ids=ids)
    else:
        # Nothing selected: report zero without touching the database
        return {"success": False, "marked": 0}

    return {
        "success": True,
        "marked": count
    }
|
|
|
|
|
|
@router.post("/errors/update-visit")
@limiter.limit("30/minute")
@handle_exceptions
async def update_dashboard_visit(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Update the last dashboard visit timestamp."""
    app_state = get_app_state()
    return {"success": app_state.db.update_dashboard_visit()}
|
|
|
|
|
|
@router.get("/logs/context")
@limiter.limit("30/minute")
@handle_exceptions
async def get_log_context(
    request: Request,
    timestamp: str = Query(..., description="ISO timestamp of the error"),
    module: Optional[str] = Query(None, description="Module name to filter"),
    minutes_before: int = Query(1, description="Minutes of context before error"),
    minutes_after: int = Query(1, description="Minutes of context after error"),
    current_user: Dict = Depends(get_current_user)
):
    """Get log lines around a specific timestamp for debugging context."""
    target_time = datetime.fromisoformat(timestamp)
    window_start = target_time - timedelta(minutes=minutes_before)
    window_end = target_time + timedelta(minutes=minutes_after)

    log_dir = Path('/opt/media-downloader/logs')
    # Expected line shape: "YYYY-MM-DD HH:MM:SS [MediaDownloader.X] [MOD] [LEVEL] msg"
    log_pattern = re.compile(
        r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) '
        r'\[MediaDownloader\.(\w+)\] '
        r'\[(\w+)\] '
        r'\[(\w+)\] '
        r'(.+)$'
    )

    date_str = target_time.strftime('%Y%m%d')
    context_lines = []

    for log_file in log_dir.glob(f'{date_str}_*.log'):
        # Optional module filter matches against the log file name
        if module and module.lower() not in log_file.stem.lower():
            continue

        try:
            for raw_line in log_file.read_text(errors='replace').splitlines():
                match = log_pattern.match(raw_line)
                if not match:
                    continue
                ts_text, _, entry_module, entry_level, entry_msg = match.groups()
                try:
                    entry_time = datetime.strptime(ts_text, '%Y-%m-%d %H:%M:%S')
                except ValueError:
                    continue
                if window_start <= entry_time <= window_end:
                    context_lines.append({
                        'timestamp': ts_text,
                        'module': entry_module,
                        'level': entry_level,
                        'message': entry_msg,
                        # Flag lines within ~2s of the error as the likely target
                        'is_target': abs((entry_time - target_time).total_seconds()) < 2
                    })
        except Exception:
            # Unreadable log file: skip it and keep scanning the rest
            continue

    context_lines.sort(key=lambda entry: entry['timestamp'])

    return {
        "context": context_lines,
        "target_timestamp": timestamp,
        "range": {
            "start": window_start.isoformat(),
            "end": window_end.isoformat()
        }
    }