819
web/backend/routers/scrapers.py
Normal file
819
web/backend/routers/scrapers.py
Normal file
@@ -0,0 +1,819 @@
|
||||
"""
|
||||
Scrapers Router
|
||||
|
||||
Handles scraper management and error monitoring:
|
||||
- Scraper configuration (list, get, update)
|
||||
- Cookie management (test connection, upload, clear)
|
||||
- Error tracking (recent, count, dismiss, mark viewed)
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import requests
|
||||
from fastapi import APIRouter, Body, Depends, Query, Request
|
||||
from pydantic import BaseModel
|
||||
from slowapi import Limiter
|
||||
from slowapi.util import get_remote_address
|
||||
|
||||
from ..core.dependencies import get_current_user, require_admin, get_app_state
|
||||
from ..core.exceptions import handle_exceptions, NotFoundError, ValidationError
|
||||
from modules.universal_logger import get_logger
|
||||
|
||||
logger = get_logger('API')
|
||||
|
||||
router = APIRouter(prefix="/api", tags=["Scrapers"])
|
||||
limiter = Limiter(key_func=get_remote_address)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# PYDANTIC MODELS
|
||||
# ============================================================================
|
||||
|
||||
class ScraperUpdate(BaseModel):
    """Partial-update payload for a scraper configuration.

    All fields are optional; only supplied fields are meant to change.
    NOTE(review): this model is not referenced by any endpoint in this file
    (update_scraper reads the raw request body) — confirm it is used
    elsewhere or consider wiring it into the PUT endpoint.
    """
    enabled: Optional[bool] = None                 # turn the scraper on/off
    proxy_enabled: Optional[bool] = None           # route requests through proxy_url
    proxy_url: Optional[str] = None                # proxy endpoint to use when proxy_enabled
    flaresolverr_required: Optional[bool] = None   # scraper needs FlareSolverr (Cloudflare bypass)
    base_url: Optional[str] = None                 # root URL the scraper targets
|
||||
|
||||
|
||||
class CookieUpload(BaseModel):
    """Cookie upload payload (browser-extension export format).

    NOTE(review): upload_scraper_cookies parses the raw request body instead
    of this model — confirm whether this model is used elsewhere.
    """
    cookies: List[dict]              # cookie objects; each needs 'name' and 'value'
    merge: bool = True               # True: merge with stored cookies; False: replace
    user_agent: Optional[str] = None # UA string captured alongside the cookies
|
||||
|
||||
|
||||
class DismissErrors(BaseModel):
    """Payload for dismissing tracked errors.

    NOTE(review): dismiss_errors reads a plain Dict body — confirm this
    model is used elsewhere or is dead code.
    """
    error_ids: Optional[List[int]] = None  # specific error row IDs to dismiss
    dismiss_all: bool = False              # dismiss everything, ignoring error_ids
|
||||
|
||||
|
||||
class MarkErrorsViewed(BaseModel):
    """Payload for marking tracked errors as viewed.

    NOTE(review): mark_errors_viewed reads a plain Dict body — confirm this
    model is used elsewhere or is dead code.
    """
    error_ids: Optional[List[int]] = None  # specific error row IDs to mark
    mark_all: bool = False                 # mark everything, ignoring error_ids
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# SCRAPER ENDPOINTS
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/scrapers")
@limiter.limit("60/minute")
@handle_exceptions
async def get_scrapers(
    request: Request,
    current_user: Dict = Depends(get_current_user),
    type_filter: Optional[str] = Query(None, alias="type", description="Filter by type")
):
    """Return all scraper configurations, optionally filtered by type.

    Scrapers whose related frontend modules are ALL hidden are omitted,
    and the (potentially large) cookies_json payload is stripped from
    every entry before it is returned.
    """
    app_state = get_app_state()
    results = app_state.db.get_all_scrapers(type_filter=type_filter)

    hidden = app_state.config.get('hidden_modules', [])
    if hidden:
        # Modules that rely on each scraper. A scraper is suppressed only
        # when EVERY one of its related modules is hidden.
        related_modules = {
            'instagram': ['instagram', 'instagram_client'],
            'snapchat': ['snapchat', 'snapchat_client'],
            'fastdl': ['fastdl'],
            'imginn': ['imginn'],
            'toolzu': ['toolzu'],
            'tiktok': ['tiktok'],
            'coppermine': ['coppermine'],
        }

        def _is_visible(entry):
            sid = entry.get('id', '')
            # All forum_* scrapers belong to the single 'forums' module.
            modules = ['forums'] if sid.startswith('forum_') else related_modules.get(sid, [])
            return not (modules and all(m in hidden for m in modules))

        results = [entry for entry in results if _is_visible(entry)]

    # cookies_json is too large to ship to the frontend.
    for entry in results:
        entry.pop('cookies_json', None)

    return {"scrapers": results}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# PLATFORM CREDENTIALS (UNIFIED COOKIE MANAGEMENT)
|
||||
# ============================================================================
|
||||
|
||||
# Platform definitions for the unified credentials view
|
||||
# Cookie-based platforms whose credentials live in the scrapers table
# ('source': 'scraper'). 'used_by' is display metadata for the UI.
_SCRAPER_PLATFORMS = [
    {'id': 'instagram', 'name': 'Instagram', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
    {'id': 'tiktok', 'name': 'TikTok', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
    {'id': 'snapchat', 'name': 'Snapchat', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
    {'id': 'ytdlp', 'name': 'YouTube', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
    {'id': 'pornhub', 'name': 'PornHub', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
    {'id': 'xhamster', 'name': 'xHamster', 'type': 'cookies', 'source': 'scraper', 'used_by': ['Scheduler']},
]
|
||||
|
||||
# Platforms whose credentials are managed by the paid-content module
# ('source': 'paid_content'). 'type' distinguishes token vs session-cookie
# auth; 'base_url' is shown in the UI.
_PAID_CONTENT_PLATFORMS = [
    {'id': 'onlyfans_direct', 'name': 'OnlyFans', 'type': 'token', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://onlyfans.com'},
    {'id': 'fansly_direct', 'name': 'Fansly', 'type': 'token', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://fansly.com'},
    {'id': 'coomer', 'name': 'Coomer', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://coomer.su'},
    {'id': 'kemono', 'name': 'Kemono', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://kemono.su'},
    {'id': 'twitch', 'name': 'Twitch', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://twitch.tv'},
    {'id': 'bellazon', 'name': 'Bellazon', 'type': 'session', 'source': 'paid_content', 'used_by': ['Paid Content'], 'base_url': 'https://www.bellazon.com'},
]
|
||||
|
||||
|
||||
@router.get("/scrapers/platform-credentials")
@limiter.limit("30/minute")
@handle_exceptions
async def get_platform_credentials(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Get aggregated credential status for all platforms + monitoring preferences.

    Builds one list covering three credential sources:
      1. scraper platforms (cookies stored in the scrapers table),
      2. paid-content platforms (session/token stored by the paid-content module),
      3. Reddit (cookies encrypted inside the private-gallery database).

    Returns {'platforms': [...], 'global_monitoring_enabled': bool}; each entry
    carries id/name/type/source, a cookie count, has_credentials, updated_at,
    used_by, and the per-platform monitoring flag.
    """
    app_state = get_app_state()
    db = app_state.db
    platforms = []

    def _get_monitoring_flag(platform_id: str) -> bool:
        """Read monitoring preference from settings (missing/unreadable -> True)."""
        try:
            val = app_state.settings.get(f"cookie_monitoring:{platform_id}")
            if val is not None:
                # Stored as a string; anything but an explicit "off" value counts as enabled.
                return str(val).lower() not in ('false', '0', 'no')
        except Exception:
            # Settings backend failure: fall through to the enabled default.
            pass
        return True

    # 1. Scraper platforms
    for platform_def in _SCRAPER_PLATFORMS:
        scraper = db.get_scraper(platform_def['id'])
        cookies_count = 0
        updated_at = None
        if scraper:
            raw = scraper.get('cookies_json')
            if raw:
                try:
                    data = json.loads(raw)
                    # cookies_json may be a bare list or {"cookies": [...]}.
                    if isinstance(data, list):
                        cookies_count = len(data)
                    elif isinstance(data, dict):
                        c = data.get('cookies', [])
                        cookies_count = len(c) if isinstance(c, list) else 0
                except (json.JSONDecodeError, TypeError):
                    # Unparseable blob: report zero cookies rather than fail the whole view.
                    pass
            updated_at = scraper.get('cookies_updated_at')

        monitoring_enabled = _get_monitoring_flag(platform_def['id'])

        platforms.append({
            'id': platform_def['id'],
            'name': platform_def['name'],
            'type': platform_def['type'],
            'source': platform_def['source'],
            'cookies_count': cookies_count,
            'has_credentials': cookies_count > 0,
            'updated_at': updated_at,
            'used_by': platform_def['used_by'],
            'monitoring_enabled': monitoring_enabled,
        })

    # 2. Paid content platforms
    try:
        # Optional module: if the paid-content adapter is unavailable or its
        # query fails, every paid platform simply reports "no credentials".
        from modules.paid_content import PaidContentDBAdapter
        paid_db = PaidContentDBAdapter(db)
        paid_services = {svc['id']: svc for svc in paid_db.get_services()}
    except Exception:
        paid_services = {}

    for platform_def in _PAID_CONTENT_PLATFORMS:
        svc = paid_services.get(platform_def['id'], {})
        session_val = svc.get('session_cookie') or ''
        has_creds = bool(session_val)
        updated_at = svc.get('session_updated_at')

        # Count credentials: for JSON objects count keys, for JSON arrays count items, otherwise 1 if set
        cookies_count = 0
        if has_creds:
            try:
                parsed = json.loads(session_val)
                if isinstance(parsed, dict):
                    cookies_count = len(parsed)
                elif isinstance(parsed, list):
                    cookies_count = len(parsed)
                else:
                    # JSON scalar (e.g. a bare token string): count as one credential.
                    cookies_count = 1
            except (json.JSONDecodeError, TypeError):
                # Not JSON at all — treat the raw value as a single credential.
                cookies_count = 1

        platforms.append({
            'id': platform_def['id'],
            'name': platform_def['name'],
            'type': platform_def['type'],
            'source': platform_def['source'],
            'base_url': platform_def.get('base_url'),
            'cookies_count': cookies_count,
            'has_credentials': has_creds,
            'updated_at': updated_at,
            'used_by': platform_def['used_by'],
            'monitoring_enabled': _get_monitoring_flag(platform_def['id']),
        })

    # 3. Reddit (private gallery)
    # Reddit cookies live encrypted in the private-gallery DB; defaults below
    # apply when the gallery modules are missing or any step fails.
    reddit_has_creds = False
    reddit_cookies_count = 0
    reddit_locked = True
    try:
        from modules.reddit_community_monitor import RedditCommunityMonitor, REDDIT_MONITOR_KEY_FILE
        from modules.private_gallery_crypto import get_private_gallery_crypto, load_key_from_file
        db_path = str(Path(__file__).parent.parent.parent.parent / 'database' / 'media_downloader.db')
        reddit_monitor = RedditCommunityMonitor(db_path)
        crypto = get_private_gallery_crypto()
        reddit_locked = not crypto.is_initialized()

        # If gallery is locked, try loading crypto from key file (exported on unlock)
        active_crypto = crypto if not reddit_locked else load_key_from_file(REDDIT_MONITOR_KEY_FILE)

        if active_crypto and active_crypto.is_initialized():
            reddit_has_creds = reddit_monitor.has_cookies(active_crypto)
            if reddit_has_creds:
                try:
                    # Decrypt the stored cookie blob just to count the entries.
                    conn = reddit_monitor._get_connection()
                    cursor = conn.cursor()
                    cursor.execute("SELECT value FROM private_media_config WHERE key = 'reddit_monitor_encrypted_cookies'")
                    row = cursor.fetchone()
                    conn.close()
                    if row and row['value']:
                        decrypted = active_crypto.decrypt_field(row['value'])
                        parsed = json.loads(decrypted)
                        if isinstance(parsed, list):
                            reddit_cookies_count = len(parsed)
                except Exception:
                    # Count failed but cookies exist: report at least one.
                    reddit_cookies_count = 1 if reddit_has_creds else 0
    except Exception:
        # Private-gallery stack unavailable: show Reddit as locked/no credentials.
        pass

    platforms.append({
        'id': 'reddit',
        'name': 'Reddit',
        'type': 'cookies',
        'source': 'private_gallery',
        'base_url': 'https://reddit.com',
        'cookies_count': reddit_cookies_count,
        'has_credentials': reddit_has_creds,
        'gallery_locked': reddit_locked,
        'updated_at': None,
        'used_by': ['Private Gallery'],
        'monitoring_enabled': _get_monitoring_flag('reddit'),
    })

    return {
        'platforms': platforms,
        'global_monitoring_enabled': _get_monitoring_flag('global'),
    }
|
||||
|
||||
|
||||
@router.put("/scrapers/platform-credentials/{platform_id}/monitoring")
@limiter.limit("30/minute")
@handle_exceptions
async def toggle_platform_monitoring(
    request: Request,
    platform_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Toggle health monitoring for a single platform.

    Persists the flag as 'cookie_monitoring:<platform_id>' in settings;
    a missing 'enabled' key in the body defaults to True.
    """
    payload = await request.json()
    flag = payload.get('enabled', True)

    get_app_state().settings.set(
        key=f"cookie_monitoring:{platform_id}",
        value=str(flag).lower(),
        category="cookie_monitoring",
        updated_by=current_user.get('username', 'user')
    )

    state_word = 'enabled' if flag else 'disabled'
    return {
        'success': True,
        'message': f"Monitoring {state_word} for {platform_id}",
    }
|
||||
|
||||
|
||||
@router.put("/scrapers/platform-credentials/monitoring")
@limiter.limit("30/minute")
@handle_exceptions
async def toggle_global_monitoring(
    request: Request,
    current_user: Dict = Depends(require_admin)
):
    """Toggle global cookie health monitoring.

    Persists the flag under the 'cookie_monitoring:global' settings key;
    a missing 'enabled' key in the body defaults to True.
    """
    payload = await request.json()
    flag = payload.get('enabled', True)

    get_app_state().settings.set(
        key="cookie_monitoring:global",
        value=str(flag).lower(),
        category="cookie_monitoring",
        updated_by=current_user.get('username', 'user')
    )

    state_word = 'enabled' if flag else 'disabled'
    return {
        'success': True,
        'message': f"Global cookie monitoring {state_word}",
    }
|
||||
|
||||
|
||||
@router.get("/scrapers/{scraper_id}")
@limiter.limit("60/minute")
@handle_exceptions
async def get_scraper(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(get_current_user)
):
    """Return a single scraper's configuration plus its stored cookie count.

    Raises:
        NotFoundError: if no scraper with that id exists.
    """
    app_state = get_app_state()
    record = app_state.db.get_scraper(scraper_id)
    if not record:
        raise NotFoundError(f"Scraper '{scraper_id}' not found")

    # Strip the bulky cookie payload; the UI only needs the count.
    record.pop('cookies_json', None)

    stored = app_state.db.get_scraper_cookies(scraper_id)
    record['cookies_count'] = len(stored) if stored else 0

    return record
|
||||
|
||||
|
||||
@router.put("/scrapers/{scraper_id}")
@limiter.limit("30/minute")
@handle_exceptions
async def update_scraper(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Update scraper settings (proxy, enabled, base_url).

    Accepts a raw JSON object of field updates; the DB layer decides
    which fields are valid.

    Raises:
        NotFoundError: if the scraper does not exist.
        ValidationError: if the body contains no updatable fields.
    """
    app_state = get_app_state()
    updates = await request.json()

    if not app_state.db.get_scraper(scraper_id):
        raise NotFoundError(f"Scraper '{scraper_id}' not found")

    if not app_state.db.update_scraper(scraper_id, updates):
        raise ValidationError("No valid fields to update")

    return {"success": True, "message": f"Scraper '{scraper_id}' updated"}
|
||||
|
||||
|
||||
# CLI tools probed by the connection test, keyed by scraper id.
# NOTE(review): paths are hard-coded to the production venv — confirm they
# match the deployment layout.
_CLI_TOOL_TESTS = {
    'ytdlp': {
        'cmd': ['/opt/media-downloader/venv/bin/yt-dlp', '--version'],
        'name': 'yt-dlp'
    },
    'gallerydl': {
        'cmd': ['/opt/media-downloader/venv/bin/gallery-dl', '--version'],
        'name': 'gallery-dl'
    }
}


def _parse_cookie_count(raw_json, scraper_id):
    """Return the number of cookies in a cookies_json blob (0 on parse failure).

    Supports both {"cookies": [...]} and bare [...] formats.
    """
    try:
        data = json.loads(raw_json)
        if isinstance(data, dict) and 'cookies' in data:
            cookies = data['cookies']
        elif isinstance(data, list):
            cookies = data
        else:
            cookies = []
        return len(cookies) if cookies else 0
    except (json.JSONDecodeError, TypeError) as e:
        logger.debug(f"Failed to parse cookies for {scraper_id}: {e}")
        return 0


def _test_cli_tool(app_state, scraper_id, scraper):
    """Verify a CLI tool is installed (via --version) and report cookie status."""
    import subprocess

    test_config = _CLI_TOOL_TESTS.get(scraper_id)
    if not test_config:
        # Unknown CLI tool: nothing to execute, treat registration as success.
        app_state.db.update_scraper_test_status(scraper_id, 'success')
        return {"success": True, "message": "CLI tool registered"}

    try:
        result = subprocess.run(
            test_config['cmd'],
            capture_output=True,
            text=True,
            timeout=10
        )
    except subprocess.TimeoutExpired:
        app_state.db.update_scraper_test_status(scraper_id, 'failed', "Command timed out")
        return {"success": False, "message": "Command timed out"}
    except FileNotFoundError:
        app_state.db.update_scraper_test_status(scraper_id, 'failed', "Tool not installed")
        return {"success": False, "message": f"{test_config['name']} not installed"}

    if result.returncode != 0:
        error_msg = result.stderr.strip() or "Command failed"
        app_state.db.update_scraper_test_status(scraper_id, 'failed', error_msg)
        return {"success": False, "message": f"{test_config['name']} error: {error_msg}"}

    version = result.stdout.strip().split('\n')[0]
    cookies_count = 0
    if scraper.get('cookies_json'):
        cookies_count = _parse_cookie_count(scraper['cookies_json'], scraper_id)

    app_state.db.update_scraper_test_status(scraper_id, 'success')
    msg = f"{test_config['name']} v{version} installed"
    if cookies_count > 0:
        msg += f", {cookies_count} cookies configured"
    return {"success": True, "message": msg}


def _test_via_flaresolverr(app_state, scraper_id, cf_handler, base_url, proxy_url):
    """Fetch Cloudflare clearance cookies via FlareSolverr and persist them on success."""
    if cf_handler.get_cookies_via_flaresolverr(base_url, max_retries=2):
        cookies = cf_handler.get_cookies_list()
        user_agent = cf_handler.get_user_agent()

        app_state.db.save_scraper_cookies(scraper_id, cookies, user_agent=user_agent)
        app_state.db.update_scraper_test_status(scraper_id, 'success')
        return {
            "success": True,
            "message": f"Connection successful, {len(cookies)} cookies saved",
            "cookies_count": len(cookies)
        }

    error_msg = "FlareSolverr returned no cookies"
    if proxy_url:
        error_msg += " (check proxy connection)"
    app_state.db.update_scraper_test_status(scraper_id, 'failed', error_msg)
    return {"success": False, "message": error_msg}


def _test_direct_http(app_state, scraper_id, cf_handler, base_url, proxy_url):
    """Plain GET against base_url (optionally through the scraper's proxy)."""
    try:
        proxies = {"http": proxy_url, "https": proxy_url} if proxy_url else None
        response = requests.get(
            base_url,
            timeout=10,
            proxies=proxies,
            headers={'User-Agent': cf_handler.user_agent}
        )
    except requests.exceptions.Timeout:
        app_state.db.update_scraper_test_status(scraper_id, 'timeout', 'Request timed out')
        return {"success": False, "message": "Connection timed out"}
    except Exception as e:
        app_state.db.update_scraper_test_status(scraper_id, 'failed', str(e))
        return {"success": False, "message": str(e)}

    if response.status_code < 400:
        app_state.db.update_scraper_test_status(scraper_id, 'success')
        return {
            "success": True,
            "message": f"Connection successful (HTTP {response.status_code})"
        }

    app_state.db.update_scraper_test_status(
        scraper_id, 'failed',
        f"HTTP {response.status_code}"
    )
    return {
        "success": False,
        "message": f"Connection failed with HTTP {response.status_code}"
    }


@router.post("/scrapers/{scraper_id}/test")
@limiter.limit("10/minute")
@handle_exceptions
async def test_scraper_connection(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """
    Test scraper connection via FlareSolverr (if required).
    On success, saves cookies to database.
    For CLI tools (yt-dlp, gallery-dl), tests that the tool is installed and working.

    Raises:
        NotFoundError: unknown scraper id.
        ValidationError: non-CLI scraper has no base_url configured.
    """
    from modules.cloudflare_handler import CloudflareHandler

    app_state = get_app_state()
    scraper = app_state.db.get_scraper(scraper_id)
    if not scraper:
        raise NotFoundError(f"Scraper '{scraper_id}' not found")

    # CLI tools are tested locally; no network connection involved.
    if scraper.get('type') == 'cli_tool':
        return _test_cli_tool(app_state, scraper_id, scraper)

    base_url = scraper.get('base_url')
    if not base_url:
        raise ValidationError(f"Scraper '{scraper_id}' has no base_url configured")

    proxy_url = None
    if scraper.get('proxy_enabled') and scraper.get('proxy_url'):
        proxy_url = scraper['proxy_url']

    cf_handler = CloudflareHandler(
        module_name=scraper_id,
        cookie_file=None,
        proxy_url=proxy_url,
        flaresolverr_enabled=scraper.get('flaresolverr_required', False)
    )

    if scraper.get('flaresolverr_required'):
        return _test_via_flaresolverr(app_state, scraper_id, cf_handler, base_url, proxy_url)
    return _test_direct_http(app_state, scraper_id, cf_handler, base_url, proxy_url)
|
||||
|
||||
|
||||
@router.post("/scrapers/{scraper_id}/cookies")
@limiter.limit("20/minute")
@handle_exceptions
async def upload_scraper_cookies(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Upload cookies for a scraper (from browser extension export).

    Accepts either a bare cookie array or an object with 'cookies',
    optional 'merge' (default True) and optional 'user_agent'.
    """
    app_state = get_app_state()

    if not app_state.db.get_scraper(scraper_id):
        raise NotFoundError(f"Scraper '{scraper_id}' not found")

    payload = await request.json()

    # Support both {cookies: [...]} and bare [...] formats
    if isinstance(payload, list):
        cookies, merge, user_agent = payload, True, None
    else:
        cookies = payload.get('cookies', [])
        merge = payload.get('merge', True)
        user_agent = payload.get('user_agent')

    if not cookies or not isinstance(cookies, list):
        raise ValidationError("Invalid cookies format. Expected {cookies: [...]}")

    # Each cookie must be an object carrying at least a name and a value.
    for idx, entry in enumerate(cookies):
        if not isinstance(entry, dict):
            raise ValidationError(f"Cookie {idx} is not an object")
        if 'name' not in entry or 'value' not in entry:
            raise ValidationError(f"Cookie {idx} missing 'name' or 'value'")

    saved = app_state.db.save_scraper_cookies(
        scraper_id, cookies,
        user_agent=user_agent,
        merge=merge
    )
    if not saved:
        raise ValidationError("Failed to save cookies")

    stored = app_state.db.get_scraper_cookies(scraper_id)
    count = len(stored) if stored else 0
    return {
        "success": True,
        "message": f"{'Merged' if merge else 'Replaced'} {len(cookies)} cookies (total: {count})",
        "cookies_count": count
    }
|
||||
|
||||
|
||||
@router.delete("/scrapers/{scraper_id}/cookies")
@limiter.limit("20/minute")
@handle_exceptions
async def clear_scraper_cookies(
    request: Request,
    scraper_id: str,
    current_user: Dict = Depends(require_admin)
):
    """Delete every stored cookie for the given scraper.

    Raises:
        NotFoundError: if the scraper does not exist.
    """
    app_state = get_app_state()

    if not app_state.db.get_scraper(scraper_id):
        raise NotFoundError(f"Scraper '{scraper_id}' not found")

    cleared = app_state.db.clear_scraper_cookies(scraper_id)
    message = f"Cookies cleared for '{scraper_id}'" if cleared else "Failed to clear cookies"
    return {"success": cleared, "message": message}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# ERROR MONITORING ENDPOINTS
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/errors/recent")
@limiter.limit("30/minute")
@handle_exceptions
async def get_recent_errors(
    request: Request,
    limit: int = Query(50, ge=1, le=500, description="Maximum number of errors to return"),
    since_visit: bool = Query(False, description="Only show errors since last dashboard visit (default: show ALL unviewed)"),
    include_dismissed: bool = Query(False, description="Include dismissed errors"),
    current_user: Dict = Depends(get_current_user)
):
    """Return recent errors from the database.

    The default view is every unviewed/undismissed error regardless of age,
    so nothing is lost just because the dashboard was opened. Pass
    since_visit=true to restrict to errors after the last dashboard visit
    (falling back to a 24h window when no visit is recorded). Errors are
    recorded in real-time by universal_logger.py.
    """
    app_state = get_app_state()

    since = None
    if since_visit:
        since = app_state.db.get_last_dashboard_visit() or (datetime.now() - timedelta(hours=24))

    errors = app_state.db.get_recent_errors(since=since, include_dismissed=include_dismissed, limit=limit)

    return {
        "errors": errors,
        "total_count": len(errors),
        "since": since.isoformat() if since else None,
        # Badge count always reflects ALL unviewed errors, not the filtered view.
        "unviewed_count": app_state.db.get_unviewed_error_count(since=None)
    }
|
||||
|
||||
|
||||
@router.get("/errors/count")
@limiter.limit("60/minute")
@handle_exceptions
async def get_error_count(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Return counts of unviewed/undismissed errors.

    Reports both the all-time unviewed total and the count since the last
    dashboard visit (equal to the total when no visit is recorded).
    Errors are recorded in real-time by universal_logger.py.
    """
    app_state = get_app_state()

    total_unviewed = app_state.db.get_unviewed_error_count(since=None)

    last_visit = app_state.db.get_last_dashboard_visit()
    if last_visit:
        since_last_visit = app_state.db.get_unviewed_error_count(since=last_visit)
    else:
        since_last_visit = total_unviewed

    return {
        "unviewed_count": total_unviewed,
        "total_recent": total_unviewed,
        "since_last_visit": since_last_visit
    }
|
||||
|
||||
|
||||
@router.post("/errors/dismiss")
@limiter.limit("20/minute")
@handle_exceptions
async def dismiss_errors(
    request: Request,
    body: Dict = Body(...),
    current_user: Dict = Depends(get_current_user)
):
    """Dismiss errors, either by a list of IDs or all at once."""
    app_state = get_app_state()

    ids = body.get("error_ids", [])

    if body.get("dismiss_all", False):
        count = app_state.db.dismiss_errors(dismiss_all=True)
    elif ids:
        count = app_state.db.dismiss_errors(error_ids=ids)
    else:
        # Neither a flag nor an id list — nothing to do.
        return {"success": False, "dismissed": 0, "message": "No errors specified"}

    return {
        "success": True,
        "dismissed": count,
        "message": f"Dismissed {count} error(s)"
    }
|
||||
|
||||
|
||||
@router.post("/errors/mark-viewed")
@limiter.limit("20/minute")
@handle_exceptions
async def mark_errors_viewed(
    request: Request,
    body: Dict = Body(...),
    current_user: Dict = Depends(get_current_user)
):
    """Mark errors as viewed, either by a list of IDs or all at once."""
    app_state = get_app_state()

    ids = body.get("error_ids", [])

    if body.get("mark_all", False):
        count = app_state.db.mark_errors_viewed(mark_all=True)
    elif ids:
        count = app_state.db.mark_errors_viewed(error_ids=ids)
    else:
        # Neither a flag nor an id list — nothing to do.
        return {"success": False, "marked": 0}

    return {
        "success": True,
        "marked": count
    }
|
||||
|
||||
|
||||
@router.post("/errors/update-visit")
@limiter.limit("30/minute")
@handle_exceptions
async def update_dashboard_visit(
    request: Request,
    current_user: Dict = Depends(get_current_user)
):
    """Record the current moment as the last dashboard visit."""
    return {"success": get_app_state().db.update_dashboard_visit()}
|
||||
|
||||
|
||||
@router.get("/logs/context")
@limiter.limit("30/minute")
@handle_exceptions
async def get_log_context(
    request: Request,
    timestamp: str = Query(..., description="ISO timestamp of the error"),
    module: Optional[str] = Query(None, description="Module name to filter"),
    minutes_before: int = Query(1, description="Minutes of context before error"),
    minutes_after: int = Query(1, description="Minutes of context after error"),
    current_user: Dict = Depends(get_current_user)
):
    """Get log lines around a specific timestamp for debugging context.

    Scans the target date's log files for entries within the requested
    window and returns them sorted by timestamp; lines within 2 seconds
    of the target are flagged `is_target`.

    NOTE: only log files named for the target DATE are scanned, so a
    window that crosses midnight will miss lines from the adjacent day.

    Raises:
        ValidationError: if `timestamp` is not a valid ISO-8601 string.
    """
    # Fix: a malformed timestamp previously propagated a raw ValueError
    # (HTTP 500); surface it as a client-side validation error instead.
    try:
        target_time = datetime.fromisoformat(timestamp)
    except ValueError:
        raise ValidationError(f"Invalid ISO timestamp: {timestamp!r}")

    start_time = target_time - timedelta(minutes=minutes_before)
    end_time = target_time + timedelta(minutes=minutes_after)

    log_dir = Path('/opt/media-downloader/logs')
    # Expected line shape:
    # "YYYY-MM-DD HH:MM:SS [MediaDownloader.<app>] [<module>] [<level>] <message>"
    log_pattern = re.compile(
        r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) '
        r'\[MediaDownloader\.(\w+)\] '
        r'\[(\w+)\] '
        r'\[(\w+)\] '
        r'(.+)$'
    )

    date_str = target_time.strftime('%Y%m%d')
    matching_lines = []

    for log_file in log_dir.glob(f'{date_str}_*.log'):
        # Cheap filename-based module filter (log files appear to be named
        # per module — confirm against the logger's file naming).
        if module and module.lower() not in log_file.stem.lower():
            continue

        try:
            lines = log_file.read_text(errors='replace').splitlines()
        except Exception:
            # Unreadable file: skip it rather than fail the whole request.
            continue

        for line in lines:
            match = log_pattern.match(line)
            if not match:
                continue
            timestamp_str, _app_name, log_module, level, message = match.groups()
            try:
                line_time = datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
            except ValueError:
                continue
            if start_time <= line_time <= end_time:
                matching_lines.append({
                    'timestamp': timestamp_str,
                    'module': log_module,
                    'level': level,
                    'message': message,
                    # Flag lines closest to the requested instant.
                    'is_target': abs((line_time - target_time).total_seconds()) < 2
                })

    matching_lines.sort(key=lambda x: x['timestamp'])

    return {
        "context": matching_lines,
        "target_timestamp": timestamp,
        "range": {
            "start": start_time.isoformat(),
            "end": end_time.isoformat()
        }
    }
|
||||
Reference in New Issue
Block a user