"""
Health & Status Router

Handles system health checks and status endpoints:
- Basic health check
- System health (CPU, memory, disk)
- Service status
- FlareSolverr status
- Cookie/session health monitoring
"""

import asyncio
import sqlite3
import subprocess
import threading
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Optional

from fastapi import APIRouter, Depends, Request
from slowapi import Limiter
from slowapi.util import get_remote_address

from ..core.dependencies import get_current_user, require_admin, get_app_state
from ..core.config import settings
from ..core.responses import now_iso8601
from ..core.exceptions import handle_exceptions
from ..core.utils import MEDIA_FILTERS
from modules.universal_logger import get_logger

logger = get_logger('API')

router = APIRouter(prefix="/api", tags=["Health"])
limiter = Limiter(key_func=get_remote_address)

# Filesystem types reported as real disks (physical + network shares only).
_REAL_FSTYPES = frozenset({
    'ext4', 'ext3', 'xfs', 'btrfs', 'zfs', 'ntfs',
    'cifs', 'nfs', 'nfs4', 'fuse.mergerfs',
})

# Only these mountpoints are ever included in the per-disk breakdown.
_MONITORED_MOUNTPOINTS = frozenset({
    '/', '/media/c$', '/media/d$', '/media/e$', '/opt/immich',
})


@router.get("/health")
@limiter.limit("100/minute")
@handle_exceptions
async def health_check(request: Request, current_user: Dict = Depends(get_current_user)):
    """Basic system health check.

    Returns a small dict with a fixed "healthy" status, the current
    timestamp, the API version, and whether the application state holds a
    database handle and a loaded config.
    """
    app_state = get_app_state()
    return {
        "status": "healthy",
        "timestamp": now_iso8601(),
        "version": settings.API_VERSION,
        "database": "connected" if app_state.db else "disconnected",
        "config_loaded": bool(app_state.config)
    }


def _collect_disk_info() -> list:
    """Return usage details for the monitored disks.

    Each entry is a dict with ``mountpoint``, ``device``, ``fstype``,
    ``total``, ``used``, ``free`` and ``percent``. Entries are sorted with
    ``/dev/`` devices first, then everything else, each group ordered by
    mountpoint.
    """
    import psutil

    # Take ONE snapshot of the partition table so the mergerfs source map and
    # the scan below are guaranteed to agree with each other.
    all_partitions = psutil.disk_partitions(all=True)

    # Collect mergerfs source mountpoints so we can hide their underlying drives.
    mountpoint_by_dir = {
        p.mountpoint.rstrip('/').rsplit('/', 1)[-1]: p.mountpoint
        for p in all_partitions
    }
    mergerfs_sources = set()
    for part in all_partitions:
        if part.fstype != 'fuse.mergerfs':
            continue
        # device is "dirname1:dirname2" - resolve to actual mountpoints
        for src in part.device.split(':'):
            src = src.strip()
            if src.startswith('/'):
                mergerfs_sources.add(src)
            elif src in mountpoint_by_dir:
                mergerfs_sources.add(mountpoint_by_dir[src])

    disks_info = []
    seen_usage = set()  # Deduplicate shares pointing to the same physical disk
    for part in all_partitions:
        # Filter to monitored mountpoints BEFORE probing usage: this avoids
        # statting unrelated (possibly unresponsive) shares, and keeps an
        # unmonitored duplicate from hiding a monitored drive via dedup.
        if part.mountpoint not in _MONITORED_MOUNTPOINTS:
            continue
        if part.fstype not in _REAL_FSTYPES:
            continue
        if part.mountpoint.startswith(('/snap/', '/boot/')):
            continue
        # Hide drives that are pooled into a mergerfs mount
        if part.mountpoint in mergerfs_sources:
            continue

        try:
            usage = psutil.disk_usage(part.mountpoint)
        except OSError:
            # PermissionError is an OSError subclass; unreadable mounts are skipped.
            continue

        if usage.total == 0:
            continue

        # Deduplicate by (total, used) - same physical disk behind multiple shares
        dedup_key = (usage.total, usage.used)
        if dedup_key in seen_usage:
            continue
        seen_usage.add(dedup_key)

        disks_info.append({
            'mountpoint': part.mountpoint,
            'device': part.device,
            'fstype': part.fstype,
            'total': usage.total,
            'used': usage.used,
            'free': usage.free,
            'percent': usage.percent,
        })

    # Sort: local drives first (by mountpoint), then network shares (by mountpoint)
    disks_info.sort(key=lambda d: (0 if d['device'].startswith('/dev/') else 1, d['mountpoint']))
    return disks_info


@router.get("/health/system")
@limiter.limit("60/minute")
@handle_exceptions
async def get_system_health(request: Request, current_user: Dict = Depends(require_admin)):
    """Get comprehensive system health information.

    Requires admin privileges.

    The response combines host metrics from psutil (CPU, memory, root disk,
    monitored disks, uptime), per-service status strings, scheduler task
    counts, WebSocket connection count, cache statistics, a database timing
    probe, process info and recent download activity.
    """
    import psutil

    app_state = get_app_state()

    logger.debug(f"Health check requested by user: {current_user.get('sub', 'unknown')}", module="Health")

    # System metrics
    cpu_percent = psutil.cpu_percent(interval=0.1)
    memory = psutil.virtual_memory()
    disk = psutil.disk_usage('/')

    # Monitored disk partitions
    disks_info = _collect_disk_info()

    # Boot time and uptime
    boot_time = psutil.boot_time()
    uptime_seconds = time.time() - boot_time
    uptime_hours = uptime_seconds / 3600

    # Service status checks
    db_status = 'healthy' if app_state.db else 'error'
    scheduler_status, active_tasks, total_tasks = _check_scheduler_status(app_state)
    websocket_status, active_websockets = _check_websocket_status()
    cache_builder_status, cache_stats = _check_cache_builder_status()

    # Download activity
    download_activity = _get_download_activity(app_state)

    # Database performance
    db_performance = _check_db_performance(app_state)

    # Process info - reuse one Process handle instead of building three
    current_process = psutil.Process()
    process_info = {
        'threads': current_process.num_threads(),
        'open_files': len(current_process.open_files()) if hasattr(current_process, 'open_files') else 0
    }

    # Determine overall status
    overall_status = _determine_overall_status(
        db_status, scheduler_status, cpu_percent, memory.percent, disk.percent
    )

    return {
        'overall_status': overall_status,
        'timestamp': now_iso8601(),
        'version': settings.API_VERSION,
        'services': {
            'api': 'healthy',
            'database': db_status,
            'scheduler': scheduler_status,
            'websocket': websocket_status,
            'cache_builder': cache_builder_status
        },
        'system': {
            'cpu_percent': cpu_percent,
            'cpu_count': psutil.cpu_count(),
            'memory_percent': memory.percent,
            'memory_used': memory.used,
            'memory_total': memory.total,
            'disk_percent': disk.percent,
            'disk_used': disk.used,
            'disk_total': disk.total,
            'disk_free': disk.free,
            'uptime_hours': round(uptime_hours, 2),
            'disks': disks_info,
        },
        'scheduler_info': {
            'active_tasks': active_tasks,
            'total_tasks': total_tasks
        },
        'websocket_info': {
            'active_connections': active_websockets
        },
        'cache_info': cache_stats,
        'db_performance': db_performance,
        'process_info': process_info,
        'download_activity': download_activity
    }
@router.get("/health/flaresolverr")
@limiter.limit("30/minute")
@handle_exceptions
async def check_flaresolverr(request: Request, current_user: Dict = Depends(get_current_user)):
    """Check FlareSolverr service status.

    Issues a GET to ``<flaresolverr_url>/health`` and returns a dict whose
    ``status`` is one of ``healthy``, ``unhealthy``, ``unavailable``,
    ``timeout`` or ``error``. The URL comes from the ``scrapers`` settings
    entry when present, otherwise ``http://localhost:8191``.
    """
    import httpx

    app_state = get_app_state()

    # Get FlareSolverr URL from settings
    flaresolverr_url = "http://localhost:8191"
    if app_state.settings:
        scraper_settings = app_state.settings.get('scrapers', {})
        if isinstance(scraper_settings, dict):
            flaresolverr_url = scraper_settings.get('flaresolverr_url', flaresolverr_url)

    try:
        # Tolerate a configured URL with a trailing slash (avoids "//health").
        health_url = f"{str(flaresolverr_url).rstrip('/')}/health"
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(health_url)

        if response.status_code == 200:
            data = response.json()
            return {
                "status": "healthy",
                "url": flaresolverr_url,
                "version": data.get("version", "unknown"),
                "message": data.get("msg", "FlareSolverr is running"),
                "timestamp": now_iso8601()
            }
        return {
            "status": "unhealthy",
            "url": flaresolverr_url,
            "error": f"HTTP {response.status_code}",
            "timestamp": now_iso8601()
        }
    except httpx.ConnectError:
        return {
            "status": "unavailable",
            "url": flaresolverr_url,
            "error": "Connection refused - FlareSolverr may not be running",
            "timestamp": now_iso8601()
        }
    except httpx.TimeoutException:
        return {
            "status": "timeout",
            "url": flaresolverr_url,
            "error": "Request timed out",
            "timestamp": now_iso8601()
        }
    except Exception as e:
        return {
            "status": "error",
            "url": flaresolverr_url,
            "error": str(e),
            "timestamp": now_iso8601()
        }


@router.get("/status")
@limiter.limit("100/minute")
@handle_exceptions
async def get_status(request: Request, current_user: Dict = Depends(get_current_user)):
    """Get overall system status summary.

    Reports API/database/scheduler state, the active WebSocket connection
    count, a timestamp and the API version.
    """
    app_state = get_app_state()

    # Basic status info
    scheduler_running = False
    scheduler_status = "unknown"

    # Check scheduler
    try:
        scheduler_running = _systemctl_state('media-downloader.service') == 'active'
        scheduler_status = "running" if scheduler_running else "stopped"
    except subprocess.TimeoutExpired:
        scheduler_status = "timeout"
    except Exception:
        scheduler_status = "unknown"

    # Get active websocket count
    _, active_websockets = _check_websocket_status()

    return {
        "api": "running",
        "database": "connected" if app_state.db else "disconnected",
        "scheduler": scheduler_status,
        "scheduler_running": scheduler_running,  # Boolean for Dashboard compatibility
        "active_websockets": active_websockets,  # Number for Dashboard
        "timestamp": now_iso8601(),
        "version": settings.API_VERSION
    }


# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def _systemctl_state(unit: str) -> str:
    """Return the stripped stdout of ``systemctl is-active <unit>``.

    Raises:
        subprocess.TimeoutExpired: if the command exceeds
            ``settings.PROCESS_TIMEOUT_SHORT``.
        OSError: if the command cannot be started.
    """
    result = subprocess.run(
        ['systemctl', 'is-active', unit],
        capture_output=True,
        text=True,
        timeout=settings.PROCESS_TIMEOUT_SHORT
    )
    return result.stdout.strip()


def _check_scheduler_status(app_state) -> tuple:
    """Check scheduler service status.

    Returns:
        ``(status, active_tasks, total_tasks)``. ``status`` is ``healthy``
        when the systemd unit is active, ``error`` when it is not,
        ``timeout`` when systemctl timed out, and on any other failure
        ``warning`` if ``app_state.scheduler`` is truthy, else ``error``.
        Task counts stay 0 unless the ``scheduler_state`` table exists.
    """
    scheduler_status = 'unknown'
    active_tasks = 0
    total_tasks = 0

    try:
        if _systemctl_state('media-downloader.service') == 'active':
            scheduler_status = 'healthy'

            # Get task counts from database
            try:
                with app_state.db.get_connection() as conn:
                    cursor = conn.cursor()
                    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='scheduler_state'")
                    if cursor.fetchone():
                        cursor.execute("SELECT COUNT(*) FROM scheduler_state WHERE status = 'active'")
                        active_tasks = cursor.fetchone()[0]
                        cursor.execute("SELECT COUNT(*) FROM scheduler_state")
                        total_tasks = cursor.fetchone()[0]
            except Exception as e:
                logger.debug(f"Failed to query scheduler state: {e}", module="Health")
        else:
            scheduler_status = 'error'
    except subprocess.TimeoutExpired:
        scheduler_status = 'timeout'
    except Exception:
        scheduler_status = 'warning' if app_state.scheduler else 'error'

    return scheduler_status, active_tasks, total_tasks


def _check_websocket_status() -> tuple:
    """Check WebSocket connection manager status.

    Returns:
        ``('healthy', n)`` where ``n`` is the number of active connections
        (0 when no manager is set), or ``('unknown', 0)`` if the application
        state cannot be inspected.
    """
    try:
        app_state = get_app_state()
        manager = app_state.websocket_manager
        active_connections = len(manager.active_connections) if manager else 0
        return 'healthy', active_connections
    except Exception:
        return 'unknown', 0


def _check_cache_builder_status() -> tuple:
    """Check cache builder service status.

    Returns:
        ``(status, cache_stats)`` where ``cache_stats`` has ``files_cached``
        and ``last_run``. Both ``active`` and ``inactive`` unit states count
        as ``healthy``.
    """
    from contextlib import closing

    cache_builder_status = 'unknown'
    cache_stats = {'files_cached': 0, 'last_run': None}

    try:
        if _systemctl_state('media-cache-builder.service') in ['active', 'inactive']:
            cache_builder_status = 'healthy'

            # Get cache statistics
            thumb_db_path = settings.PROJECT_ROOT / 'database' / 'thumbnails.db'
            try:
                # sqlite3's own context manager only commits/rolls back; it does
                # NOT close the connection. closing() releases the handle so a
                # polled health endpoint does not leak one connection per call.
                with closing(sqlite3.connect(str(thumb_db_path))) as conn:
                    cursor = conn.cursor()
                    cursor.execute("SELECT COUNT(*) FROM thumbnails")
                    cache_stats['files_cached'] = cursor.fetchone()[0]

                    cursor.execute("SELECT MAX(created_at) FROM thumbnails")
                    last_cached = cursor.fetchone()[0]
                    if last_cached:
                        cache_stats['last_run'] = last_cached
            except Exception:
                pass  # Cache stats are non-critical
        else:
            cache_builder_status = 'error'
    except subprocess.TimeoutExpired:
        cache_builder_status = 'timeout'
    except Exception as e:
        logger.debug(f"Failed to check cache builder status: {e}", module="Health")

    return cache_builder_status, cache_stats


def _get_download_activity(app_state) -> dict:
    """Get download activity statistics.

    Returns:
        Counts of rows in ``downloads`` newer than 24 hours, 7 days and
        30 days, keyed ``last_24h`` / ``last_7d`` / ``last_30d``. Counts
        that could not be queried stay 0.
    """
    activity = {'last_24h': 0, 'last_7d': 0, 'last_30d': 0}

    try:
        with app_state.db.get_connection() as conn:
            cursor = conn.cursor()

            # Compute cutoff dates in Python for proper parameterization
            now = datetime.now()
            periods = {
                'last_24h': (now - timedelta(days=1)).strftime('%Y-%m-%d %H:%M:%S'),
                'last_7d': (now - timedelta(days=7)).strftime('%Y-%m-%d %H:%M:%S'),
                'last_30d': (now - timedelta(days=30)).strftime('%Y-%m-%d %H:%M:%S')
            }

            for period, cutoff_date in periods.items():
                # MEDIA_FILTERS is a project constant, not user input.
                cursor.execute(f"""
                    SELECT COUNT(*) FROM downloads
                    WHERE download_date >= ?
                    AND {MEDIA_FILTERS}
                """, (cutoff_date,))
                activity[period] = cursor.fetchone()[0]
    except Exception as e:
        logger.warning(f"Failed to get download activity: {e}", module="Health")

    return activity


def _check_db_performance(app_state) -> dict:
    """Check database performance metrics.

    Times a ``SELECT COUNT(*) FROM downloads`` and, when the database object
    exposes a ``pool`` attribute, reports its length. On failure the values
    gathered so far (defaults 0) are returned.
    """
    performance = {'query_time_ms': 0, 'connection_pool_size': 0}

    try:
        start = time.time()
        with app_state.db.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM downloads")
            cursor.fetchone()
        performance['query_time_ms'] = round((time.time() - start) * 1000, 2)

        if hasattr(app_state.db, 'pool'):
            performance['connection_pool_size'] = len(app_state.db.pool)
    except Exception as e:
        # Best-effort metric: keep the endpoint alive but leave a trace.
        logger.debug(f"Failed to check database performance: {e}", module="Health")

    return performance


def _determine_overall_status(db_status, scheduler_status, cpu_percent, memory_percent, disk_percent) -> str:
    """Determine overall system health status.

    Returns ``error`` when the database or scheduler is in error or a
    resource is critically high (CPU > 90, memory > 95, disk > 95),
    ``warning`` on elevated usage (CPU > 70, memory > 80, disk > 80) or a
    scheduler warning, otherwise ``healthy``.
    """
    if db_status == 'error' or scheduler_status == 'error':
        return 'error'

    if cpu_percent > 90 or memory_percent > 95 or disk_percent > 95:
        return 'error'

    if cpu_percent > 70 or memory_percent > 80 or disk_percent > 80 or scheduler_status == 'warning':
        return 'warning'

    return 'healthy'


# ============================================================================
# COOKIE HEALTH MONITORING
# ============================================================================

_cookie_health_check_lock = threading.Lock()
_last_cookie_health_check: float = 0
_cookie_health_check_running = False
COOKIE_HEALTH_CHECK_INTERVAL = 1800  # 30 minutes

# Strong references to fire-and-forget tasks. The event loop only keeps weak
# references to tasks, so an unreferenced task can be garbage-collected
# before it finishes.
_background_tasks: set = set()

# Paid content services that are always listed, even without a session cookie.
_DIRECT_PAID_SERVICES = ('onlyfans_direct', 'fansly_direct')


def _determine_scraper_cookie_status(scraper: Dict) -> str:
    """Determine cookie health status for a scraper.

    Returns one of ``expired``, ``missing``, ``healthy`` or ``unknown``. A
    failed last test takes precedence over everything else.
    """
    last_test = scraper.get('last_test_status')
    if last_test == 'failed':
        return 'expired'

    cookies_json = scraper.get('cookies_json')
    if not cookies_json and scraper.get('flaresolverr_required'):
        return 'missing'

    if last_test == 'success':
        return 'healthy'
    if cookies_json:
        return 'healthy'
    return 'unknown'


def _determine_paid_cookie_status(service: Dict) -> str:
    """Determine cookie/session health status for a paid content service.

    Returns one of ``expired``, ``degraded``, ``missing``, ``healthy`` or
    ``unknown``. ``down``/``degraded`` health wins over cookie presence.
    """
    health_status = service.get('health_status', '')
    if health_status == 'down':
        return 'expired'
    if health_status == 'degraded':
        return 'degraded'

    if not service.get('session_cookie'):
        if service.get('id', '') in _DIRECT_PAID_SERVICES:
            return 'missing'
        return 'unknown'

    if health_status == 'healthy':
        return 'healthy'
    return 'unknown'


# Services where the paid content client falls back to scraper cookies.
# If the paid content service has no own session_cookie, skip it to avoid
# showing duplicate entries for the same underlying cookie.
# Note: Instagram is NOT shared — paid content and scrapers have independent cookies.
_SHARED_COOKIE_SERVICES = {'snapchat'}

# Instagram-related IDs to exclude from cookie health checks entirely.
# These scrapers use Playwright/API-based auth, not persistent cookies.
_INSTAGRAM_EXCLUDED_IDS = {'instagram', 'fastdl', 'imginn', 'imginn_api', 'toolzu', 'instagram_client'}

# Map scraper IDs to the config modules that control them.
# A scraper is only considered disabled if ALL related modules are hidden/disabled.
_SCRAPER_TO_MODULES = {
    'instagram': ['instagram', 'instagram_client'],
    'snapchat': ['snapchat', 'snapchat_client'],
    'fastdl': ['fastdl'],
    'imginn': ['imginn'],
    'toolzu': ['toolzu'],
    'tiktok': ['tiktok'],
    'coppermine': ['coppermine'],
}


def _is_scraper_module_enabled(scraper_id: str, config: dict) -> bool:
    """Check if a scraper's module is actually enabled in config.

    A scraper is disabled if ALL its related modules are either:
    - in hidden_modules, OR
    - have enabled=false in config

    Scrapers with no known related module are treated as enabled.
    """
    hidden_modules = config.get('hidden_modules', [])

    if scraper_id.startswith('forum_'):
        related = ['forums']
    else:
        related = _SCRAPER_TO_MODULES.get(scraper_id, [])

    if not related:
        return True

    for mod in related:
        if mod in hidden_modules:
            continue
        if config.get(mod, {}).get('enabled', False):
            return True
    return False


def _monitoring_flag_enabled(app_state, key: str) -> bool:
    """Read a ``cookie_monitoring:*`` setting as a boolean.

    Returns False only when the stored value stringifies (case-insensitively)
    to ``false``, ``0`` or ``no``. A missing value or any lookup failure
    counts as enabled.
    """
    try:
        val = app_state.settings.get(key)
        if val is not None:
            return str(val).lower() not in ('false', '0', 'no')
    except Exception:
        # Deliberate best-effort: unreadable settings must not disable monitoring.
        pass
    return True  # Default: enabled


def _is_monitoring_enabled(app_state, platform_id: str) -> bool:
    """Check if monitoring is enabled for a specific platform."""
    return _monitoring_flag_enabled(app_state, f"cookie_monitoring:{platform_id}")


def _is_global_monitoring_enabled(app_state) -> bool:
    """Check if global cookie monitoring is enabled."""
    return _monitoring_flag_enabled(app_state, "cookie_monitoring:global")


@router.get("/health/cookies")
@limiter.limit("30/minute")
@handle_exceptions
async def get_cookie_health(request: Request, current_user: Dict = Depends(get_current_user)):
    """Get cookie/session health status across all services.

    Builds the list from stored state (scrapers, paid content services and
    the Reddit community monitor) and, when the last background check is
    older than ``COOKIE_HEALTH_CHECK_INTERVAL``, schedules a fresh one
    without waiting for it.
    """
    app_state = get_app_state()
    db = app_state.db
    config = app_state.config or {}

    services = []

    # If global monitoring is disabled, return empty
    if not _is_global_monitoring_enabled(app_state):
        return {
            'services': [],
            'has_issues': False,
            'issue_count': 0,
            'checked_at': datetime.now().isoformat(),
        }

    # Track which scraper IDs have cookie entries (for deduplication)
    scraper_cookie_ids = set()

    # 1. Scrapers with cookies or FlareSolverr requirement
    try:
        scrapers = db.get_all_scrapers()
        for scraper in scrapers:
            if not scraper.get('enabled'):
                continue

            # Skip if per-platform monitoring is disabled
            if not _is_monitoring_enabled(app_state, scraper['id']):
                continue

            # Instagram scrapers are excluded from automatic health testing,
            # but still shown if the scheduler has flagged them as failed
            is_instagram = scraper['id'] in _INSTAGRAM_EXCLUDED_IDS
            if is_instagram and scraper.get('last_test_status') != 'failed':
                continue

            if not is_instagram and not _is_scraper_module_enabled(scraper['id'], config):
                continue

            if scraper.get('cookies_json') or scraper.get('flaresolverr_required') or is_instagram:
                scraper_cookie_ids.add(scraper['id'])
                services.append({
                    'id': f"scraper:{scraper['id']}",
                    'name': scraper.get('name', scraper['id']),
                    'type': 'scraper',
                    'status': _determine_scraper_cookie_status(scraper),
                    'last_updated': scraper.get('cookies_updated_at'),
                    'last_checked': scraper.get('last_test_at'),
                    'message': scraper.get('last_test_message') or '',
                })
    except Exception as e:
        logger.warning(f"Failed to check scraper cookie health: {e}", module="Health")

    # 2. Paid content services with session cookies
    try:
        from modules.paid_content import PaidContentDBAdapter
        paid_db = PaidContentDBAdapter(db)
        paid_services = paid_db.get_services()
        for svc in paid_services:
            if not svc.get('enabled'):
                continue

            svc_id = svc['id']

            # Skip if per-platform monitoring is disabled
            if not _is_monitoring_enabled(app_state, svc_id):
                continue

            # Skip Instagram — uses Playwright/API auth, not persistent cookies
            if svc_id in _INSTAGRAM_EXCLUDED_IDS:
                continue

            # Skip services that share cookies with a scraper and have no own session_cookie.
            # e.g. paid:snapchat uses scraper:snapchat cookies — don't show duplicates.
            if svc_id in _SHARED_COOKIE_SERVICES and not svc.get('session_cookie') and svc_id in scraper_cookie_ids:
                continue

            if svc.get('session_cookie') or svc_id in _DIRECT_PAID_SERVICES:
                services.append({
                    'id': f"paid:{svc_id}",
                    'name': svc.get('name', svc_id),
                    'type': 'paid_content',
                    'status': _determine_paid_cookie_status(svc),
                    'last_updated': svc.get('session_updated_at'),
                    'last_checked': svc.get('last_health_check'),
                    'message': '',
                })
    except Exception as e:
        logger.warning(f"Failed to check paid content cookie health: {e}", module="Health")

    # 3. Reddit community monitor (private gallery) - cookies stored encrypted
    if _is_monitoring_enabled(app_state, 'reddit'):
        try:
            from modules.reddit_community_monitor import RedditCommunityMonitor, REDDIT_MONITOR_KEY_FILE
            from modules.private_gallery_crypto import get_private_gallery_crypto, load_key_from_file
            db_path = str(Path(__file__).parent.parent.parent.parent / 'database' / 'media_downloader.db')
            reddit_monitor = RedditCommunityMonitor(db_path)
            reddit_settings = reddit_monitor.get_settings()
            if reddit_settings.get('enabled'):
                crypto = get_private_gallery_crypto()
                # If gallery is locked, try loading crypto from key file
                active_crypto = crypto if crypto.is_initialized() else load_key_from_file(REDDIT_MONITOR_KEY_FILE)
                if active_crypto and active_crypto.is_initialized():
                    has_cookies = reddit_monitor.has_cookies(active_crypto)
                    reddit_status = 'healthy' if has_cookies else 'missing'
                    reddit_message = '' if has_cookies else 'No cookies configured'
                else:
                    reddit_status = 'unknown'
                    reddit_message = 'Gallery locked — cannot verify cookies'
                services.append({
                    'id': 'reddit_monitor',
                    'name': 'Reddit (Private Gallery)',
                    'type': 'private_gallery',
                    'status': reddit_status,
                    'last_updated': None,
                    'last_checked': None,
                    'message': reddit_message,
                })
        except Exception as e:
            logger.debug(f"Failed to check Reddit cookie health: {e}", module="Health")

    # Trigger background health check if stale
    now = time.time()
    if (now - _last_cookie_health_check) > COOKIE_HEALTH_CHECK_INTERVAL and not _cookie_health_check_running:
        # Hold a reference until the task completes so it cannot be
        # garbage-collected mid-run.
        task = asyncio.create_task(_run_cookie_health_checks())
        _background_tasks.add(task)
        task.add_done_callback(_background_tasks.discard)

    issue_statuses = ('expired', 'down', 'failed', 'degraded')
    issue_count = sum(1 for s in services if s['status'] in issue_statuses)

    return {
        'services': services,
        'has_issues': issue_count > 0,
        'issue_count': issue_count,
        'checked_at': datetime.now().isoformat(),
    }


async def _run_cookie_health_checks():
    """Background task to run lightweight cookie health checks.

    Tests scraper cookies, paid content services and Reddit cookies in turn
    and broadcasts an alert when a service newly enters a bad state. Returns
    immediately if another run is already in progress.
    """
    global _last_cookie_health_check, _cookie_health_check_running

    # This guard must stay outside the try/finally below: a second task that
    # bails out here must not reset the flag owned by the running task.
    if _cookie_health_check_running:
        return
    _cookie_health_check_running = True

    try:
        app_state = get_app_state()
        db = app_state.db

        # If global monitoring is disabled, skip all checks
        if not _is_global_monitoring_enabled(app_state):
            _last_cookie_health_check = time.time()
            logger.debug("Cookie health checks skipped (global monitoring disabled)", module="Health")
            return

        # Check scrapers with cookies via lightweight HTTP
        try:
            scrapers = db.get_all_scrapers()
            for scraper in scrapers:
                if not scraper.get('enabled'):
                    continue
                if scraper['id'] in _INSTAGRAM_EXCLUDED_IDS:
                    continue
                if not _is_monitoring_enabled(app_state, scraper['id']):
                    continue
                if not scraper.get('cookies_json') and not scraper.get('flaresolverr_required'):
                    continue

                old_status = _determine_scraper_cookie_status(scraper)
                new_status = await _test_scraper_cookies(scraper, db)

                if new_status and new_status != old_status and new_status in ('expired', 'failed'):
                    _broadcast_cookie_alert(
                        app_state,
                        f"scraper:{scraper['id']}",
                        scraper.get('name', scraper['id']),
                        new_status,
                    )
        except Exception as e:
            logger.warning(f"Scraper cookie health check error: {e}", module="Health")

        # Check paid content services
        try:
            from modules.paid_content import PaidContentDBAdapter
            paid_db = PaidContentDBAdapter(db)
            paid_services = paid_db.get_services()
            for svc in paid_services:
                if not svc.get('enabled'):
                    continue
                # Skip Instagram — uses Playwright/API auth, not persistent cookies
                if svc['id'] in _INSTAGRAM_EXCLUDED_IDS:
                    continue
                if not _is_monitoring_enabled(app_state, svc['id']):
                    continue
                if not svc.get('session_cookie') and svc['id'] not in _DIRECT_PAID_SERVICES:
                    continue

                old_status = _determine_paid_cookie_status(svc)

                # Use the paid_content router's health check
                try:
                    from web.backend.routers.paid_content import _check_single_service_health
                    health = await asyncio.wait_for(
                        _check_single_service_health(svc, app_state),
                        timeout=30.0
                    )
                    new_health = health.get('status', 'unknown')
                    paid_db.update_service(svc['id'], {
                        'health_status': new_health,
                        'last_health_check': datetime.now().isoformat(),
                    })
                    new_status = _determine_paid_cookie_status({**svc, 'health_status': new_health})

                    if new_status != old_status and new_status in ('expired', 'degraded'):
                        _broadcast_cookie_alert(
                            app_state,
                            f"paid:{svc['id']}",
                            svc.get('name', svc['id']),
                            new_status,
                        )
                except Exception as e:
                    logger.debug(f"Paid content health check for {svc['id']}: {e}", module="Health")
        except Exception as e:
            logger.warning(f"Paid content cookie health check error: {e}", module="Health")

        # Check Reddit community monitor cookies
        if _is_monitoring_enabled(app_state, 'reddit'):
            try:
                from modules.reddit_community_monitor import RedditCommunityMonitor
                from modules.private_gallery_crypto import get_private_gallery_crypto
                db_path = str(Path(__file__).parent.parent.parent.parent / 'database' / 'media_downloader.db')
                reddit_monitor = RedditCommunityMonitor(db_path)
                reddit_settings = reddit_monitor.get_settings()
                if reddit_settings.get('enabled'):
                    crypto = get_private_gallery_crypto()
                    # NOTE(review): unlike get_cookie_health, there is no key-file
                    # fallback here, so a locked gallery skips the test — confirm intended.
                    if crypto.is_initialized() and reddit_monitor.has_cookies(crypto):
                        # Test Reddit cookies with a lightweight request
                        reddit_status = await _test_reddit_cookies(reddit_monitor, crypto)
                        if reddit_status == 'expired':
                            _broadcast_cookie_alert(app_state, 'reddit_monitor', 'Reddit (Private Gallery)', 'expired')
            except Exception as e:
                logger.debug(f"Reddit cookie health check error: {e}", module="Health")

        _last_cookie_health_check = time.time()
        logger.info("Cookie health checks completed", module="Health")

    except Exception as e:
        logger.error(f"Cookie health check error: {e}", module="Health")
    finally:
        _cookie_health_check_running = False


async def _test_scraper_cookies(scraper: Dict, db) -> Optional[str]:
    """Lightweight HTTP test for a scraper's cookie validity.

    Returns:
        ``missing`` when no usable cookie list is stored, ``expired`` when
        the site rejects the session, ``healthy`` on success, or ``None``
        when the result is inconclusive (no test URL for this scraper,
        unexpected HTTP status, unparseable body, or any error).
        ``expired``/``healthy`` outcomes are also written back through
        ``db.update_scraper_test_status``.
    """
    import json

    import httpx

    scraper_id = scraper['id']

    try:
        cookies_json = scraper.get('cookies_json', '')
        if not cookies_json:
            return 'missing'

        cookie_data = json.loads(cookies_json)
        # Accept both {"cookies": [...]} and a bare list.
        cookie_list = cookie_data.get('cookies', cookie_data) if isinstance(cookie_data, dict) else cookie_data
        if not isinstance(cookie_list, list):
            return 'missing'

        test_urls = {
            'instagram': 'https://www.instagram.com/api/v1/accounts/current_user/',
            'tiktok': 'https://www.tiktok.com/passport/web/account/info/',
            'snapchat': 'https://www.snapchat.com/',
            'pornhub': 'https://www.pornhub.com/',
        }
        url = test_urls.get(scraper_id)
        if not url:
            return None

        cookie_dict = {}
        for c in cookie_list:
            name = c.get('name', '')
            value = c.get('value', '')
            if name and value:
                cookie_dict[name] = value

        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True, headers=headers) as client:
            resp = await client.get(url, cookies=cookie_dict)

        if resp.status_code in (401, 403):
            db.update_scraper_test_status(scraper_id, 'failed', f'HTTP {resp.status_code}')
            return 'expired'

        # TikTok passport endpoint: {"message":"success","data":{...}}
        # when logged in, {"message":"error"} when not.
        if scraper_id == 'tiktok':
            try:
                data = resp.json()
                msg = data.get('message', '')
                if msg == 'success':
                    db.update_scraper_test_status(scraper_id, 'success', '')
                    # Save refreshed cookies from Set-Cookie headers
                    _save_tiktok_refreshed_cookies(resp, cookie_list, db)
                    return 'healthy'
                db.update_scraper_test_status(scraper_id, 'failed', f'Session invalid ({msg})')
                return 'expired'
            except ValueError:
                # Empty or unparseable body (json.JSONDecodeError is a
                # ValueError) — inconclusive, don't mark expired
                return None

        if resp.status_code == 200:
            db.update_scraper_test_status(scraper_id, 'success', '')
            return 'healthy'
        return None

    except Exception as e:
        logger.debug(f"Scraper cookie test for {scraper_id}: {e}", module="Health")
        return None


def _save_tiktok_refreshed_cookies(resp, cookie_list: list, db):
    """Save refreshed cookies from TikTok Set-Cookie headers back to DB.

    Only cookies already present in ``cookie_list`` (matched by name and
    domain) are updated; new cookies are never added. Nothing is written
    when no value changed. Errors are logged and swallowed.
    """
    try:
        set_cookies = resp.headers.get_list('set-cookie') if hasattr(resp.headers, 'get_list') else []
        if not set_cookies:
            raw = resp.headers.get('set-cookie', '')
            if raw:
                set_cookies = [raw]
        if not set_cookies:
            return

        # Parse Set-Cookie headers for name=value pairs
        updated = {}
        for sc in set_cookies:
            parts = sc.split(';')
            nv = parts[0].strip()
            if '=' not in nv:
                continue
            name, value = nv.split('=', 1)
            name = name.strip()
            value = value.strip()
            if not name:
                continue

            # Extract domain from cookie attributes
            domain = '.tiktok.com'
            for part in parts[1:]:
                part = part.strip().lower()
                if part.startswith('domain='):
                    domain = part.split('=', 1)[1].strip()
                    if not domain.startswith('.'):
                        domain = '.' + domain
                    break
            updated[(name, domain)] = value

        if not updated:
            return

        # Merge into existing cookie list
        cookie_map = {(c.get('name'), c.get('domain')): c for c in cookie_list}
        changed_count = 0
        for key, value in updated.items():
            existing = cookie_map.get(key)
            # Don't add new cookies we didn't have before
            if existing is None:
                continue
            if existing.get('value') != value:
                cookie_map[key] = {**existing, 'value': value}
                changed_count += 1

        if changed_count:
            final_cookies = list(cookie_map.values())
            db.save_scraper_cookies('tiktok', final_cookies, merge=False)
            # Report the number of cookies actually changed, not merely parsed.
            logger.debug(f"Saved {changed_count} refreshed TikTok cookies from health check", module="Health")

    except Exception as e:
        logger.debug(f"Failed to save refreshed TikTok cookies: {e}", module="Health")


async def _test_reddit_cookies(reddit_monitor, crypto) -> Optional[str]:
    """Test Reddit cookies by making a lightweight API request.

    Returns:
        ``missing`` when no cookies are stored, ``expired`` on HTTP 401/403,
        ``healthy`` on HTTP 200, and ``None`` for any other status or error.
    """
    import json

    import httpx

    try:
        cookies_json = reddit_monitor._get_cookies_json(crypto)
        if not cookies_json:
            return 'missing'

        cookie_list = json.loads(cookies_json)
        cookie_dict = {}
        if isinstance(cookie_list, list):
            for c in cookie_list:
                name = c.get('name', '')
                value = c.get('value', '')
                if name and value:
                    cookie_dict[name] = value
        elif isinstance(cookie_list, dict):
            cookie_dict = cookie_list

        if not cookie_dict:
            return 'missing'

        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
            resp = await client.get(
                'https://www.reddit.com/api/me.json',
                cookies=cookie_dict,
                headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
            )

        if resp.status_code in (401, 403):
            return 'expired'
        if resp.status_code == 200:
            return 'healthy'
        return None

    except Exception as e:
        logger.debug(f"Reddit cookie test error: {e}", module="Health")
        return None


def _broadcast_cookie_alert(app_state, service_id: str, service_name: str, status: str):
    """Broadcast a cookie health alert via WebSocket and send Pushover notification.

    Does nothing when global monitoring, or monitoring for the service's
    platform, is disabled. Failures in either delivery channel are logged
    at debug level and do not propagate.
    """
    # Check if global monitoring is disabled
    if not _is_global_monitoring_enabled(app_state):
        return

    # Extract platform_id from service_id
    # (e.g. "scraper:tiktok" -> "tiktok", "paid:onlyfans_direct" -> "onlyfans_direct")
    platform_id = service_id.split(':', 1)[-1] if ':' in service_id else service_id
    if not _is_monitoring_enabled(app_state, platform_id):
        return

    status_messages = {
        'expired': f'{service_name} session has expired',
        'failed': f'{service_name} cookie test failed',
        'degraded': f'{service_name} service is degraded',
        'down': f'{service_name} service is down',
    }
    message = status_messages.get(status, f'{service_name} has cookie issues')

    # WebSocket broadcast
    manager = getattr(app_state, 'websocket_manager', None)
    if manager:
        try:
            manager.broadcast_sync({
                'type': 'cookie_health_alert',
                'data': {
                    'service_id': service_id,
                    'service_name': service_name,
                    'status': status,
                    'message': message,
                    'timestamp': datetime.now().isoformat(),
                }
            })
        except Exception as e:
            logger.debug(f"Failed to broadcast cookie alert: {e}", module="Health")

    # Pushover push notification
    try:
        from modules.pushover_notifier import create_notifier_from_config
        config = getattr(app_state, 'config', None)
        if config:
            notifier = create_notifier_from_config(config, unified_db=getattr(app_state, 'db', None))
            if notifier:
                status_emoji = {'expired': '🔴', 'failed': '🔴', 'degraded': '🟡', 'down': '🔴'}.get(status, '⚠️')
                if service_id.startswith('paid:'):
                    svc_type = 'Paid Content'
                elif service_id.startswith('scraper:'):
                    svc_type = 'Scraper'
                else:
                    svc_type = 'Service'
                notifier.send_notification(
                    title=f"🔑 Cookie Alert: {service_name}",
                    message=f"{status_emoji} Status: {status.replace('_', ' ').title()}\n"
                            f"🔧 Service: {service_name} ({svc_type})\n"
                            f"📝 Action: Please re-login and update cookies\n"
                            f"\n⏰ {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
                    priority=1,  # High priority
                    sound="siren",
                    html=True,
                )
    except Exception as e:
        logger.debug(f"Failed to send cookie alert push notification: {e}", module="Health")