Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions
--- a/modules/activity_status.py
+++ b/modules/activity_status.py
@@ -0,0 +1,491 @@
+#!/usr/bin/env python3
+"""
+Activity Status Manager
+Centralized module for tracking and updating real-time download activity status
+Stores status in database for reliable, concurrent access
+
+Supports:
+- Single main activity (scheduler) via activity_status table
+- Multiple background tasks (YouTube monitor, etc.) via background_task_status table
+"""
+
+import json
+from datetime import datetime
+from typing import Optional, Dict, Any, List
+from pathlib import Path
+from modules.universal_logger import get_logger
+
+logger = get_logger('ActivityStatus')  # module-level logger used by ActivityStatusManager below
+
+
+class ActivityStatusManager:
+    """Manages real-time activity status updates stored in database"""
+
+    def __init__(self, unified_db=None):
+        """
+        Bind the manager to a database and make sure its tables exist.
+
+        Args:
+            unified_db: UnifiedDatabase to use; a new one is created if falsy
+        """
+        if unified_db:
+            self.db = unified_db
+        else:
+            from modules.unified_database import UnifiedDatabase
+            self.db = UnifiedDatabase()
+        self._ensure_table()
+
+    def _ensure_table(self):
+        """Create activity_status and background_task_status if they are missing."""
+        try:
+            # Everything below writes, so ask for for_write=True like the other mutators
+            with self.db.get_connection(for_write=True) as conn:
+                cursor = conn.cursor()
+
+                # Main scheduler activity table; CHECK (id = 1) keeps it to one row
+                cursor.execute('''
+                    CREATE TABLE IF NOT EXISTS activity_status (
+                        id INTEGER PRIMARY KEY CHECK (id = 1),
+                        active INTEGER NOT NULL DEFAULT 0,
+                        task_id TEXT,
+                        platform TEXT,
+                        account TEXT,
+                        start_time TEXT,
+                        status TEXT,
+                        detailed_status TEXT,
+                        progress_current INTEGER,
+                        progress_total INTEGER,
+                        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
+                    )
+                ''')
+
+                # Add the account progress columns when an existing table lacks them
+                cursor.execute("PRAGMA table_info(activity_status)")
+                existing = {col[1] for col in cursor.fetchall()}
+                for column in ('account_current', 'account_total'):
+                    if column not in existing:
+                        cursor.execute(f'ALTER TABLE activity_status ADD COLUMN {column} INTEGER')
+
+                # Seed the single status row; a no-op when it already exists
+                cursor.execute('''
+                    INSERT OR IGNORE INTO activity_status (id, active)
+                    VALUES (1, 0)
+                ''')
+
+                # Background tasks table (multiple concurrent tasks like YouTube monitor)
+                cursor.execute('''
+                    CREATE TABLE IF NOT EXISTS background_task_status (
+                        task_id TEXT PRIMARY KEY,
+                        active INTEGER NOT NULL DEFAULT 0,
+                        task_type TEXT,
+                        display_name TEXT,
+                        start_time TEXT,
+                        status TEXT,
+                        detailed_status TEXT,
+                        progress_current INTEGER,
+                        progress_total INTEGER,
+                        extra_data TEXT,
+                        updated_at TEXT DEFAULT CURRENT_TIMESTAMP
+                    )
+                ''')
+
+                conn.commit()
+        except Exception as e:
+            logger.error(f"Failed to create activity tables: {e}")
+
+    def start_activity(self, task_id: str, platform: str, account: str, status: str = "Running"):
+        """Flag the main activity row as running and clear all of its progress fields.
+
+        Args:
+            task_id: Unique task identifier
+            platform: Platform name (instagram, snapchat, etc)
+            account: Account/username being processed
+            status: Initial status message
+        """
+        try:
+            with self.db.get_connection(for_write=True) as connection:
+                statement = '''
+                    UPDATE activity_status
+                    SET active = 1,
+                        task_id = ?,
+                        platform = ?,
+                        account = ?,
+                        start_time = ?,
+                        status = ?,
+                        detailed_status = NULL,
+                        progress_current = NULL,
+                        progress_total = NULL,
+                        account_current = NULL,
+                        account_total = NULL,
+                        updated_at = ?
+                    WHERE id = 1
+                '''
+                values = (task_id, platform, account, datetime.now().isoformat(),
+                          status, datetime.now().isoformat())
+                connection.cursor().execute(statement, values)
+                connection.commit()
+        except Exception as exc:
+            logger.error(f"Failed to start activity: {exc}")
+
+    def update_status(self, detailed_status: str, progress_current: Optional[int] = None,
+                     progress_total: Optional[int] = None):
+        """Set the detailed status of the active row; None progress values keep what is stored."""
+        try:
+            with self.db.get_connection(for_write=True) as connection:
+                statement = '''
+                    UPDATE activity_status
+                    SET detailed_status = ?,
+                        progress_current = COALESCE(?, progress_current),
+                        progress_total = COALESCE(?, progress_total),
+                        updated_at = ?
+                    WHERE id = 1 AND active = 1
+                '''
+                values = (detailed_status, progress_current, progress_total, datetime.now().isoformat())
+                connection.cursor().execute(statement, values)
+                connection.commit()
+        except Exception as exc:
+            logger.error(f"Failed to update status: {exc}")
+
+    def update_account_name(self, account: str):
+        """Replace the account shown for the activity; ignored unless one is active."""
+        try:
+            with self.db.get_connection(for_write=True) as connection:
+                statement = '''
+                    UPDATE activity_status
+                    SET account = ?,
+                        updated_at = ?
+                    WHERE id = 1 AND active = 1
+                '''
+                connection.cursor().execute(statement, (account, datetime.now().isoformat()))
+                connection.commit()
+        except Exception as exc:
+            logger.error(f"Failed to update account name: {exc}")
+
+    def update_account_progress(self, account_current: int, account_total: int):
+        """Store account-level progress and clear file-level progress for the new account."""
+        try:
+            with self.db.get_connection(for_write=True) as connection:
+                statement = '''
+                    UPDATE activity_status
+                    SET account_current = ?,
+                        account_total = ?,
+                        progress_current = NULL,
+                        progress_total = NULL,
+                        updated_at = ?
+                    WHERE id = 1 AND active = 1
+                '''
+                connection.cursor().execute(statement, (account_current, account_total, datetime.now().isoformat()))
+                connection.commit()
+        except Exception as exc:
+            logger.error(f"Failed to update account progress: {exc}")
+
+    def stop_activity(self):
+        """Flag the main activity row as inactive and clear its detail and progress fields."""
+        try:
+            with self.db.get_connection(for_write=True) as connection:
+                statement = '''
+                    UPDATE activity_status
+                    SET active = 0,
+                        detailed_status = NULL,
+                        progress_current = NULL,
+                        progress_total = NULL,
+                        account_current = NULL,
+                        account_total = NULL,
+                        updated_at = ?
+                    WHERE id = 1
+                '''
+                connection.cursor().execute(statement, (datetime.now().isoformat(),))
+                connection.commit()
+        except Exception as exc:
+            logger.error(f"Failed to stop activity: {exc}")
+
+    def get_current_activity(self) -> Dict[str, Any]:
+        """
+        Read the main activity row and shape it into a dictionary.
+
+        Returns:
+            Dict with 'active', 'task_id', 'platform', 'account',
+            'start_time' and 'status'. 'detailed_status', 'progress' and
+            'account_progress' are added only when the row holds them. An
+            inactive placeholder (all other fields None) is returned when
+            the row is missing or the query fails.
+        """
+        def _idle() -> Dict[str, Any]:
+            return {
+                'active': False,
+                'task_id': None,
+                'platform': None,
+                'account': None,
+                'start_time': None,
+                'status': None
+            }
+
+        try:
+            with self.db.get_connection() as connection:
+                db_cursor = connection.cursor()
+                db_cursor.execute('''
+                    SELECT active, task_id, platform, account, start_time, status,
+                           detailed_status, progress_current, progress_total,
+                           account_current, account_total
+                    FROM activity_status
+                    WHERE id = 1
+                ''')
+                record = db_cursor.fetchone()
+
+                if not record:
+                    return _idle()
+
+                activity = {
+                    'active': bool(record[0]),
+                    'task_id': record[1],
+                    'platform': record[2],
+                    'account': record[3],
+                    'start_time': record[4],
+                    'status': record[5]
+                }
+
+                # Optional fields are emitted only when populated
+                detailed_status = record[6]
+                if detailed_status:
+                    activity['detailed_status'] = detailed_status
+                file_current, file_total = record[7], record[8]
+                if not (file_current is None or file_total is None):
+                    activity['progress'] = {'current': file_current, 'total': file_total}
+                account_current, account_total = record[9], record[10]
+                if not (account_current is None or account_total is None):
+                    activity['account_progress'] = {
+                        'current': account_current,
+                        'total': account_total
+                    }
+
+                return activity
+        except Exception as exc:
+            logger.error(f"Failed to get current activity: {exc}")
+            return _idle()
+
+    # =========================================================================
+    # BACKGROUND TASK METHODS (for concurrent tasks like YouTube monitor)
+    # =========================================================================
+
+    def start_background_task(self, task_id: str, task_type: str, display_name: str,
+                               status: str = "Running", extra_data: Dict = None):
+        """
+        Register a background task as running, independent of the main activity.
+
+        A task that is already marked active is left exactly as it is.
+
+        Args:
+            task_id: Unique task identifier (e.g., 'youtube_monitor')
+            task_type: Type of task (e.g., 'youtube_monitor', 'video_processor')
+            display_name: Human-readable name for display
+            status: Initial status message
+            extra_data: Optional extra data to store as JSON
+        """
+        try:
+            with self.db.get_connection(for_write=True) as connection:
+                db_cursor = connection.cursor()
+                db_cursor.execute('''
+                    SELECT active FROM background_task_status WHERE task_id = ?
+                ''', (task_id,))
+                existing = db_cursor.fetchone()
+                if existing and existing[0] == 1:
+                    logger.debug(f"Background task {task_id} already running, not resetting")
+                    return
+
+                serialized = json.dumps(extra_data) if extra_data else None
+                db_cursor.execute('''
+                    INSERT OR REPLACE INTO background_task_status
+                    (task_id, active, task_type, display_name, start_time, status,
+                     detailed_status, progress_current, progress_total, extra_data, updated_at)
+                    VALUES (?, 1, ?, ?, ?, ?, NULL, NULL, NULL, ?, ?)
+                ''', (task_id, task_type, display_name, datetime.now().isoformat(),
+                      status, serialized, datetime.now().isoformat()))
+                connection.commit()
+        except Exception as exc:
+            logger.error(f"Failed to start background task {task_id}: {exc}")
+
+    def update_background_task(self, task_id: str, detailed_status: str,
+                                progress_current: Optional[int] = None,
+                                progress_total: Optional[int] = None,
+                                extra_data: Dict = None):
+        """Overwrite an active task's status and progress; extra_data changes only when given."""
+        try:
+            with self.db.get_connection(for_write=True) as connection:
+                db_cursor = connection.cursor()
+                if extra_data is None:
+                    db_cursor.execute('''
+                        UPDATE background_task_status
+                        SET detailed_status = ?,
+                            progress_current = ?,
+                            progress_total = ?,
+                            updated_at = ?
+                        WHERE task_id = ? AND active = 1
+                    ''', (detailed_status, progress_current, progress_total,
+                          datetime.now().isoformat(), task_id))
+                else:
+                    db_cursor.execute('''
+                        UPDATE background_task_status
+                        SET detailed_status = ?,
+                            progress_current = ?,
+                            progress_total = ?,
+                            extra_data = ?,
+                            updated_at = ?
+                        WHERE task_id = ? AND active = 1
+                    ''', (detailed_status, progress_current, progress_total,
+                          json.dumps(extra_data), datetime.now().isoformat(), task_id))
+                connection.commit()
+        except Exception as exc:
+            logger.error(f"Failed to update background task {task_id}: {exc}")
+
+    def stop_background_task(self, task_id: str):
+        """Flag one background task as inactive; its other columns are kept."""
+        try:
+            with self.db.get_connection(for_write=True) as connection:
+                statement = '''
+                    UPDATE background_task_status
+                    SET active = 0,
+                        updated_at = ?
+                    WHERE task_id = ?
+                '''
+                connection.cursor().execute(statement, (datetime.now().isoformat(), task_id))
+                connection.commit()
+        except Exception as exc:
+            logger.error(f"Failed to stop background task {task_id}: {exc}")
+
+    def stop_all_background_tasks(self):
+        """Deactivate every active background task (used on scheduler startup to clear stale state)."""
+        try:
+            with self.db.get_connection(for_write=True) as connection:
+                db_cursor = connection.cursor()
+                db_cursor.execute('''
+                    UPDATE background_task_status
+                    SET active = 0,
+                        updated_at = ?
+                    WHERE active = 1
+                ''', (datetime.now().isoformat(),))
+                cleared = db_cursor.rowcount
+                connection.commit()
+                if cleared > 0:
+                    logger.info(f"Cleared {cleared} stale background task(s) from previous run")
+        except Exception as exc:
+            logger.error(f"Failed to stop all background tasks: {exc}")
+
+    def get_background_task(self, task_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Look up one background task by its identifier.
+
+        Args:
+            task_id: Task identifier
+
+        Returns:
+            Task dict, or None when the task is unknown or the lookup fails
+        """
+        try:
+            with self.db.get_connection() as connection:
+                db_cursor = connection.cursor()
+                db_cursor.execute('''
+                    SELECT task_id, active, task_type, display_name, start_time,
+                           status, detailed_status, progress_current, progress_total,
+                           extra_data, updated_at
+                    FROM background_task_status
+                    WHERE task_id = ?
+                ''', (task_id,))
+                record = db_cursor.fetchone()
+                if not record:
+                    return None
+
+                task = {
+                    'task_id': record[0],
+                    'active': bool(record[1]),
+                    'task_type': record[2],
+                    'display_name': record[3],
+                    'start_time': record[4],
+                    'status': record[5],
+                    'updated_at': record[10]
+                }
+
+                detailed_status = record[6]
+                if detailed_status:
+                    task['detailed_status'] = detailed_status
+                current, total = record[7], record[8]
+                if not (current is None or total is None):
+                    task['progress'] = {'current': current, 'total': total}
+                raw_extra = record[9]
+                if raw_extra:
+                    try:
+                        task['extra_data'] = json.loads(raw_extra)
+                    except (json.JSONDecodeError, TypeError, ValueError) as parse_error:
+                        logger.debug(f"Failed to parse extra_data for task {task_id}: {parse_error}")
+                        task['extra_data'] = {}
+
+                return task
+        except Exception as exc:
+            logger.error(f"Failed to get background task {task_id}: {exc}")
+            return None
+
+    def get_active_background_tasks(self) -> List[Dict[str, Any]]:
+        """
+        Get all active background tasks, ordered by start_time descending.
+
+        Returns:
+            List of active task dictionaries; empty when there are none or
+            when the query fails
+        """
+        try:
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute('''
+                    SELECT task_id, active, task_type, display_name, start_time,
+                           status, detailed_status, progress_current, progress_total,
+                           extra_data, updated_at
+                    FROM background_task_status
+                    WHERE active = 1
+                    ORDER BY start_time DESC
+                ''')
+
+                tasks = []
+                for row in cursor.fetchall():
+                    task = {
+                        'task_id': row[0],
+                        'active': bool(row[1]),
+                        'task_type': row[2],
+                        'display_name': row[3],
+                        'start_time': row[4],
+                        'status': row[5],
+                        'updated_at': row[10]
+                    }
+
+                    if row[6]:  # detailed_status
+                        task['detailed_status'] = row[6]
+                    if row[7] is not None and row[8] is not None:  # progress
+                        task['progress'] = {'current': row[7], 'total': row[8]}
+                    if row[9]:  # extra_data
+                        try:
+                            task['extra_data'] = json.loads(row[9])
+                        except (json.JSONDecodeError, TypeError, ValueError) as e:
+                            # Log it the way get_background_task does, then fall back to {}
+                            logger.debug(f"Failed to parse extra_data for task {row[0]}: {e}")
+                            task['extra_data'] = {}
+
+                    tasks.append(task)
+
+                return tasks
+        except Exception as e:
+            logger.error(f"Failed to get active background tasks: {e}")
+            return []
+
+
+# Process-wide singleton plus the lock that guards its creation
+_activity_manager = None
+_activity_manager_lock = __import__('threading').Lock()
+
+
+def get_activity_manager(unified_db=None):
+    """Return the shared manager; unified_db is only used when it is first created."""
+    global _activity_manager
+    if _activity_manager is not None:
+        return _activity_manager
+    with _activity_manager_lock:
+        if _activity_manager is None:  # re-check now that the lock is held
+            _activity_manager = ActivityStatusManager(unified_db)
+        return _activity_manager
--- a/modules/base_module.py
+++ b/modules/base_module.py
@@ -0,0 +1,478 @@
+#!/usr/bin/env python3
+"""
+Base Module - Shared functionality for all media downloader modules
+
+Provides:
+- LoggingMixin: Consistent logging with universal logger and backwards-compatible callback support
+- CookieManagerMixin: Centralized cookie loading/saving for scrapers
+- RateLimitMixin: Smart delay handling for rate limiting
+- DeferredDownloadsMixin: Track downloads for batch database recording
+"""
+
+import random
+import time
+from typing import Any, Dict, List, Optional
+
+from modules.universal_logger import get_logger
+
+
+class LoggingMixin:
+    """
+    Mixin that gives a class a uniform ``log()`` method.
+
+    Every message is sent to the universal logger; an optional callback
+    also receives it, for backwards compatibility with existing code.
+
+    Usage:
+        class MyModule(LoggingMixin):
+            def __init__(self, log_callback=None):
+                self._init_logger('MyModule', log_callback)
+                # ... rest of init
+
+            def do_something(self):
+                self.log("Starting operation", "info")
+                # ...
+                self.log("Operation complete", "success")
+    """
+
+    _logger_name: str = 'Unknown'
+    _default_module: str = 'Core'
+    logger = None
+    log_callback = None
+    show_debug: bool = True
+
+    def _init_logger(self, logger_name: str, log_callback=None, default_module: str = 'Core', show_debug: bool = True):
+        """
+        Configure logging for this instance.
+
+        Args:
+            logger_name: Name for the logger (e.g., 'Instagram', 'TikTok', 'Forum')
+            log_callback: Optional callback invoked as callback(message, level)
+            default_module: Module name used when log() is given none (default: 'Core')
+            show_debug: Whether debug messages are emitted (default: True)
+        """
+        self.logger = get_logger(logger_name)
+        self._logger_name = logger_name
+        self._default_module = default_module
+        self.log_callback = log_callback
+        self.show_debug = show_debug
+
+    def log(self, message: str, level: str = "info", module: str = None):
+        """
+        Log a message through the universal logger, then the optional callback.
+        If _init_logger() was never called, a logger is created on first use.
+
+        Args:
+            message: The message to log
+            level: Log level ('debug', 'info', 'warning', 'error', 'success', 'critical')
+            module: Module name for the log entry (default: uses _default_module)
+        """
+        level_lower = level.lower()
+        if level_lower == "debug" and not self.show_debug:
+            return  # debug output is muted for this instance
+
+        if self.logger is None:
+            # Without this, the class-level default (None) raised AttributeError here
+            self.logger = get_logger(self._logger_name)
+        self.logger.log(message, level.upper(), module=module or self._default_module)
+
+        # Call log_callback for backwards compatibility
+        if self.log_callback:
+            self.log_callback(f"[{self._logger_name}] {message}", level_lower)
+
+
+class CookieManagerMixin:
+    """
+    Mixin that centralizes cookie persistence for scrapers.
+
+    Cookies are read from and written to the database through unified_db.
+
+    Usage:
+        class MyScraper(LoggingMixin, CookieManagerMixin):
+            def __init__(self, unified_db=None):
+                self._init_logger('MyScraper')
+                self._init_cookie_manager(unified_db, 'my_scraper')
+                self._load_cookies_from_db()
+
+            def after_auth(self, cookies):
+                self._save_cookies_to_db(cookies)
+    """
+
+    unified_db = None
+    scraper_id: str = ''
+    cf_handler = None  # CloudflareHandler if used
+    user_agent: str = ''
+
+    def _init_cookie_manager(self, unified_db, scraper_id: str, cf_handler=None, user_agent: str = ''):
+        """
+        Store the collaborators used by the cookie helpers.
+
+        Args:
+            unified_db: UnifiedDatabase instance
+            scraper_id: ID for this scraper in database
+            cf_handler: Optional CloudflareHandler instance
+            user_agent: User agent string
+        """
+        self.scraper_id = scraper_id
+        self.user_agent = user_agent
+        self.unified_db = unified_db
+        self.cf_handler = cf_handler
+
+    def _load_cookies_from_db(self) -> Optional[List[Dict]]:
+        """
+        Fetch this scraper's cookies from the database, if there are any.
+
+        Returns:
+            List of cookie dicts, or None (no database, no cookies, or an error)
+        """
+        if not self.unified_db:
+            return None
+
+        try:
+            stored = self.unified_db.get_scraper_cookies(self.scraper_id)
+            if not stored:
+                return None
+            # Hand the cookies to the CloudflareHandler when one is attached
+            if self.cf_handler:
+                self.cf_handler._cookies = stored
+            if hasattr(self, 'log'):
+                self.log(f"Loaded {len(stored)} cookies from database", "debug")
+            return stored
+        except Exception as exc:
+            if hasattr(self, 'log'):
+                self.log(f"Error loading cookies from database: {exc}", "warning")
+            return None
+
+    def _save_cookies_to_db(self, cookies: List[Dict], merge: bool = True, user_agent: str = None):
+        """
+        Persist cookies for this scraper.
+
+        Does nothing without a database. Failures are caught and reported
+        through self.log when the class provides it.
+
+        Args:
+            cookies: List of cookie dicts
+            merge: Whether to merge with existing cookies
+            user_agent: User agent to associate with cookies (important for cf_clearance).
+                       If not provided, uses self.user_agent as fallback.
+        """
+        if not self.unified_db:
+            return
+
+        try:
+            effective_ua = user_agent or self.user_agent  # explicit argument wins
+            self.unified_db.save_scraper_cookies(
+                self.scraper_id, cookies, user_agent=effective_ua, merge=merge
+            )
+            if hasattr(self, 'log'):
+                ua_preview = effective_ua[:50] if effective_ua else 'None'
+                self.log(f"Saved {len(cookies)} cookies to database (UA: {ua_preview}...)", "debug")
+        except Exception as exc:
+            if hasattr(self, 'log'):
+                self.log(f"Error saving cookies to database: {exc}", "warning")
+
+    def _cookies_expired(self) -> bool:
+        """
+        Report whether the current cookies should be treated as expired.
+
+        Returns:
+            Whatever cf_handler.cookies_expired() returns, or True when no
+            handler is attached
+        """
+        handler = self.cf_handler
+        return handler.cookies_expired() if handler else True
+
+    def _get_cookies_for_requests(self) -> Dict[str, str]:
+        """
+        Get cookies in the name -> value form used by the requests library.
+
+        Returns:
+            cf_handler.get_cookies_dict(), or an empty dict when no handler
+            is attached
+        """
+        handler = self.cf_handler
+        return handler.get_cookies_dict() if handler else {}
+
+
+class RateLimitMixin:
+    """
+    Mixin that paces a scraper's requests with randomized pauses.
+
+    Offers per-item, per-batch and post-error delays to avoid detection and rate limiting.
+
+    Usage:
+        class MyScraper(LoggingMixin, RateLimitMixin):
+            def __init__(self):
+                self._init_logger('MyScraper')
+                self._init_rate_limiter(min_delay=5, max_delay=15, batch_delay_min=30)
+
+            def download_batch(self, items):
+                for i, item in enumerate(items):
+                    self.download_item(item)
+                    is_batch_end = (i + 1) % 10 == 0
+                    self._smart_delay(is_batch_end)
+    """
+
+    min_delay: float = 5.0
+    max_delay: float = 15.0
+    batch_delay_min: float = 30.0
+    batch_delay_max: float = 60.0
+    error_delay: float = 120.0
+
+    def _init_rate_limiter(
+        self,
+        min_delay: float = 5.0,
+        max_delay: float = 15.0,
+        batch_delay_min: float = 30.0,
+        batch_delay_max: float = 60.0,
+        error_delay: float = 120.0
+    ):
+        """
+        Override the class-level delay defaults for this instance.
+
+        Args:
+            min_delay: Minimum delay between requests (seconds)
+            max_delay: Maximum delay between requests (seconds)
+            batch_delay_min: Minimum delay between batches (seconds)
+            batch_delay_max: Maximum delay between batches (seconds)
+            error_delay: Delay after errors (seconds)
+        """
+        self.min_delay = min_delay
+        self.max_delay = max_delay
+        self.batch_delay_min = batch_delay_min
+        self.batch_delay_max = batch_delay_max
+        self.error_delay = error_delay
+
+    def _smart_delay(self, is_batch_end: bool = False, had_error: bool = False):
+        """
+        Sleep for the delay that fits the situation (negative delays count as 0).
+
+        Args:
+            is_batch_end: True if this is the end of a batch
+            had_error: True if there was an error (takes priority, uses error_delay)
+        """
+        if had_error:
+            delay = self.error_delay
+        elif is_batch_end:
+            delay = random.uniform(self.batch_delay_min, self.batch_delay_max)
+        else:
+            delay = random.uniform(self.min_delay, self.max_delay)
+        # time.sleep() raises ValueError for negative values, so clamp at zero
+        delay = max(0.0, delay)
+        if hasattr(self, 'log'):
+            self.log(f"Waiting {delay:.1f}s before next request", "debug")
+        time.sleep(delay)
+
+    def _delay_after_error(self):
+        """Sleep for the fixed error delay."""
+        self._smart_delay(had_error=True)
+
+    def _delay_between_items(self):
+        """Sleep for a random per-item delay."""
+        self._smart_delay(is_batch_end=False)
+
+    def _delay_between_batches(self):
+        """Sleep for a random per-batch delay."""
+        self._smart_delay(is_batch_end=True)
+
+
+class DeferredDownloadsMixin:
+    """
+    Mixin that queues download records so they can be written in one batch.
+
+    Deferring the database writes this way is meant to improve performance.
+
+    Usage:
+        class MyScraper(LoggingMixin, DeferredDownloadsMixin):
+            def __init__(self):
+                self._init_logger('MyScraper')
+                self._init_deferred_downloads()
+
+            def download_file(self, url, path):
+                # ... download logic ...
+                self._add_pending_download({
+                    'platform': 'my_platform',
+                    'source': 'username',
+                    'file_path': str(path),
+                    # ... other fields ...
+                })
+
+            def finish_batch(self):
+                downloads = self.get_pending_downloads()
+                self.db.record_downloads_batch(downloads)
+                self.clear_pending_downloads()
+    """
+
+    pending_downloads: List[Dict] = None
+
+    def _init_deferred_downloads(self):
+        """Start this instance with an empty queue of its own."""
+        self.pending_downloads = []
+
+    def _add_pending_download(self, download_info: Dict[str, Any]):
+        """Queue one download for later batch recording.
+
+        Args:
+            download_info: Dict with download metadata
+        """
+        queue = self.pending_downloads
+        if queue is None:
+            queue = self.pending_downloads = []
+        queue.append(download_info)
+
+    def get_pending_downloads(self) -> List[Dict[str, Any]]:
+        """Return the queued downloads.
+
+        Returns:
+            The internal list when it has entries, otherwise a new empty list
+        """
+        queued = self.pending_downloads
+        return queued if queued else []
+
+    def clear_pending_downloads(self):
+        """Replace the queue with a fresh empty list."""
+        self.pending_downloads = []
+
+    def has_pending_downloads(self) -> bool:
+        """Tell whether at least one download is queued."""
+        return True if self.pending_downloads else False
+
+
+class BaseDatabaseAdapter:
+    """
+    Shared foundation for platform-specific database adapters.
+
+    Wraps a UnifiedDatabase instance and offers the record/query helpers that
+    every platform needs. Concrete adapters subclass this and add their own
+    URL construction.
+
+    Usage:
+        class MyPlatformAdapter(BaseDatabaseAdapter):
+            def __init__(self, unified_db):
+                super().__init__(unified_db, platform='my_platform')
+
+            def record_download(self, content_id, username, filename, **kwargs):
+                # Platform-specific URL construction
+                url = f"https://my_platform.com/{username}/{content_id}"
+                return self._record_download_internal(
+                    url=url,
+                    source=username,
+                    filename=filename,
+                    **kwargs
+                )
+    """
+
+    def __init__(self, unified_db, platform: str, method: str = None):
+        """
+        Bind the adapter to a database and a platform.
+
+        Args:
+            unified_db: UnifiedDatabase instance
+            platform: Platform name (e.g., 'instagram', 'tiktok')
+            method: Optional method identifier for multi-method platforms;
+                    falls back to the platform name when not given
+        """
+        # Both attribute names refer to the same object; `unified_db` is kept
+        # as an alias for compatibility.
+        self.db = self.unified_db = unified_db
+        self.platform = platform
+        self.method = method if method else platform
+
+    def get_connection(self, for_write: bool = False):
+        """Return a database connection obtained from the wrapped UnifiedDatabase."""
+        connection = self.db.get_connection(for_write)
+        return connection
+
+    def get_file_hash(self, file_path: str) -> Optional[str]:
+        """Return the hash of a file as computed by the wrapped database."""
+        digest = self.db.get_file_hash(file_path)
+        return digest
+
+    def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
+        """Look up a download record by its file hash."""
+        record = self.db.get_download_by_file_hash(file_hash)
+        return record
+
+    def get_download_by_media_id(self, media_id: str) -> Optional[Dict]:
+        """Look up a download record by media_id for this platform and method."""
+        record = self.db.get_download_by_media_id(media_id, self.platform, self.method)
+        return record
+
+    def is_already_downloaded_by_hash(self, file_path: str) -> bool:
+        """Return True when a record with the same file hash already exists."""
+        digest = self.get_file_hash(file_path)
+        # No digest means nothing to compare against
+        return bool(digest) and self.get_download_by_file_hash(digest) is not None
+
+    def is_already_downloaded_by_media_id(self, media_id: str) -> bool:
+        """Return True when the downloads table has this media_id for the platform."""
+        params = (self.platform, media_id)
+        with self.db.get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT 1 FROM downloads
+                WHERE platform = ?
+                AND media_id = ?
+                LIMIT 1
+            ''', params)
+            row = cursor.fetchone()
+            return row is not None
+
+    def _calculate_file_hash(self, file_path: str) -> Optional[str]:
+        """Best-effort hash: None for an empty path, a missing file, or any error."""
+        if file_path:
+            try:
+                from pathlib import Path
+                if Path(file_path).exists():
+                    return self.get_file_hash(file_path)
+            except Exception:
+                # Hashing is optional; a failure must not block recording
+                return None
+        return None
+
+    def _detect_content_type(self, filename: str) -> str:
+        """Classify a filename as 'image' (known image extension) or 'video' (anything else)."""
+        from pathlib import Path
+        suffix = Path(filename).suffix.lower()
+        if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp', '.bmp', '.tiff'}:
+            return 'image'
+        return 'video'
+
+    def _record_download_internal(
+        self,
+        url: str,
+        source: str,
+        filename: str,
+        content_type: str = None,
+        file_path: str = None,
+        post_date=None,
+        metadata: Dict = None,
+        file_hash: str = None,
+        **extra_kwargs
+    ) -> bool:
+        """
+        Record one download through the wrapped database.
+
+        Args:
+            url: Unique URL/identifier for the content
+            source: Username or source identifier
+            filename: Downloaded filename
+            content_type: 'image' or 'video' (derived from the filename when omitted)
+            file_path: Full path to downloaded file
+            post_date: Original post date
+            metadata: Additional metadata dict
+            file_hash: Pre-computed file hash (computed from file_path when omitted)
+            **extra_kwargs: Forwarded unchanged to unified_db.record_download
+
+        Returns:
+            Whatever unified_db.record_download returns
+        """
+        resolved_type = content_type or self._detect_content_type(filename)
+
+        resolved_hash = file_hash
+        if file_path and not resolved_hash:
+            resolved_hash = self._calculate_file_hash(file_path)
+
+        record = {
+            'url': url,
+            'platform': self.platform,
+            'source': source,
+            'content_type': resolved_type,
+            'filename': filename,
+            'file_path': file_path,
+            'file_hash': resolved_hash,
+            'post_date': post_date,
+            'metadata': metadata,
+            'method': self.method,
+        }
+        return self.db.record_download(**record, **extra_kwargs)
--- a/modules/cloudflare_handler.py
+++ b/modules/cloudflare_handler.py
--- a/modules/coppermine_module.py
+++ b/modules/coppermine_module.py
@@ -0,0 +1,873 @@
+#!/usr/bin/env python3
+"""
+Coppermine Photo Gallery Downloader Module
+Downloads full-resolution images from Coppermine-based galleries
+"""
+
+import os
+import re
+import time
+import hashlib
+import requests
+from pathlib import Path
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Set
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin, urlparse, parse_qs
+from modules.base_module import LoggingMixin
+from modules.cloudflare_handler import CloudflareHandler, SiteStatus, get_flaresolverr_user_agent
+
+
+class CoppermineDownloader(LoggingMixin):
+    """
+    Coppermine Photo Gallery downloader
+
+    Example usage:
+        from coppermine_module import CoppermineDownloader
+
+        downloader = CoppermineDownloader()
+        count = downloader.download(
+            gallery_url="https://hqdiesel.net/thumbnails.php?album=lastup&cat=123",
+            output_dir="downloads/coppermine",
+            days_back=7
+        )
+        print(f"Downloaded {count} items")
+    """
+
+    def __init__(self, show_progress=True, use_database=True,
+                 log_callback=None, unified_db=None, config=None):
+        """
+        Initialize the downloader
+
+        Sets up logging, optional database tracking, rate-limit bounds, proxy
+        and cookie configuration, the HTTP session and the Cloudflare handler,
+        then loads any stored cookies.
+
+        Args:
+            show_progress: Print progress messages
+            use_database: Use database to track downloads (only effective
+                          when unified_db is also given)
+            log_callback: Optional callback function for logging
+            unified_db: Optional UnifiedDatabase instance
+            config: Optional config dict; only consulted for 'cookie_file'
+                    when no unified_db is supplied
+        """
+        # Initialize logging via mixin
+        self._init_logger('Coppermine', log_callback, default_module='Download')
+
+        self.show_progress = show_progress
+        self.use_database = use_database
+        self.downloaded_files = set()
+        self.download_count = 0
+        self.unified_db = unified_db  # Store for scraper config access
+        self.scraper_id = 'coppermine'  # Scraper ID in database
+
+        # filename -> upload date, filled by _download_image. Created here so
+        # _download_image can be called before any other method has set it up.
+        self.file_timestamps = {}
+
+        # Use unified database if provided
+        if unified_db and use_database:
+            from modules.unified_database import CoppermineDatabaseAdapter
+            self.db = CoppermineDatabaseAdapter(unified_db)
+        else:
+            # Without an adapter there is nothing to track against
+            self.db = None
+            self.use_database = False
+
+        # Initialize activity status manager for real-time updates
+        from modules.activity_status import get_activity_manager
+        self.activity_manager = get_activity_manager(unified_db)
+
+        # Rate limiting (seconds; bounds of the random pause after a download)
+        self.min_delay = 1
+        self.max_delay = 3
+
+        self.pending_downloads = []  # Track downloads for deferred database recording
+
+        # Load scraper configuration from database if available
+        self.proxy_url = None
+        self.cookie_file = None  # Default to None (use database)
+
+        if unified_db:
+            scraper_config = unified_db.get_scraper(self.scraper_id)
+            if scraper_config:
+                # Get proxy configuration
+                if scraper_config.get('proxy_enabled') and scraper_config.get('proxy_url'):
+                    self.proxy_url = scraper_config['proxy_url']
+                    self.log(f"Using proxy: {self.proxy_url}", "info")
+
+        # Fall back to config file for cookie_file if database not available
+        if not unified_db and config:
+            self.cookie_file = config.get('cookie_file', '/opt/media-downloader/cookies/coppermine_cookies.json')
+
+        # Session with proper headers
+        self.session = requests.Session()
+        self.user_agent = get_flaresolverr_user_agent()
+        self.session.headers.update({
+            'User-Agent': self.user_agent,
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Connection': 'keep-alive',
+            'Upgrade-Insecure-Requests': '1'
+        })
+
+        # Configure session proxy if available
+        if self.proxy_url:
+            self.session.proxies = {
+                'http': self.proxy_url,
+                'https': self.proxy_url
+            }
+
+        # Initialize universal Cloudflare handler with conservative expiry
+        # Pass proxy_url if configured, and cookie_file=None for database storage
+        self.cf_handler = CloudflareHandler(
+            module_name="Coppermine",
+            cookie_file=self.cookie_file,  # None when using database
+            user_agent=self.user_agent,
+            logger=self.logger,
+            aggressive_expiry=False,  # Conservative mode for Coppermine
+            proxy_url=self.proxy_url  # Pass proxy to FlareSolverr
+        )
+
+        # Keep for backwards compatibility
+        self.flaresolverr_url = self.cf_handler.flaresolverr_url
+        self.flaresolverr_enabled = self.cf_handler.flaresolverr_enabled
+
+        # Load cookies from database or file into the session
+        self._load_cookies()
+
+    def _record_download(self, url: str, platform: str, source: str, content_type: str,
+                        filename: str, file_path: str, file_size: int, file_hash: str,
+                        post_date=None, metadata: dict = None, deferred: bool = False):
+        """Persist one download record, or queue it for later persistence
+
+        Args:
+            deferred: If True, nothing is written to the database now; the
+                     record is appended to pending_downloads (with post_date
+                     converted via isoformat() when it supports it) so it can
+                     be recorded after the file move is complete
+
+        Returns:
+            True when the record was queued, otherwise None
+        """
+        entry = {
+            'url': url,
+            'platform': platform,
+            'source': source,
+            'content_type': content_type,
+            'filename': filename,
+            'file_path': file_path,
+            'file_size': file_size,
+            'file_hash': file_hash,
+            'post_date': post_date,
+            'metadata': metadata
+        }
+
+        if deferred:
+            # Queued records carry the date as an ISO string when possible
+            if hasattr(post_date, 'isoformat'):
+                entry['post_date'] = post_date.isoformat()
+            self.pending_downloads.append(entry)
+            self.log(f"Deferred recording for {filename}", "debug")
+            return True
+
+        if not (self.use_database and self.db):
+            return None
+
+        try:
+            self.db.add_download(**entry)
+        except Exception as e:
+            # Recording is best-effort; a failure only produces a log entry
+            self.log(f"Failed to record download: {e}", "debug")
+
+    def get_pending_downloads(self):
+        """Return a shallow copy of the downloads queued for deferred recording"""
+        snapshot = list(self.pending_downloads)
+        return snapshot
+
+    def clear_pending_downloads(self):
+        """Empty the deferred-download queue once its records have been stored.
+
+        The attribute is rebound to a new list, so copies handed out earlier
+        by get_pending_downloads() are left untouched.
+        """
+        self.pending_downloads = []
+
+    def _apply_cookies_to_session(self, cookies) -> bool:
+        """Copy cookie dicts into the HTTP session
+
+        A cookie that cannot be set is logged and skipped so the remaining
+        cookies are still applied.
+
+        Args:
+            cookies: Iterable of dicts with 'name' and 'value' and optional
+                     'domain' / 'path'
+
+        Returns:
+            True if a cookie named 'cf_clearance' was set
+        """
+        cf_clearance_found = False
+        for cookie in cookies:
+            try:
+                # Set cookie with basic attributes (requests.Session compatible)
+                self.session.cookies.set(
+                    cookie['name'],
+                    cookie['value'],
+                    domain=cookie.get('domain', ''),
+                    path=cookie.get('path', '/')
+                )
+                if cookie['name'] == 'cf_clearance':
+                    cf_clearance_found = True
+            except Exception as e:
+                self.log(f"Error setting cookie {cookie.get('name')}: {e}", "warning")
+        return cf_clearance_found
+
+    def _load_cookies(self):
+        """Load cookies from database or file into the HTTP session
+
+        The database is tried first when one is configured. The cookie file
+        is used when there is no database, when the database holds no
+        cookies, or when reading from it fails. Every failure is logged and
+        swallowed: the downloader can still run without cookies.
+        """
+        # Try database first if available
+        if self.unified_db:
+            try:
+                cookies = self.unified_db.get_scraper_cookies(self.scraper_id)
+                if cookies:
+                    if self._apply_cookies_to_session(cookies):
+                        self.log(f"✓ Loaded {len(cookies)} cookies including cf_clearance from database", "info")
+                    else:
+                        self.log(f"⚠ Loaded {len(cookies)} cookies from database but cf_clearance NOT found", "warning")
+
+                    # Also load cookies into CloudflareHandler for consistency
+                    self.cf_handler._cookies = cookies
+                    return
+                else:
+                    self.log("No cookies found in database", "debug")
+            except Exception as e:
+                self.log(f"Error loading cookies from database: {e}", "warning")
+
+        # Fall back to the cookie file
+        if not self.cookie_file:
+            self.log("No cookie file configured", "debug")
+            return
+
+        cookie_path = Path(self.cookie_file)
+        if not cookie_path.exists():
+            self.log(f"Cookie file does not exist: {self.cookie_file}", "info")
+            return
+
+        try:
+            import json
+            # JSON text is UTF-8; do not depend on the platform default encoding
+            with open(cookie_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+
+            # Handle both old format (list) and new format (dict with 'cookies' and 'timestamp')
+            if isinstance(data, dict) and 'cookies' in data:
+                cookies = data['cookies']
+            elif isinstance(data, list):
+                cookies = data
+            else:
+                self.log("Invalid cookie file format", "warning")
+                return
+
+            if self._apply_cookies_to_session(cookies):
+                self.log(f"✓ Loaded {len(cookies)} cookies including cf_clearance from {self.cookie_file}", "info")
+            else:
+                self.log(f"⚠ Loaded {len(cookies)} cookies but cf_clearance NOT found", "warning")
+
+        except Exception as e:
+            self.log(f"Error loading cookies: {e}", "warning")
+
+    def _cookies_expired(self):
+        """Check if cookies are expired - delegates to CloudflareHandler
+
+        Returns:
+            Whatever ``self.cf_handler.cookies_expired()`` returns
+            (presumably a bool - confirm against CloudflareHandler)
+        """
+        return self.cf_handler.cookies_expired()
+
+    def _save_cookies(self, cookies: list, user_agent: str = None):
+        """Store cookies in the database, or in the cookie file as a fallback
+
+        The file is written only when no database is configured or the
+        database save fails, and only if a cookie file path is set. Failures
+        are logged, never raised.
+
+        Args:
+            cookies: List of cookie dictionaries
+            user_agent: User agent to associate with cookies (important for cf_clearance).
+                       If not provided, uses self.user_agent as fallback.
+        """
+        effective_ua = user_agent if user_agent else self.user_agent
+
+        # Preferred target: the database
+        if self.unified_db:
+            try:
+                self.unified_db.save_scraper_cookies(
+                    self.scraper_id,
+                    cookies,
+                    user_agent=effective_ua,
+                    merge=True  # Merge with existing cookies
+                )
+                ua_preview = effective_ua[:50] if effective_ua else 'None'
+                self.log(f"Saved {len(cookies)} cookies to database (UA: {ua_preview}...)", "debug")
+                return
+            except Exception as e:
+                self.log(f"Error saving cookies to database: {e}", "warning")
+
+        # Fallback target: the cookie file, when one is configured
+        if self.cookie_file:
+            try:
+                import json
+                from datetime import datetime
+                target = Path(self.cookie_file)
+                target.parent.mkdir(parents=True, exist_ok=True)
+
+                # New-style file layout: cookies plus the time they were saved
+                payload = {
+                    'cookies': cookies,
+                    'timestamp': datetime.now().isoformat()
+                }
+
+                with open(target, 'w') as handle:
+                    json.dump(payload, handle, indent=2)
+                self.log(f"Saved {len(cookies)} cookies to {self.cookie_file}", "debug")
+            except Exception as e:
+                self.log(f"Error saving cookies: {e}", "warning")
+
+    def _get_cookies_via_flaresolverr(self, url: str, max_retries: int = 2) -> bool:
+        """Use FlareSolverr to bypass Cloudflare - delegates to CloudflareHandler
+
+        On success the cookies held by the handler are copied into the HTTP
+        session and, when a database is configured, saved together with the
+        handler's user agent.
+
+        Args:
+            url: URL to fetch
+            max_retries: Maximum number of retry attempts (default: 2)
+
+        Returns:
+            True if cookies obtained successfully, False otherwise
+        """
+        # Delegate to CloudflareHandler
+        success = self.cf_handler.get_cookies_via_flaresolverr(url, max_retries)
+
+        # If successful, also load cookies into the session and save to database
+        if success:
+            # The cookie domain is the same for every cookie, so derive it once.
+            # Use the bare hostname: netloc would also carry a port or userinfo,
+            # which never matches the request host during cookie lookup.
+            parsed = urlparse(url)
+            domain = parsed.hostname or parsed.netloc
+
+            for name, value in self.cf_handler.get_cookies_dict().items():
+                self.session.cookies.set(name, value, domain=domain, path='/')
+
+            # Save cookies to database (the handler already saved to file if configured)
+            if self.unified_db:
+                cookies_list = self.cf_handler.get_cookies_list()
+                if cookies_list:
+                    # CRITICAL: Get the user_agent from FlareSolverr solution, not self.user_agent
+                    # cf_clearance cookies are fingerprinted to the browser that solved the challenge
+                    flaresolverr_ua = self.cf_handler.get_user_agent()
+                    self._save_cookies(cookies_list, user_agent=flaresolverr_ua)
+
+        return success
+
+    def _request_with_retry(self, url: str, timeout: int = 30, max_attempts: int = 2):
+        """Make HTTP request with automatic Cloudflare challenge retry
+
+        A response counts as a Cloudflare challenge when its status is 403 or
+        503, or - for non-media responses only - when the body is shorter than
+        1000 characters or mentions 'challenge' near the top. Responses whose
+        Content-Type is image/video/audio skip the body checks: decoding a
+        large binary payload as text is expensive, and a small media file is
+        not a challenge page.
+
+        Args:
+            url: URL to fetch
+            timeout: Request timeout in seconds
+            max_attempts: Maximum number of attempts (default: 2); values
+                          below 1 are treated as 1
+
+        Returns:
+            requests.Response object
+
+        Raises:
+            Exception if all retry attempts fail (the last error is re-raised)
+        """
+        # Guarantee one attempt so there is always a response or a real error to raise
+        max_attempts = max(1, max_attempts)
+        last_error = None
+
+        for attempt in range(1, max_attempts + 1):
+            try:
+                response = self.session.get(url, timeout=timeout)
+
+                content_type = (response.headers.get('Content-Type') or '').lower()
+                is_media = content_type.startswith(('image/', 'video/', 'audio/'))
+
+                # Detect Cloudflare challenges
+                is_cloudflare = False
+                if response.status_code in (403, 503):
+                    is_cloudflare = True
+                    self.log(f"Cloudflare challenge detected (HTTP {response.status_code})", "warning")
+                elif not is_media:
+                    body_text = response.text  # decode once, reuse below
+                    if len(body_text) < 1000:
+                        is_cloudflare = True
+                        self.log(f"Cloudflare challenge detected (short response: {len(body_text)} bytes)", "warning")
+                    elif 'challenge' in body_text[:500].lower():
+                        # Only the head of the document is inspected, so lowercase just that
+                        is_cloudflare = True
+                        self.log("Cloudflare challenge detected in HTML", "warning")
+
+                # If Cloudflare detected and we have retry attempts left
+                if is_cloudflare and attempt < max_attempts:
+                    if self.flaresolverr_enabled:
+                        self.log(f"Attempt {attempt}/{max_attempts}: Refreshing cookies via FlareSolverr...", "info")
+                        if self._get_cookies_via_flaresolverr(url):
+                            self.log("Cookies refreshed, retrying request...", "info")
+                            continue  # Retry the request
+                        else:
+                            raise Exception("Failed to refresh cookies via FlareSolverr")
+                    else:
+                        raise Exception("Cloudflare challenge detected but FlareSolverr is disabled")
+
+                # No Cloudflare challenge or final attempt - check status and return
+                response.raise_for_status()
+                return response
+
+            except Exception as e:
+                # Errors raised above land here too, so they consume an
+                # attempt instead of aborting the loop
+                last_error = e
+                if attempt < max_attempts:
+                    self.log(f"Attempt {attempt}/{max_attempts} failed: {e}", "warning")
+                else:
+                    self.log(f"All {max_attempts} attempts failed", "error")
+
+        # All attempts failed
+        raise last_error
+
+    def _parse_date(self, date_str: str) -> Optional[datetime]:
+        """
+        Pull the upload date out of Coppermine metadata text.
+
+        Looks for the 'Date added=Sep 29, 2025' form anywhere in the string.
+
+        Args:
+            date_str: Text that may contain a 'Date added=...' entry
+
+        Returns:
+            datetime object, or None when no date is present or parsing fails
+        """
+        parsed = None
+        try:
+            found = re.search(r'Date added=([A-Za-z]+ \d+, \d{4})', date_str)
+            if found is not None:
+                parsed = datetime.strptime(found.group(1), '%b %d, %Y')
+        except Exception as e:
+            # Unparseable input is not fatal; callers treat None as "unknown"
+            self.log(f"Error parsing date '{date_str}': {e}", "debug")
+            parsed = None
+        return parsed
+
+    def _extract_full_image_url(self, base_url: str, thumbnail_url: str) -> str:
+        """
+        Build the full-resolution URL that corresponds to a thumbnail URL
+
+        Coppermine derives the smaller variants by prefixing the file name:
+            Thumbnail: albums/userpics/1052219/thumb_1000523798.jpg
+            Normal:    albums/userpics/1052219/normal_1000523798.jpg
+            Full:      albums/userpics/1052219/1000523798.jpg
+
+        Args:
+            base_url: Base URL of the gallery (e.g., https://hqdiesel.net)
+            thumbnail_url: Relative thumbnail URL
+
+        Returns:
+            Full-resolution image URL resolved against base_url
+        """
+        size_prefix = re.compile(r'/(thumb_|normal_)')
+        # Dropping the prefix (keeping the slash) yields the original file's path
+        original_path = size_prefix.sub('/', thumbnail_url)
+        resolved = urljoin(base_url, original_path)
+        return resolved
+
+    def _parse_gallery_page(self, html: str, base_url: str) -> List[Dict]:
+        """
+        Extract one info dict per thumbnail from a Coppermine gallery page
+
+        Cells without a displayimage.php link, a pid, an <img> or an image
+        src are skipped, as is any cell that raises while being parsed.
+
+        Args:
+            html: HTML content of the page
+            base_url: Base URL of the gallery
+
+        Returns:
+            List of dicts with keys pid, filename, thumbnail_url, full_url,
+            upload_date, dimensions, filesize, uploader, views and title
+        """
+        # Link patterns are the same for every cell, so build them once
+        display_link = re.compile(r'displayimage\.php')
+        profile_link = re.compile(r'profile\.php')
+
+        def first_group(pattern, text):
+            """Return capture group 1 of the first match, or None when absent."""
+            found = re.search(pattern, text)
+            return found.group(1) if found else None
+
+        cells = BeautifulSoup(html, 'html.parser').find_all('td', class_='thumbnails')
+        self.log(f"Found {len(cells)} thumbnail cells on page", "debug")
+
+        results = []
+        for cell in cells:
+            try:
+                anchor = cell.find('a', href=display_link)
+                if not anchor:
+                    continue
+
+                # The picture id travels in the link's query string
+                query = parse_qs(urlparse(anchor.get('href', '')).query)
+                pid = query.get('pid', [None])[0]
+                if not pid:
+                    continue
+
+                img = anchor.find('img')
+                if not img:
+                    continue
+
+                thumb_src = img.get('src', '')
+                if not thumb_src:
+                    continue
+
+                # The title attribute carries the key=value metadata
+                title = img.get('title', '')
+
+                # Prefer the dedicated date span; fall back to the title text
+                date_span = cell.find('span', class_='thumb_caption_ctime')
+                span_text = date_span.text.strip() if date_span else ''
+                if span_text:
+                    try:
+                        upload_date = datetime.strptime(span_text, '%b %d, %Y')
+                    except Exception:
+                        upload_date = self._parse_date(title)
+                else:
+                    upload_date = self._parse_date(title)
+
+                uploader_anchor = cell.find('a', href=profile_link)
+                uploader = uploader_anchor.text.strip() if uploader_anchor else None
+
+                views = None
+                views_span = cell.find('span', class_='thumb_title_views')
+                if views_span:
+                    view_count = first_group(r'(\d+)\s+views?', views_span.text)
+                    if view_count is not None:
+                        views = int(view_count)
+
+                results.append({
+                    'pid': pid,
+                    'filename': first_group(r'Filename=([^\s]+)', title),
+                    'thumbnail_url': urljoin(base_url, thumb_src),
+                    'full_url': self._extract_full_image_url(base_url, thumb_src),
+                    'upload_date': upload_date,
+                    'dimensions': first_group(r'Dimensions=(\d+x\d+)', title),
+                    'filesize': first_group(r'Filesize=([^\s]+)', title),
+                    'uploader': uploader,
+                    'views': views,
+                    'title': title
+                })
+
+            except Exception as e:
+                # One malformed cell must not abort the whole page
+                self.log(f"Error parsing thumbnail cell: {e}", "debug")
+                continue
+
+        return results
+
+    def _get_total_pages(self, html: str) -> int:
+        """
+        Read the page count from a gallery page's pagination text
+
+        Searches the visible text for a phrase such as
+        "2005 files on 20 page(s)".
+
+        Args:
+            html: HTML content
+
+        Returns:
+            Number of pages; 1 when the phrase is missing or parsing fails
+        """
+        page_count = 1
+        try:
+            visible_text = BeautifulSoup(html, 'html.parser').get_text()
+            found = re.search(r'(\d+)\s+files?\s+on\s+(\d+)\s+page', visible_text)
+            if found is not None:
+                # Group 1 is the file total, group 2 the page total
+                page_count = int(found.group(2))
+        except Exception as e:
+            self.log(f"Error extracting page count: {e}", "debug")
+            page_count = 1
+        return page_count
+
+    def _download_image(self, image_info: Dict, output_dir: Path,
+                       gallery_name: str) -> Optional[str]:
+        """
+        Download a single image
+
+        Skips images already known to the database, returns the existing path
+        when the target file is already on disk, and deletes the new file
+        again when its hash matches an earlier download stored elsewhere.
+        After a successful download the record is stored (or deferred) and
+        the method sleeps for a random time between min_delay and max_delay.
+
+        Args:
+            image_info: Image information dict ('full_url' and 'pid' are
+                        required; 'filename' falls back to the URL's last
+                        path segment)
+            output_dir: Output directory
+            gallery_name: Name of gallery for database tracking
+
+        Returns:
+            Path to the downloaded (or already existing) file, or None when
+            the image was skipped, was a duplicate, or an error occurred
+        """
+        import random
+
+        try:
+            url = image_info['full_url']
+            pid = image_info['pid']
+
+            # The filename originates from remote HTML. Fall back to the URL
+            # when it is missing, and keep only the final path component so a
+            # crafted name such as "../../x" cannot escape output_dir.
+            raw_name = image_info.get('filename') or urlparse(url).path
+            filename = os.path.basename(str(raw_name).replace('\\', '/'))
+            if filename in ('', '.', '..'):
+                raise ValueError(f"Unusable filename for PID {pid}: {raw_name!r}")
+
+            # Check if already downloaded
+            if self.use_database and self.db:
+                if self.db.is_downloaded(url, platform='coppermine'):
+                    self.log(f"Already downloaded (database): {filename} (PID: {pid})", "info")
+                    return None
+
+            # Create output directory
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            # Construct output filename
+            output_file = output_dir / filename
+
+            # Skip if file exists
+            if output_file.exists():
+                self.log(f"File already exists: {filename}", "info")
+                return str(output_file)
+
+            # Download image
+            self.log(f"Downloading: {filename} (PID: {pid})", "info")
+
+            response = self._request_with_retry(url, timeout=30)
+
+            # Save image
+            with open(output_file, 'wb') as f:
+                f.write(response.content)
+
+            # Hash the saved file once (SHA256, consistent with other modules);
+            # the value serves both the duplicate check and the database record.
+            file_hash = None
+            if self.db and hasattr(self.db, 'unified_db'):
+                try:
+                    file_hash = self.db.unified_db.get_file_hash(str(output_file))
+                except Exception as e:
+                    self.log(f"Failed to calculate file hash: {e}", "warning")
+
+                if file_hash:
+                    # Hash blacklist persists even if the original file was deleted
+                    existing = self.db.unified_db.get_download_by_file_hash(file_hash)
+                    if existing and existing.get('file_path') and str(output_file) != existing.get('file_path'):
+                        # Duplicate hash found - content was already downloaded (prevents redownload of deleted content)
+                        self.log(f"⚠ Duplicate content detected (hash match): {filename} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
+                        # Delete the duplicate regardless of whether original file still exists
+                        try:
+                            output_file.unlink()
+                            self.log(f"Deleted duplicate (hash blacklist): {filename}", "debug")
+                        except Exception as e:
+                            self.log(f"Failed to delete duplicate {filename}: {e}", "warning")
+                        return None
+
+            # Track timestamp for this file
+            upload_date = image_info.get('upload_date')
+            if upload_date:
+                # The mapping may not exist yet when this method is called directly
+                if getattr(self, 'file_timestamps', None) is None:
+                    self.file_timestamps = {}
+                self.file_timestamps[filename] = upload_date
+
+            # Record in database
+            self._record_download(
+                url=url,
+                platform='coppermine',
+                source=gallery_name,
+                content_type='image',
+                filename=filename,
+                file_path=str(output_file),
+                file_size=len(response.content),
+                file_hash=file_hash,
+                post_date=upload_date,
+                metadata={
+                    'pid': pid,
+                    'dimensions': image_info.get('dimensions'),
+                    'filesize': image_info.get('filesize')
+                },
+                deferred=getattr(self, 'defer_database', False)
+            )
+
+            self.download_count += 1
+            # Rate limiting: random pause before the caller requests the next image
+            time.sleep(random.uniform(self.min_delay, self.max_delay))
+
+            return str(output_file)
+
+        except Exception as e:
+            self.log(f"Error downloading {image_info.get('filename', 'unknown')}: {e}", "error")
+            return None
+
+    def download(self, gallery_url: str, output_dir: str,
+                 days_back: Optional[int] = None, max_pages: Optional[int] = None,
+                 gallery_name: Optional[str] = None, defer_database: bool = False) -> tuple:
+        """
+        Download images from a Coppermine gallery
+
+        Flow: check site status, validate the existing cookies (refreshing them
+        through FlareSolverr when they do not work), read the page count from
+        the first page, then walk the pages and hand every image that passes
+        the date filter to _download_image().
+
+        Args:
+            gallery_url: URL to the gallery page (e.g., thumbnails.php?album=lastup&cat=123)
+            output_dir: Directory to save images
+            days_back: Only download images from last N days (None = all)
+            max_pages: Maximum number of pages to process (None = all)
+            gallery_name: Name for database tracking (extracted from URL if not provided)
+            defer_database: If True, don't record to database immediately - store in
+                           pending_downloads for later recording after file move is complete
+
+        Returns:
+            Tuple of (file_timestamps dict, download_count)
+            file_timestamps: Dict mapping filename -> upload_date
+            ({}, 0) is returned when the site is unavailable, the Cloudflare
+            bypass fails, or the first page cannot be fetched
+        """
+        self.defer_database = defer_database  # Store for use in download methods
+        # Clear downloaded_files cache between galleries to prevent memory growth
+        self.downloaded_files.clear()
+
+        # Check site status before doing anything else
+        self.log("Checking Coppermine gallery site status...", "debug")
+        site_status, error_msg = self.cf_handler.check_site_status(gallery_url, timeout=10)
+
+        if self.cf_handler.should_skip_download(site_status):
+            self.log(f"Skipping download - Coppermine gallery is unavailable: {error_msg}", "warning")
+            return ({}, 0)
+        elif site_status == SiteStatus.CLOUDFLARE_CHALLENGE:
+            self.log("Cloudflare challenge detected, will attempt bypass during download", "info")
+
+        self.download_count = 0
+        self.file_timestamps = {}  # Track timestamps for each file
+        output_path = Path(output_dir)
+
+        # Extract base URL and gallery name
+        parsed_url = urlparse(gallery_url)
+        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+
+        if not gallery_name:
+            # Build a name from the host plus the cat/album query parameters
+            query_params = parse_qs(parsed_url.query)
+            cat = query_params.get('cat', ['unknown'])[0]
+            album = query_params.get('album', ['unknown'])[0]
+            gallery_name = f"{parsed_url.netloc}_cat{cat}_{album}"
+
+        self.log(f"Starting download from: {gallery_url}", "info")
+        self.activity_manager.update_status(f"Checking gallery: {gallery_name}")
+        self.log(f"Gallery: {gallery_name}", "info")
+
+        # Calculate cutoff date (a falsy days_back, including 0, disables the filter)
+        cutoff_date = None
+        if days_back:
+            self.log(f"Filtering: Last {days_back} days", "info")
+            cutoff_date = datetime.now() - timedelta(days=days_back)
+
+        # Check if cookies have expired before testing
+        cookies_valid = False
+        cookie_count = len(self.session.cookies)
+
+        # Check for short-lived session cookies that may have expired
+        if self.cf_handler.cookies_expired():
+            self.log("Cookies expired, skipping test and refreshing via FlareSolverr", "info")
+        else:
+            self.log(f"Testing with {cookie_count} existing cookies...", "info")
+
+            try:
+                # Try with existing cookies first (short timeout for fast fail)
+                test_response = self.session.get(gallery_url, timeout=5)
+
+                # Check if we got a Cloudflare challenge or error
+                if test_response.status_code in (403, 503):
+                    self.log(f"Existing cookies failed (HTTP {test_response.status_code}), need FlareSolverr", "info")
+                elif len(test_response.text) < 1000:
+                    self.log(f"Response too short ({len(test_response.text)} bytes), likely Cloudflare challenge", "info")
+                elif 'challenge' in test_response.text[:500].lower():
+                    # Only the head of the document is inspected, so slice
+                    # before lowercasing instead of lowercasing the whole page
+                    self.log("Cloudflare challenge detected in response", "info")
+                else:
+                    # Cookies work (or no challenge presented)!
+                    cookies_valid = True
+                    self.log(f"✓ Existing cookies valid ({cookie_count} cookies, skipped FlareSolverr)", "info")
+                    response = test_response
+            except Exception as e:
+                self.log(f"Test request failed ({type(e).__name__}: {e}), need FlareSolverr", "info")
+
+        # Only call FlareSolverr if existing cookies don't work
+        if not cookies_valid:
+            if self.flaresolverr_enabled:
+                self.log("Calling FlareSolverr to get fresh cookies...", "info")
+                if not self._get_cookies_via_flaresolverr(gallery_url):
+                    self.log("Failed to bypass Cloudflare", "error")
+                    return ({}, 0)
+            else:
+                self.log("FlareSolverr disabled and cookies invalid", "error")
+                return ({}, 0)
+
+        # Fetch first page to get total pages (reuse response if cookies were valid)
+        try:
+            if not cookies_valid:
+                response = self._request_with_retry(gallery_url, timeout=30)
+
+            total_pages = self._get_total_pages(response.text)
+
+            if max_pages:
+                total_pages = min(total_pages, max_pages)
+
+            self.log(f"Total pages to process: {total_pages}", "info")
+
+        except Exception as e:
+            self.log(f"Error fetching gallery: {e}", "error")
+            return ({}, 0)
+
+        # The response used to read the page count IS page 1; keep it so the
+        # loop below does not request the same URL a second time
+        first_page_response = response
+
+        # Set initial progress so dashboard shows 0/N immediately
+        self.activity_manager.update_status(
+            "Downloading images",
+            progress_current=0,
+            progress_total=total_pages
+        )
+
+        # Process each page
+        for page_num in range(1, total_pages + 1):
+            try:
+                self.log(f"Processing page {page_num}/{total_pages}...", "info")
+
+                if page_num == 1:
+                    # Already fetched above
+                    response = first_page_response
+                else:
+                    # Construct page URL and fetch with automatic Cloudflare retry
+                    separator = '&' if '?' in gallery_url else '?'
+                    page_url = f"{gallery_url}{separator}page={page_num}"
+                    response = self._request_with_retry(page_url, timeout=30)
+
+                # Debug: Check what we received
+                self.log(f"Fetched page, status: {response.status_code}, length: {len(response.text)} bytes", "debug")
+                if len(response.text) < 10000:
+                    self.log(f"WARNING: Response seems too short! First 1000 chars: {response.text[:1000]}", "warning")
+
+                # Parse images
+                images = self._parse_gallery_page(response.text, base_url)
+                self.log(f"Found {len(images)} images on page {page_num}", "info")
+
+                # Track if we found any new images on this page
+                found_new_images = False
+                skipped_old_images = 0
+
+                # Filter by date and download
+                for image_info in images:
+                    # Apply date filter (images without an upload date are never filtered out)
+                    if cutoff_date and image_info.get('upload_date'):
+                        if image_info['upload_date'] < cutoff_date:
+                            skipped_old_images += 1
+                            self.log(f"Skipping old image: {image_info['filename']} "
+                                   f"(uploaded {image_info['upload_date'].date()})", "debug")
+                            continue
+
+                    # Log image being processed
+                    upload_date_str = image_info.get('upload_date').strftime('%Y-%m-%d') if image_info.get('upload_date') else 'unknown'
+                    self.log(f"Processing image: {image_info['filename']} (uploaded {upload_date_str})", "info")
+
+                    # This image is within date range
+                    found_new_images = True
+
+                    # Download image
+                    self._download_image(image_info, output_path, gallery_name)
+
+                # If using date filter and ALL images on this page were too old, stop processing
+                # (assumes gallery is sorted newest-first, which is true for album=lastup)
+                if cutoff_date and not found_new_images and len(images) > 0:
+                    self.log(f"All {skipped_old_images} images on page {page_num} are older than {days_back} days. "
+                           f"Stopping pagination (assuming chronological order).", "info")
+                    break
+
+                # Update activity status with page progress
+                self.activity_manager.update_status(
+                    "Downloading images",
+                    progress_current=page_num,
+                    progress_total=total_pages
+                )
+
+                # Rate limiting between pages
+                if page_num < total_pages:
+                    time.sleep(self.min_delay)
+
+            except Exception as e:
+                # A failing page is logged and skipped; remaining pages are still tried
+                self.log(f"Error processing page {page_num}: {e}", "error")
+                continue
+
+        self.log(f"Download complete! Total: {self.download_count} images", "info")
+        return (self.file_timestamps, self.download_count)
+
+    def cleanup(self):
+        """Close the HTTP session held by this instance, if there is one"""
+        session = self.session
+        if not session:
+            # Nothing to release
+            return
+        session.close()
--- a/modules/date_utils.py
+++ b/modules/date_utils.py
@@ -0,0 +1,473 @@
+#!/usr/bin/env python3
+"""
+Shared date utilities module for media downloaders
+Provides comprehensive date extraction and timestamp updating
+
+Features:
+- Extract dates from text/titles (multiple formats)
+- Extract TV show season/episode info and lookup air dates via OMDB
+- Update filesystem timestamps (mtime, atime)
+- Update creation time (platform-specific)
+- Update EXIF metadata for images
+- Update video metadata
+"""
+
+import os
+import re
+import platform
+import subprocess
+import requests
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, Union, Tuple
+from modules.universal_logger import get_logger
+
+logger = get_logger('DateUtils')
+
+
+class DateHandler:
+    """Comprehensive date extraction and timestamp updating
+
+    Every method is a classmethod; the class only carries shared
+    configuration (the OMDB API key and the pattern tables below).
+    """
+
+    # OMDB API key (should be set by user)
+    OMDB_API_KEY = None
+
+    # TV show season/episode patterns (group 1 = season, group 2 = episode)
+    TV_PATTERNS = [
+        r'S(\d{1,2})E(\d{1,2})',  # S01E01
+        r'Season\s+(\d{1,2})\s+Episode\s+(\d{1,2})',  # Season 1 Episode 1
+        r'(\d{1,2})x(\d{1,2})',  # 1x01
+        r's(\d{1,2})\s*e(\d{1,2})',  # s01 e01 or s01e01
+    ]
+
+    # Year pattern for fallback
+    YEAR_PATTERN = r'\b(19\d{2}|20\d{2})\b'
+
+    # Date patterns for extraction from text, tried in this order
+    DATE_PATTERNS = [
+        # Instagram filename format: YYYYMMDD_HHMMSS (e.g., "20251027_155842")
+        (r'(\d{4})(\d{2})(\d{2})_(\d{2})(\d{2})(\d{2})', 'instagram'),
+        # DD.MM.YYYY or DD/MM/YYYY or DD-MM-YYYY or DD_MM_YYYY (underscore for forum titles)
+        (r'(\d{1,2})[\.\/\-_](\d{1,2})[\.\/\-_](\d{4})', 'dmy'),
+        # YYYY-MM-DD or YYYY/MM/DD or YYYY_MM_DD
+        (r'(\d{4})[\-\/_](\d{1,2})[\-\/_](\d{1,2})', 'ymd'),
+        # Month DD, YYYY (e.g., "August 15, 2025")
+        (r'(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2}),?\s+(\d{4})', 'mdy_name'),
+        # Month YYYY (e.g., "April 2025") - use first day of month
+        (r'(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{4})', 'my_name'),
+        # DD Mon YYYY (e.g., "15 Aug 2025")
+        (r'(\d{1,2})\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{4})', 'dmy_abbr'),
+        # Mon DD, YYYY (e.g., "Aug 15, 2025")
+        (r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{1,2}),?\s+(\d{4})', 'mdy_abbr'),
+        # Mon YYYY (e.g., "Apr 2025") - use first day of month
+        (r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{4})', 'my_abbr'),
+    ]
+
+    # Keys are capitalized month names; "May" is both the full and the
+    # abbreviated form, so it appears only once
+    MONTH_MAP = {
+        'January': 1, 'February': 2, 'March': 3, 'April': 4,
+        'May': 5, 'June': 6, 'July': 7, 'August': 8,
+        'September': 9, 'October': 10, 'November': 11, 'December': 12,
+        'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4,
+        'Jun': 6, 'Jul': 7, 'Aug': 8,
+        'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12
+    }
+
+    @classmethod
+    def set_omdb_api_key(cls, api_key: str):
+        """Set OMDB API key for TV show lookups"""
+        cls.OMDB_API_KEY = api_key
+
+    @classmethod
+    def _month_number(cls, month_str: str) -> int:
+        """
+        Map a full or abbreviated English month name to its number
+
+        The date patterns are matched case-insensitively, so the captured name
+        is normalized to the capitalization used by MONTH_MAP before lookup.
+
+        Returns:
+            Month number 1-12, or 0 if the name is not recognized
+        """
+        return cls.MONTH_MAP.get(month_str.capitalize(), 0)
+
+    @classmethod
+    def extract_tv_info(cls, text: str) -> Optional[Tuple[str, int, int]]:
+        """
+        Extract TV show name, season, and episode from text
+
+        TV_PATTERNS are tried in order; the first one that matches AND leaves a
+        non-empty show name in front of the match wins.
+
+        Returns:
+            Tuple of (show_name, season, episode) or None
+        """
+        for pattern in cls.TV_PATTERNS:
+            match = re.search(pattern, text, re.IGNORECASE)
+            if not match:
+                continue
+
+            season = int(match.group(1))
+            episode = int(match.group(2))
+
+            # Extract show name (everything before the season/episode)
+            show_part = text[:match.start()].strip()
+
+            # Common pattern: "Actor Name & Actor Name - Show Name S01E01"
+            if ' - ' in show_part:
+                # Split on dash and take the last part as show name
+                show_name = show_part.split(' - ')[-1].strip()
+            else:
+                # Clean up common separators
+                show_name = re.sub(r'[-_.]', ' ', show_part)
+                show_name = re.sub(r'\s+', ' ', show_name).strip()
+
+            # Remove trailing "Season" or similar words. The flag must be
+            # passed by keyword: the 4th positional argument of re.sub is count
+            show_name = re.sub(r'\s+(Season|Series|S)\s*$', '', show_name, flags=re.IGNORECASE)
+
+            if show_name:
+                return (show_name, season, episode)
+        return None
+
+    @classmethod
+    def lookup_tv_episode_date(cls, show_name: str, season: int, episode: int) -> Optional[datetime]:
+        """
+        Lookup TV episode air date using OMDB API
+
+        Two requests are made: one to resolve the show title to an IMDB ID and
+        one to fetch the episode. Any failure (no API key, HTTP error, unknown
+        show/episode, unparseable date, network exception) yields None.
+
+        Args:
+            show_name: Name of the TV show
+            season: Season number
+            episode: Episode number
+
+        Returns:
+            Air date of the episode or None
+        """
+        if not cls.OMDB_API_KEY:
+            logger.debug("OMDB API key not set")
+            return None
+
+        try:
+            # HTTPS so the API key in the query string is not sent in cleartext
+            search_url = "https://www.omdbapi.com/"
+
+            # First, search for the show
+            params = {
+                'apikey': cls.OMDB_API_KEY,
+                't': show_name,
+                'type': 'series'
+            }
+
+            response = requests.get(search_url, params=params, timeout=5)
+            if response.status_code != 200:
+                return None
+
+            show_data = response.json()
+            if show_data.get('Response') != 'True':
+                return None
+
+            # Get the IMDB ID
+            imdb_id = show_data.get('imdbID')
+            if not imdb_id:
+                return None
+
+            # Now get the specific episode
+            episode_params = {
+                'apikey': cls.OMDB_API_KEY,
+                'i': imdb_id,
+                'Season': season,
+                'Episode': episode
+            }
+
+            episode_response = requests.get(search_url, params=episode_params, timeout=5)
+            if episode_response.status_code != 200:
+                return None
+
+            episode_data = episode_response.json()
+            if episode_data.get('Response') != 'True':
+                return None
+
+            # Parse the release date
+            release_date = episode_data.get('Released')
+            if release_date and release_date != 'N/A':
+                # Try different date formats
+                for fmt in ['%d %b %Y', '%Y-%m-%d', '%d %B %Y']:
+                    try:
+                        return datetime.strptime(release_date, fmt)
+                    except ValueError:
+                        continue
+
+        except Exception as e:
+            # Best-effort lookup: never let a network/JSON problem propagate
+            logger.debug(f"OMDB lookup failed: {e}")
+
+        return None
+
+    @classmethod
+    def extract_date_from_text(cls, text: str, fallback_date: Optional[datetime] = None, use_omdb: bool = True) -> Optional[datetime]:
+        """
+        Extract date from text using multiple format patterns
+
+        An OMDB episode lookup (when enabled and the text looks like a TV
+        episode) takes precedence; otherwise DATE_PATTERNS are tried in order
+        and the first pattern that yields a valid calendar date wins.
+
+        Args:
+            text: Text to search for dates (e.g., post title, caption)
+            fallback_date: Date to use if no date found in text
+            use_omdb: Whether to try OMDB lookup for TV shows
+
+        Returns:
+            Extracted datetime or fallback_date if no date found
+        """
+        if not text:
+            return fallback_date
+
+        # First, try TV show lookup if enabled
+        if use_omdb:
+            tv_info = cls.extract_tv_info(text)
+            if tv_info:
+                show_name, season, episode = tv_info
+                tv_date = cls.lookup_tv_episode_date(show_name, season, episode)
+                if tv_date:
+                    logger.info(f"Found TV episode date via OMDB: {show_name} S{season:02d}E{episode:02d} -> {tv_date}")
+                    return tv_date
+
+        # Try standard date patterns
+        for pattern, format_type in cls.DATE_PATTERNS:
+            match = re.search(pattern, text, re.IGNORECASE)
+            if not match:
+                continue
+
+            try:
+                if format_type == 'instagram':
+                    # Instagram format: YYYYMMDD_HHMMSS
+                    year, month, day = int(match.group(1)), int(match.group(2)), int(match.group(3))
+                    hour, minute, second = int(match.group(4)), int(match.group(5)), int(match.group(6))
+                    return datetime(year, month, day, hour, minute, second)
+
+                elif format_type == 'dmy':
+                    first, second, year = int(match.group(1)), int(match.group(2)), int(match.group(3))
+                    # Handle ambiguous dates (could be DD/MM or MM/DD)
+                    if '.' in match.group(0):
+                        # European format with dots: DD.MM.YYYY
+                        return datetime(year, second, first)
+                    if first > 12:
+                        # First number cannot be a month, so it is the day
+                        return datetime(year, second, first)
+                    # Ambiguous (both <= 12) or second number > 12:
+                    # assume MM/DD/YYYY for US format
+                    return datetime(year, first, second)
+
+                elif format_type == 'ymd':
+                    year, month, day = int(match.group(1)), int(match.group(2)), int(match.group(3))
+                    return datetime(year, month, day)
+
+                elif format_type in ('mdy_name', 'mdy_abbr'):
+                    # Month DD, YYYY / Mon DD, YYYY
+                    month_str, day, year = match.group(1), int(match.group(2)), int(match.group(3))
+                    month = cls._month_number(month_str)
+                    if month:
+                        return datetime(year, month, day)
+
+                elif format_type in ('my_name', 'my_abbr'):
+                    # Month YYYY / Mon YYYY (no day) - use first day of month
+                    month_str, year = match.group(1), int(match.group(2))
+                    month = cls._month_number(month_str)
+                    if month:
+                        return datetime(year, month, 1)
+
+                elif format_type == 'dmy_abbr':
+                    day, month_str, year = int(match.group(1)), match.group(2), int(match.group(3))
+                    month = cls._month_number(month_str)
+                    if month:
+                        return datetime(year, month, day)
+
+            except (ValueError, IndexError) as e:
+                # e.g. month 13 or day 32: not a real date, try the next pattern
+                logger.debug(f"Failed to parse date from pattern {pattern}: {e}")
+                continue
+
+        # Don't use year-only as fallback - it's too unreliable
+        # Examples: "Moments of 2025" shouldn't default to Jan 1, 2025
+        # Instead, use the actual post date from the forum
+        return fallback_date
+
+    @classmethod
+    def update_file_timestamps(cls, filepath: Union[str, Path], date: datetime) -> bool:
+        """
+        Update all timestamps for a file: filesystem, creation time, and EXIF data
+
+        Only the filesystem mtime/atime update decides the return value; the
+        EXIF, video-metadata and creation-time steps are best effort because
+        they depend on external tools and on the platform.
+
+        Args:
+            filepath: Path to the file to update
+            date: DateTime to set
+
+        Returns:
+            True if successful, False otherwise
+        """
+        filepath = Path(filepath)
+        if not filepath.exists():
+            logger.error(f"File not found: {filepath}")
+            return False
+
+        if not date:
+            logger.warning(f"No date provided for {filepath}")
+            return False
+
+        success = True
+        suffix = filepath.suffix.lower()
+
+        # 1. Update EXIF data for images FIRST (this modifies the file)
+        if suffix in ['.jpg', '.jpeg', '.png', '.tiff', '.bmp', '.gif']:
+            try:
+                cls._update_exif_data(filepath, date)
+            except Exception as e:
+                logger.debug(f"Failed to update EXIF data: {e}")
+                # Don't mark as failure since not all images support EXIF
+
+        # 2. Update video metadata SECOND (this also modifies the file)
+        if suffix in ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v']:
+            try:
+                cls._update_video_metadata(filepath, date)
+            except Exception as e:
+                logger.debug(f"Failed to update video metadata: {e}")
+                # Don't mark as failure since this requires ffmpeg
+
+        # 3. Update creation time (platform-specific)
+        try:
+            system = platform.system()
+            if system == 'Darwin':  # macOS
+                cls._update_macos_creation_time(filepath, date)
+            elif system == 'Windows':
+                cls._update_windows_creation_time(filepath, date)
+            # Linux doesn't have a reliable way to set creation time
+        except Exception as e:
+            logger.debug(f"Failed to update creation time: {e}")
+            # Don't mark as failure since this is platform-specific
+
+        # 4. Update filesystem timestamps LAST (mtime and atime)
+        # This must be last because EXIF/video updates modify the file and change mtime
+        try:
+            timestamp = date.timestamp()
+            os.utime(filepath, (timestamp, timestamp))
+            logger.debug(f"Updated filesystem timestamps for {filepath}")
+        except Exception as e:
+            logger.error(f"Failed to update filesystem timestamps: {e}")
+            success = False
+
+        return success
+
+    @classmethod
+    def _update_macos_creation_time(cls, filepath: Path, date: datetime):
+        """Update creation time on macOS using SetFile"""
+        date_str = date.strftime("%m/%d/%Y %H:%M:%S")
+        try:
+            result = subprocess.run(
+                ['SetFile', '-d', date_str, str(filepath)],
+                capture_output=True,
+                text=True,
+                check=False
+            )
+            if result.returncode == 0:
+                logger.debug(f"Updated macOS creation time for {filepath}")
+            else:
+                logger.debug(f"SetFile failed: {result.stderr}")
+        except FileNotFoundError:
+            logger.debug("SetFile not found (Xcode Command Line Tools not installed)")
+
+    @classmethod
+    def _update_windows_creation_time(cls, filepath: Path, date: datetime):
+        """Update creation time on Windows using PowerShell
+
+        The path comes from downloaded content, so it is embedded as a
+        single-quoted PowerShell literal (embedded quotes doubled) and passed
+        via -LiteralPath: no variable expansion, no sub-expressions, and no
+        wildcard interpretation of characters such as [ and ].
+        """
+        date_str = date.strftime("%Y-%m-%d %H:%M:%S")
+        literal_path = str(filepath).replace("'", "''")
+        ps_command = (
+            f"$file = Get-Item -LiteralPath '{literal_path}'; "
+            f"$file.CreationTime = '{date_str}'"
+        )
+        try:
+            result = subprocess.run(
+                ['powershell', '-Command', ps_command],
+                capture_output=True,
+                text=True,
+                check=False
+            )
+            if result.returncode == 0:
+                logger.debug(f"Updated Windows creation time for {filepath}")
+            else:
+                logger.debug(f"PowerShell failed: {result.stderr}")
+        except FileNotFoundError:
+            logger.debug("PowerShell not available")
+
+    @classmethod
+    def _update_exif_data(cls, filepath: Path, date: datetime):
+        """Update EXIF metadata using exiftool
+
+        Sets all date fields comprehensively to ensure consistent timestamps
+        across all metadata readers (including Immich):
+        - AllDates (DateTimeOriginal, CreateDate, ModifyDate)
+        - MetadataDate (used by some photo managers)
+        - FileModifyDate (filesystem modification time)
+        - Clears HistoryWhen to avoid conflicting timestamps
+
+        A missing exiftool binary or a non-zero exit status is only logged.
+        """
+        date_str = date.strftime("%Y:%m:%d %H:%M:%S")
+        try:
+            result = subprocess.run([
+                'exiftool',
+                '-overwrite_original',
+                f'-AllDates={date_str}',
+                f'-MetadataDate={date_str}',
+                '-HistoryWhen=',
+                f'-FileModifyDate={date_str}',
+                str(filepath)
+            ], capture_output=True, text=True, check=False)
+
+            if result.returncode == 0:
+                logger.debug(f"Updated EXIF data for {filepath}")
+            else:
+                logger.debug(f"exiftool failed: {result.stderr}")
+        except FileNotFoundError:
+            logger.debug("exiftool not found")
+
+    @classmethod
+    def _update_video_metadata(cls, filepath: Path, date: datetime):
+        """Update video metadata using ffmpeg
+
+        Streams are copied (no re-encode) into a sibling temp file named
+        <stem>.tmp<suffix>, which replaces the original only when ffmpeg
+        succeeds; on any failure the temp file is removed.
+        """
+        date_str = date.strftime("%Y-%m-%d %H:%M:%S")
+        temp_file = filepath.with_suffix('.tmp' + filepath.suffix)
+
+        try:
+            result = subprocess.run([
+                'ffmpeg', '-i', str(filepath),
+                '-c', 'copy',
+                '-metadata', f'creation_time={date_str}',
+                '-y', str(temp_file)
+            ], capture_output=True, text=True, check=False)
+
+            if result.returncode == 0 and temp_file.exists():
+                # Replace original with updated file
+                temp_file.replace(filepath)
+                logger.debug(f"Updated video metadata for {filepath}")
+            else:
+                if temp_file.exists():
+                    temp_file.unlink()
+                logger.debug(f"ffmpeg failed: {result.stderr}")
+        except FileNotFoundError:
+            logger.debug("ffmpeg not found")
+        except Exception as e:
+            if temp_file.exists():
+                temp_file.unlink()
+            logger.debug(f"Video metadata update failed: {e}")
+
+
+# Convenience functions for direct use
+def extract_date(text: str, fallback: Optional[datetime] = None) -> Optional[datetime]:
+    """Module-level shortcut for DateHandler.extract_date_from_text
+
+    Args:
+        text: Text to search for a date
+        fallback: Value returned when no date is found
+
+    Returns:
+        The extracted datetime, or fallback
+    """
+    found = DateHandler.extract_date_from_text(text, fallback)
+    return found
+
+
+def update_timestamps(filepath: Union[str, Path], date: datetime) -> bool:
+    """Module-level shortcut for DateHandler.update_file_timestamps
+
+    Args:
+        filepath: File whose timestamps should be rewritten
+        date: DateTime to apply
+
+    Returns:
+        Whatever DateHandler.update_file_timestamps reports (True on success)
+    """
+    succeeded = DateHandler.update_file_timestamps(filepath, date)
+    return succeeded
+
+
+if __name__ == "__main__":
+    # Manual smoke test: print what each sample title parses to
+    samples = (
+        "Eva Longoria - 15.08.2025 Event Photos",
+        "Photos from 08/15/2025",
+        "August 15, 2025 - Red Carpet",
+        "15 Aug 2025 Photoshoot",
+        "Event 2025-08-15",
+    )
+
+    print("Date extraction tests:")
+    for sample in samples:
+        print(f"  '{sample}' -> {extract_date(sample)}")
+
+    # Timestamp update is only exercised when a local test_image.jpg exists
+    sample_image = Path("test_image.jpg")
+    if sample_image.exists() and update_timestamps(sample_image, datetime(2025, 8, 15, 18, 30, 0)):
+        print(f"\nSuccessfully updated timestamps for {sample_image}")
--- a/modules/db_bootstrap.py
+++ b/modules/db_bootstrap.py
@@ -0,0 +1,27 @@
+"""
+Database Backend Bootstrap
+
+Import this module before any other imports that use sqlite3.
+When DATABASE_BACKEND=postgresql, it monkey-patches sys.modules['sqlite3']
+with pg_adapter so every subsequent `import sqlite3` gets the PostgreSQL adapter.
+
+Default is 'sqlite' (no change — original behavior preserved).
+"""
+
+import os
+from pathlib import Path
+
+# Load .env BEFORE checking DATABASE_BACKEND — systemd services don't set
+# this env var, so .env is the primary source of truth.
+try:
+    from dotenv import load_dotenv
+    # .env is looked up in the parent of this file's directory
+    # (modules/db_bootstrap.py -> <parent of modules>/.env)
+    _env_path = Path(__file__).resolve().parent.parent / '.env'
+    if _env_path.exists():
+        load_dotenv(_env_path)
+except ImportError:
+    # python-dotenv is optional
+    pass  # rely on system env vars
+
+# The value is compared case-insensitively; anything other than 'postgresql'
+# (including an unset variable, which defaults to 'sqlite') leaves sqlite3 alone.
+if os.getenv('DATABASE_BACKEND', 'sqlite').lower() == 'postgresql':
+    # Imported here so neither sys nor pg_adapter is loaded on the sqlite path
+    import sys
+    from modules import pg_adapter
+    # Swap the entry in the module cache: `import sqlite3` statements executed
+    # after this line resolve to pg_adapter
+    sys.modules['sqlite3'] = pg_adapter
--- a/modules/dependency_updater.py
+++ b/modules/dependency_updater.py
@@ -0,0 +1,634 @@
+#!/usr/bin/env python3
+"""
+Dependency Updater - Automatically updates critical dependencies
+Only runs in scheduler mode, once per day
+
+Version Compatibility:
+- bcrypt <5.0 required for passlib 1.7.4 compatibility
+- passlib 1.7.4 requires bcrypt 4.x (not 5.x)
+- uvicorn <0.35.0 required (0.40.0+ has breaking loop_factory changes)
+- Pinned packages are skipped during auto-updates to prevent incompatibilities
+"""
+
+import json
+import subprocess
+from pathlib import Path
+from datetime import datetime, timedelta
+from typing import Dict
+from modules.universal_logger import get_logger
+
+
+class DependencyUpdater:
+    """Manages automatic updates for critical dependencies"""
+
+    def __init__(self,
+                 state_file: str = "/opt/media-downloader/database/dependency_updates.json",
+                 config: dict = None,
+                 pushover_notifier = None,
+                 scheduler_mode: bool = False):
+        """
+        Initialize dependency updater
+
+        Args:
+            state_file: Path to JSON file storing update state
+            config: Configuration dict from settings.json; merged recursively
+                over the built-in defaults
+            pushover_notifier: Instance of PushoverNotifier for alerts
+            scheduler_mode: Only run updates when True (scheduler mode)
+        """
+        # Setup logging first: _load_state() reports unreadable state files
+        # through self.logger, so the logger must exist before state is loaded
+        # (otherwise a corrupt state file raises AttributeError here).
+        self.logger = get_logger('DependencyUpdater')
+
+        self.state_file = Path(state_file)
+        self.state_file.parent.mkdir(parents=True, exist_ok=True)
+        self.pushover = pushover_notifier
+        self.scheduler_mode = scheduler_mode
+
+        # Derive venv paths from module location (more portable than hardcoded path)
+        import sys
+        self._base_dir = Path(__file__).parent.parent
+        self._venv_pip = self._base_dir / 'venv' / 'bin' / 'pip'
+        self._venv_python = self._base_dir / 'venv' / 'bin' / 'python'
+        # Fallback to sys.executable's directory if venv not found
+        if not self._venv_pip.exists():
+            self._venv_pip = Path(sys.executable).parent / 'pip'
+        if not self._venv_python.exists():
+            self._venv_python = Path(sys.executable)
+
+        # Default configuration
+        self.config = {
+            'enabled': True,
+            'check_interval_hours': 24,
+            'auto_install': True,
+            'components': {
+                'flaresolverr': {
+                    'enabled': True,
+                    'notify_on_update': True
+                },
+                'playwright': {
+                    'enabled': True,
+                    'notify_on_update': False
+                },
+                'yt_dlp': {
+                    'enabled': True,
+                    'notify_on_update': False
+                },
+                'python_packages': {
+                    'enabled': True,
+                    'notify_on_update': True,
+                    'packages': [
+                        # Core API framework
+                        'fastapi',
+                        'uvicorn',
+                        'pydantic',
+                        'python-jose',
+                        'passlib',
+                        'slowapi',
+                        'starlette',
+                        'python-multipart',
+                        'websockets',
+                        # Security & Auth
+                        'bcrypt',
+                        'cryptography',
+                        'certifi',
+                        '2captcha-python',
+                        'duo-universal',
+                        # Image processing
+                        'pillow',
+                        'numpy',
+                        # Face recognition
+                        'insightface',
+                        'onnxruntime',
+                        'deepface',
+                        'tensorflow',
+                        'face-recognition',
+                        'dlib',
+                        # Web scraping & downloads
+                        'requests',
+                        'beautifulsoup4',
+                        'selenium',
+                        'playwright',
+                        'playwright-stealth',
+                        'instaloader',
+                        'yt-dlp',
+                        'curl-cffi',
+                        'gallery-dl',
+                        # Database
+                        'psycopg2-binary',
+                        # Utilities
+                        'python-dotenv',
+                        'python-dateutil',
+                        'pyotp',
+                        'click',
+                        'attrs',
+                        'charset-normalizer',
+                        'idna',
+                        'websocket-client',
+                        'trio',
+                        'typing_extensions'
+                    ]
+                }
+            },
+            'pushover': {
+                'enabled': True,
+                'priority': -1,
+                'sound': 'magic'
+            }
+        }
+
+        # Merge user config (nested dicts are merged, other values replaced)
+        if config:
+            self._deep_update(self.config, config)
+
+        # Load or initialize state
+        self.state = self._load_state()
+
+        # Known version incompatibilities and constraints
+        # Format: package_name -> dict with 'constraint' and 'reason', plus
+        # optional informational keys ('incompatible_with', 'requires', 'known_working')
+        self.version_constraints = {
+            'bcrypt': {
+                'constraint': '<5.0',
+                'reason': 'bcrypt 5.x is incompatible with passlib 1.7.4',
+                'incompatible_with': ['passlib>=1.7.4,<2.0']
+            },
+            'passlib': {
+                'constraint': '>=1.7.4,<2.0',
+                'reason': 'passlib 1.7.4 requires bcrypt <5.0',
+                'requires': ['bcrypt>=4.0.0,<5.0']
+            },
+            'uvicorn': {
+                'constraint': '<0.35.0',
+                'reason': 'uvicorn 0.40.0+ has breaking changes with loop_factory parameter that crashes on startup',
+                'known_working': '0.34.0'
+            }
+        }
+
+        # Packages that should not be auto-updated
+        self.pinned_packages = {
+            'bcrypt': 'Version constrained for passlib compatibility',
+            'passlib': 'Version constrained for bcrypt compatibility',
+            'uvicorn': 'Version 0.40.0+ has breaking changes with loop_factory parameter'
+        }
+
+    def _deep_update(self, base: dict, update: dict):
+        """Recursively merge `update` into `base`, modifying `base` in place
+
+        When both sides hold a dict under the same key the two dicts are
+        merged; in every other case the value from `update` replaces (or adds)
+        the entry in `base`.
+        """
+        for key, incoming in update.items():
+            existing = base.get(key)
+            if isinstance(incoming, dict) and isinstance(existing, dict):
+                self._deep_update(existing, incoming)
+                continue
+            base[key] = incoming
+
+    def _load_state(self) -> Dict:
+        """Read the persisted update state from self.state_file
+
+        Returns:
+            The parsed JSON content, or a fresh empty state when the file is
+            missing or cannot be read/parsed (the failure is logged)
+        """
+        empty_state = {
+            'last_check': None,
+            'components': {}
+        }
+
+        if not self.state_file.exists():
+            return empty_state
+
+        try:
+            with open(self.state_file, 'r') as f:
+                return json.load(f)
+        except Exception as e:
+            self.logger.error(f"Failed to load update state: {e}")
+
+        # Unreadable file: fall back to the empty state
+        return empty_state
+
+    def _save_state(self):
+        """Write self.state to self.state_file as indented JSON
+
+        Values that JSON cannot encode natively are stringified (default=str).
+        Failures are logged and never raised to the caller.
+        """
+        try:
+            with self.state_file.open('w') as handle:
+                json.dump(self.state, handle, indent=2, default=str)
+        except Exception as e:
+            self.logger.error(f"Failed to save update state: {e}")
+
+    def _should_check_updates(self, force: bool = False) -> bool:
+        """Decide whether an update check should run now
+
+        Args:
+            force: If True, bypass all checks and return True
+
+        Returns:
+            True if updates should be checked, False otherwise
+        """
+        if force:
+            return True
+
+        # Without force, a check needs both the 'enabled' setting and
+        # scheduler mode; manual runs go through force_update_check().
+        if not self.config.get('enabled', True) or not self.scheduler_mode:
+            return False
+
+        last_check = self.state.get('last_check')
+        if not last_check:
+            # Never checked before
+            return True
+
+        try:
+            elapsed = datetime.now() - datetime.fromisoformat(last_check)
+            interval = timedelta(hours=self.config.get('check_interval_hours', 24))
+        except Exception:
+            # Unparseable timestamp or interval: err on the side of checking
+            return True
+        return elapsed > interval
+
+    def check_and_update_all(self, force: bool = False) -> Dict[str, bool]:
+        """
+        Run the updater for every enabled component
+
+        Args:
+            force: If True, bypass interval checks and update immediately
+
+        Returns:
+            Dict mapping component name to True when an update was installed.
+            Empty when no check is due or when auto_install is disabled.
+        """
+        if not self._should_check_updates(force=force):
+            return {}
+
+        # With auto_install off (default: on) nothing is installed at all
+        if not self.config.get('auto_install', True):
+            self.logger.info("Checking for dependency updates (auto_install disabled - check only)...")
+            return {}
+
+        self.logger.info("Checking for dependency updates...")
+
+        # Record the check time before running the individual updaters
+        self.state['last_check'] = datetime.now().isoformat()
+        self._save_state()
+
+        # Component name -> updater, in the order they are executed
+        updaters = (
+            ('flaresolverr', self._update_flaresolverr),
+            ('playwright', self._update_playwright),
+            ('yt_dlp', self._update_yt_dlp),
+            ('python_packages', self._update_python_packages),
+        )
+
+        components = self.config.get('components', {})
+        results = {}
+        for component_name, run_update in updaters:
+            if components.get(component_name, {}).get('enabled', True):
+                results[component_name] = run_update()
+
+        # Send summary notification if any updates installed
+        if any(results.values()) and self.pushover:
+            self._send_update_notification(results)
+
+        return results
+
+    def _update_flaresolverr(self) -> bool:
+        """
+        Update FlareSolverr Docker container
+
+        Pulls the latest image and, only when a newer image was actually
+        downloaded, recreates the container named 'flaresolverr' if one exists.
+
+        Returns:
+            True if update was installed, False otherwise (already current,
+            pull failed, container could not be restarted, or timeout)
+        """
+        image = 'ghcr.io/flaresolverr/flaresolverr:latest'
+        container = 'flaresolverr'
+
+        try:
+            self.logger.info("Checking FlareSolverr for updates...")
+
+            # Pull latest image
+            result = subprocess.run(
+                ['docker', 'pull', image],
+                capture_output=True,
+                text=True,
+                timeout=300
+            )
+
+            if result.returncode != 0:
+                self.logger.error(f"Failed to pull FlareSolverr image: {result.stderr}")
+                return False
+
+            # `docker pull` prints "<tag>: Pulling from <repo>" on every run,
+            # even when nothing changed, so only the final status line
+            # ("Downloaded newer image" vs "Image is up to date") is meaningful.
+            output = result.stdout + result.stderr
+            updated = "Downloaded newer image" in output
+
+            if not updated:
+                self.logger.info("FlareSolverr is already up to date")
+                self._update_component_state('flaresolverr', False)
+                return False
+
+            # Image was updated - restart container if it exists
+            self.logger.info("FlareSolverr image updated, restarting container...")
+
+            check_result = subprocess.run(
+                ['docker', 'ps', '-a', '--filter', f'name={container}', '--format', '{{.Names}}'],
+                capture_output=True,
+                text=True,
+                timeout=60
+            )
+
+            # The name filter matches substrings, so compare exact names
+            if container in check_result.stdout.split():
+                # Stop and remove old container (best effort)
+                subprocess.run(['docker', 'stop', container], capture_output=True, timeout=60)
+                subprocess.run(['docker', 'rm', container], capture_output=True, timeout=60)
+
+                # Start new container with latest image
+                run_result = subprocess.run([
+                    'docker', 'run', '-d',
+                    '--name', container,
+                    '-p', '8191:8191',
+                    '-e', 'LOG_LEVEL=info',
+                    '--restart', 'unless-stopped',
+                    image
+                ], capture_output=True, text=True, timeout=60)
+
+                if run_result.returncode != 0:
+                    # Do not report success when the new container is not running
+                    self.logger.error(f"Failed to restart FlareSolverr container: {run_result.stderr}")
+                    return False
+
+                self.logger.info("✓ FlareSolverr updated and restarted successfully")
+            else:
+                self.logger.info("✓ FlareSolverr image updated (container not running)")
+
+            self._update_component_state('flaresolverr', True)
+            return True
+
+        except subprocess.TimeoutExpired:
+            self.logger.error("FlareSolverr update timed out")
+            return False
+        except Exception as e:
+            self.logger.error(f"FlareSolverr update error: {e}")
+            return False
+
+    def _update_playwright(self) -> bool:
+        """
+        Install/refresh the Playwright browsers (Chromium, then Firefox)
+
+        Returns:
+            True if update was installed, False otherwise
+        """
+        try:
+            self.logger.info("Checking Playwright browsers for updates...")
+
+            # Use venv python for playwright commands
+            venv_python = str(self._venv_python)
+
+            # Both installs always run, one after the other
+            install_runs = [
+                subprocess.run(
+                    [venv_python, '-m', 'playwright', 'install', browser],
+                    capture_output=True,
+                    text=True,
+                    timeout=600,
+                    cwd=str(self._base_dir)
+                )
+                for browser in ('chromium', 'firefox')
+            ]
+
+            if any(run.returncode != 0 for run in install_runs):
+                self.logger.error("Failed to update Playwright browsers")
+                return False
+
+            # Treat download/install messages on stdout as "something changed"
+            output = "".join(run.stdout for run in install_runs)
+            if "Downloading" in output or "Installing" in output:
+                self.logger.info("✓ Playwright browsers updated successfully")
+                self._update_component_state('playwright', True)
+                return True
+
+            self.logger.info("Playwright browsers already up to date")
+            self._update_component_state('playwright', False)
+            return False
+
+        except subprocess.TimeoutExpired:
+            self.logger.error("Playwright update timed out")
+            return False
+        except Exception as e:
+            self.logger.error(f"Playwright update error: {e}")
+            return False
+
+    def _update_yt_dlp(self) -> bool:
+        """
+        Upgrade yt-dlp through pip (critical for TikTok downloads)
+
+        Returns:
+            True if update was installed, False otherwise
+        """
+        try:
+            self.logger.info("Checking yt-dlp for updates...")
+
+            # Upgrade with the pip resolved in __init__ (venv pip when present)
+            result = subprocess.run(
+                [str(self._venv_pip), 'install', '--upgrade', 'yt-dlp'],
+                capture_output=True,
+                text=True,
+                timeout=120
+            )
+
+            if result.returncode != 0:
+                self.logger.error(f"Failed to update yt-dlp: {result.stderr}")
+                return False
+
+            # pip reports "Successfully installed ..." only when it changed something
+            output = result.stdout + result.stderr
+            if not ("Successfully installed" in output and "yt-dlp" in output):
+                self.logger.info("yt-dlp already up to date")
+                self._update_component_state('yt_dlp', False)
+                return False
+
+            self.logger.info("✓ yt-dlp updated successfully")
+            self._update_component_state('yt_dlp', True)
+            return True
+
+        except subprocess.TimeoutExpired:
+            self.logger.error("yt-dlp update timed out")
+            return False
+        except Exception as e:
+            self.logger.error(f"yt-dlp update error: {e}")
+            return False
+
+    def _update_python_packages(self) -> bool:
+        """
+        Upgrade the configured Python packages one at a time via pip
+
+        Pinned packages are skipped. A package with a version constraint is
+        installed with that constraint instead of a plain --upgrade. A failure
+        for one package is logged and does not stop the remaining packages.
+
+        Returns:
+            True if any updates were installed, False otherwise
+        """
+        try:
+            self.logger.info("Checking Python packages for updates...")
+
+            # Get list of packages to update
+            package_config = self.config.get('components', {}).get('python_packages', {})
+            packages = package_config.get('packages', [])
+            if not packages:
+                self.logger.info("No Python packages configured for updates")
+                return False
+
+            # Use venv pip (derived from module location for portability)
+            venv_pip = str(self._venv_pip)
+
+            updated_packages = []
+
+            for package in packages:
+                try:
+                    # Pinned packages are never auto-updated
+                    if package in self.pinned_packages:
+                        self.logger.info(f"⚠ Skipping {package}: {self.pinned_packages[package]}")
+                        continue
+
+                    # Build the pip command: constrained install or plain upgrade
+                    constraint_info = self.version_constraints.get(package, {})
+                    constraint = constraint_info.get('constraint', '')
+                    if constraint:
+                        reason = constraint_info.get('reason', 'Version constraint')
+                        self.logger.info(f"📌 {package}: Applying constraint {constraint} ({reason})")
+                        pip_command = [venv_pip, 'install', f"{package}{constraint}"]
+                    else:
+                        pip_command = [venv_pip, 'install', '--upgrade', package]
+
+                    result = subprocess.run(
+                        pip_command,
+                        capture_output=True,
+                        text=True,
+                        timeout=120
+                    )
+
+                    if result.returncode != 0:
+                        self.logger.warning(f"Failed to update {package}: {result.stderr}")
+                        continue
+
+                    # Classify the outcome from pip's output
+                    output = result.stdout + result.stderr
+                    if "Successfully installed" in output and package in output:
+                        updated_packages.append(package)
+                        self.logger.info(f"✓ {package} updated")
+                    elif "Requirement already satisfied" in output:
+                        self.logger.debug(f"  {package} already up to date")
+                    else:
+                        self.logger.debug(f"  {package} checked")
+
+                except subprocess.TimeoutExpired:
+                    self.logger.warning(f"{package} update timed out")
+                except Exception as e:
+                    self.logger.warning(f"Error updating {package}: {e}")
+
+            if not updated_packages:
+                self.logger.info("All Python packages already up to date")
+                self._update_component_state('python_packages', False)
+                return False
+
+            self.logger.info(f"✓ Updated {len(updated_packages)} Python package(s): {', '.join(updated_packages)}")
+            self._update_component_state('python_packages', True)
+
+            # Store list of updated packages in state
+            component_state = self.state.setdefault('components', {}).setdefault('python_packages', {})
+            component_state['updated_packages'] = updated_packages
+            self._save_state()
+
+            return True
+
+        except Exception as e:
+            self.logger.error(f"Python packages update error: {e}")
+            return False
+
+    def _update_component_state(self, component: str, updated: bool):
+        """Record the outcome of a component check and persist the state
+
+        Args:
+            component: Component key inside state['components']
+            updated: True when an update was installed during this check;
+                otherwise the previous 'last_update' value is kept
+        """
+        entry = self.state.setdefault('components', {}).setdefault(component, {})
+
+        if updated:
+            entry['last_update'] = datetime.now().isoformat()
+        else:
+            entry['last_update'] = entry.get('last_update')
+        entry['last_check'] = datetime.now().isoformat()
+        entry['status'] = 'updated' if updated else 'current'
+
+        self._save_state()
+
+    def _send_update_notification(self, results: Dict[str, bool]):
+        """Send one Pushover summary for the components that were updated
+
+        Nothing is sent when Pushover notifications are disabled in the config,
+        when no component was updated, or when every updated component has
+        notify_on_update turned off. Send failures are logged, not raised.
+
+        Args:
+            results: Mapping of component name to "update installed" flag
+        """
+        pushover_config = self.config.get('pushover', {})
+        if not pushover_config.get('enabled', True):
+            return
+
+        # Keep updated components whose config allows notifications
+        component_configs = self.config.get('components', {})
+        notify_components = [
+            component
+            for component, updated in results.items()
+            if updated and component_configs.get(component, {}).get('notify_on_update', True)
+        ]
+        if not notify_components:
+            return
+
+        # Human-readable component names
+        component_map = {
+            'flaresolverr': 'FlareSolverr',
+            'playwright': 'Playwright Browsers',
+            'yt_dlp': 'yt-dlp',
+            'python_packages': 'Python Packages'
+        }
+        formatted_names = [component_map.get(c, c) for c in notify_components]
+
+        title = "🔄 Dependencies Updated"
+        if len(formatted_names) == 1:
+            message = f"{formatted_names[0]} has been updated to the latest version."
+        else:
+            bullet_lines = "".join(f"• {name}\n" for name in formatted_names)
+            message = "The following components have been updated:\n\n" + bullet_lines
+
+        message += f"\nUpdated at: {datetime.now().strftime('%b %d, %I:%M %p')}"
+
+        try:
+            self.pushover.send_notification(
+                title=title,
+                message=message,
+                priority=pushover_config.get('priority', -1),
+                sound=pushover_config.get('sound', 'magic')
+            )
+
+            self.logger.info(f"Sent update notification for: {', '.join(formatted_names)}")
+        except Exception as e:
+            self.logger.error(f"Failed to send update notification: {e}")
+
+    def get_update_status(self) -> Dict:
+        """Get current update status for all components
+
+        Returns:
+            A shallow copy of the in-memory state: top-level keys can be
+            changed freely by the caller, but nested values are still shared
+            with self.state.
+        """
+        return self.state.copy()
+
+    def force_update_check(self) -> Dict[str, bool]:
+        """Force immediate update check regardless of interval or scheduler mode
+
+        Equivalent to check_and_update_all(force=True).
+
+        Returns:
+            The result dict produced by check_and_update_all
+        """
+        return self.check_and_update_all(force=True)
--- a/modules/discovery_system.py
+++ b/modules/discovery_system.py
--- a/modules/download_manager.py
+++ b/modules/download_manager.py
@@ -0,0 +1,940 @@
+#!/usr/bin/env python3
+"""
+Multi-threaded Download Manager
+Handles concurrent downloads with rate limiting, retries, and progress tracking
+Can be used by forum_downloader, fastdl_module, and other downloaders
+"""
+
+import os
+import re
+import time
+import hashlib
+import requests
+import threading
+from pathlib import Path
+from datetime import datetime
+from typing import Dict, List, Optional, Any, Callable
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from threading import Lock, Semaphore
+from dataclasses import dataclass
+import sqlite3
+from urllib.parse import urlparse
+from modules.base_module import LoggingMixin
+from modules.universal_logger import get_logger
+
+logger = get_logger('DownloadManager')  # For standalone/example usage
+
+
+@dataclass
+class DownloadItem:
+    """Single download item
+
+    Plain data record describing one file to download. Only url and
+    save_path are required; every other field has a default.
+    """
+    url: str  # Address of the file to download
+    save_path: Path  # Where the file is written on disk
+    referer: Optional[str] = None  # presumably sent as the Referer header - confirm in the download code
+    headers: Optional[Dict[str, str]] = None  # presumably extra request headers - confirm in the download code
+    metadata: Optional[Dict[str, Any]] = None  # Free-form data attached to the item
+    post_date: Optional[datetime] = None  # Timestamp to set on downloaded file
+    retry_count: int = 0  # NOTE(review): looks like the number of attempts so far - confirm
+    max_retries: int = 3  # NOTE(review): looks like the retry ceiling for this item - confirm
+    
+    
+@dataclass
+class DownloadResult:
+    """Result of a download
+
+    Pairs the originating DownloadItem with the outcome. Only success and
+    item are required; the remaining fields default to None.
+    """
+    success: bool  # Whether the download succeeded
+    item: DownloadItem  # The item this result belongs to
+    file_size: Optional[int] = None  # presumably the size in bytes - confirm against the producer
+    download_time: Optional[float] = None  # presumably elapsed seconds - confirm against the producer
+    error: Optional[str] = None  # Error description, if any
+    file_hash: Optional[str] = None  # NOTE(review): hash algorithm is not visible here - confirm
+
+
+class DownloadManager(LoggingMixin):
+    """
+    Multi-threaded download manager with:
+    - Concurrent downloads
+    - Rate limiting
+    - Automatic retries
+    - Progress tracking
+    - Database tracking
+    - Playwright support for authenticated downloads
+    """
+    
+    def __init__(self,
+                 max_workers: int = 5,
+                 rate_limit: float = 0.5,
+                 timeout: int = 30,
+                 chunk_size: int = 8192,
+                 use_database: bool = False,
+                 db_path: str = None,
+                 show_progress: bool = True,
+                 show_debug: bool = False):
+        """
+        Set up a download manager
+
+        Args:
+            max_workers: Maximum concurrent downloads
+            rate_limit: Seconds between downloads per thread
+            timeout: Download timeout in seconds
+            chunk_size: Chunk size for streaming downloads
+            use_database: Track downloads in database (ignored when db_path is not given)
+            db_path: Path to database file
+            show_progress: Show download progress
+            show_debug: Show debug messages
+        """
+        # Settings taken directly from the arguments
+        self.max_workers = max_workers
+        self.rate_limit = rate_limit
+        self.timeout = timeout
+        self.chunk_size = chunk_size
+        self.use_database = use_database
+        self.db_path = db_path
+        self.show_progress = show_progress
+
+        # Logger comes from LoggingMixin
+        self._init_logger('DownloadManager', None, default_module='Download', show_debug=show_debug)
+
+        # Synchronization primitives and per-thread rate-limit timestamps
+        self.download_lock = Lock()
+        self.rate_limiter = Semaphore(max_workers)
+        self.last_download_time = {}
+
+        # Each thread keeps its own ImageBam session in thread-local storage
+        self._imagebam_session_local = threading.local()
+
+        # Counters, all starting at zero
+        self.stats = dict.fromkeys(
+            ('total', 'successful', 'failed', 'skipped', 'total_bytes', 'total_time'),
+            0
+        )
+
+        # User agent
+        self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+
+        # Playwright context for authenticated downloads (set later)
+        self.playwright_context = None
+
+        # Database tracking needs an explicit path; without one it is switched
+        # off so that no database file gets created in the working directory
+        if self.use_database:
+            if self.db_path:
+                self._init_database()
+            else:
+                self.use_database = False
+
+    def _init_database(self):
+        """Initialize download tracking database
+
+        Creates the `downloads` table and its url/file_hash indexes when they
+        do not exist yet. Does nothing when no db_path is configured. The
+        connection is always closed, even if a statement fails.
+        """
+        if not self.db_path:
+            return
+        conn = sqlite3.connect(self.db_path)
+        try:
+            cursor = conn.cursor()
+
+            # One row per downloaded URL (url is UNIQUE)
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS downloads (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    url TEXT UNIQUE NOT NULL,
+                    file_path TEXT NOT NULL,
+                    file_hash TEXT,
+                    file_size INTEGER,
+                    download_date DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    metadata TEXT
+                )
+            ''')
+
+            # Lookup indexes on url and file_hash
+            cursor.execute('''
+                CREATE INDEX IF NOT EXISTS idx_downloads_url ON downloads(url)
+            ''')
+            cursor.execute('''
+                CREATE INDEX IF NOT EXISTS idx_downloads_hash ON downloads(file_hash)
+            ''')
+
+            conn.commit()
+        finally:
+            conn.close()
+    
+    def set_playwright_context(self, context):
+        """Set Playwright context for authenticated downloads
+
+        Stores the context and mirrors its cookies into self.cookies as a
+        plain name -> value dict for use with the requests library.
+
+        Args:
+            context: Playwright browser context, or None to clear it
+        """
+        self.playwright_context = context
+
+        # Always reset the cookie jar: clearing or replacing the context must
+        # not leave cookies from a previous session behind, and self.cookies
+        # must exist even when no context was ever provided.
+        self.cookies = {}
+        if not context:
+            return
+
+        # Extract cookies from context for requests library (best effort)
+        try:
+            self.cookies = {cookie['name']: cookie['value'] for cookie in context.cookies()}
+        except Exception as e:
+            # Keep the empty jar, but make the failure visible
+            self.log(f"Could not read cookies from Playwright context: {e}", "error")
+    
+    def _is_already_downloaded(self, url: str, file_path: Path) -> bool:
+        """Report whether this download can be skipped
+
+        Without database tracking, a non-empty file at file_path counts as
+        downloaded. With tracking, the URL must have a database record whose
+        stored path still exists with the recorded size.
+        """
+        if not self.use_database:
+            return file_path.exists() and file_path.stat().st_size > 0
+
+        conn = sqlite3.connect(self.db_path)
+        try:
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT file_path, file_size FROM downloads WHERE url = ?",
+                (url,)
+            )
+            row = cursor.fetchone()
+        finally:
+            conn.close()
+
+        if not row:
+            return False
+
+        # Check if file still exists and has expected size
+        recorded_file = Path(row[0])
+        return recorded_file.exists() and recorded_file.stat().st_size == row[1]
+    
+    def _apply_rate_limit(self, thread_id: int):
+        """Apply rate limiting per thread
+
+        Blocks until at least self.rate_limit seconds have passed since the
+        previous call made with the same thread_id, then records the current
+        time for that thread. The first call for a thread never waits.
+
+        Args:
+            thread_id: Key identifying the calling worker thread
+        """
+        # The lock only guards the shared timestamp dict. The wait happens
+        # outside it so one thread's sleep does not stall other workers.
+        with self.download_lock:
+            previous = self.last_download_time.get(thread_id)
+
+        if previous is not None:
+            remaining = self.rate_limit - (time.time() - previous)
+            if remaining > 0:
+                time.sleep(remaining)
+
+        with self.download_lock:
+            self.last_download_time[thread_id] = time.time()
+    
+    def _extract_pixhost_direct_url(self, show_url: str) -> Optional[str]:
+        """
+        Resolve a pixhost "show" page URL to a direct image URL
+
+        The image id and filename are taken from the show URL, then
+        img<N>.pixhost.to hosts are probed with HEAD requests: first a short
+        list of commonly used host numbers (2 second timeout), then every
+        remaining number from 1 to 120 (1 second timeout).
+
+        Args:
+            show_url: URL of the form https://pixhost.to/show/<id>/<filename>
+
+        Returns:
+            The first candidate URL answering HTTP 200, or None when the URL
+            does not match the show pattern, no host answers, or an
+            unexpected error occurs (which is logged)
+        """
+        try:
+            parsed = re.match(
+                r"https?://(?:www\.)?pixhost\.to/show/(\d+)/([^/]+)$",
+                show_url,
+                re.IGNORECASE
+            )
+            if parsed is None:
+                return None
+
+            img_id, filename = parsed.groups()
+
+            # Likely hosts are probed first, with the more generous timeout
+            common_hosts = [1, 2, 3, 4, 5, 10, 15, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100]
+            probes = [(num, 2) for num in common_hosts]
+            # Then sweep whatever host numbers are left
+            probes += [(num, 1) for num in range(1, 121) if num not in common_hosts]
+
+            for host_num, probe_timeout in probes:
+                candidate = f"https://img{host_num}.pixhost.to/images/{img_id}/{filename}"
+                try:
+                    # HEAD keeps the probe cheap; redirects do not count as a hit
+                    reply = requests.head(candidate, timeout=probe_timeout, allow_redirects=False)
+                except requests.RequestException:
+                    continue
+                if reply.status_code == 200:
+                    return candidate
+
+            return None
+        except Exception as e:
+            self.log(f"Error extracting pixhost URL: {e}", "error")
+            return None
+
+    
+    def _extract_imagebam_direct_url(self, imagebam_url: str) -> Optional[str]:
+        """
+        Resolve an ImageBam page URL to a direct image URL
+
+        Fetches the page with a requests session cached on
+        self._imagebam_session_local, re-requesting once if the interstitial
+        page is served, then searches the HTML for an image URL on
+        images*.imagebam.com. The full-resolution "_o" image is preferred.
+
+        Args:
+            imagebam_url: URL of the ImageBam viewer page
+
+        Returns:
+            The direct image URL, or None if the page does not return HTTP
+            200, no image URL is found, or the request fails or times out
+        """
+        bypass_cookies = ('nsfw_inter', 'sfw_inter')
+
+        try:
+            # Reuse the session cached on the local holder, creating it on first use
+            session = getattr(self._imagebam_session_local, 'session', None)
+            if session is None:
+                session = requests.Session()
+                session.headers.update({
+                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+                })
+                # Both the old and the new cookie skip the interstitial ad page
+                for cookie_name in bypass_cookies:
+                    session.cookies.set(cookie_name, '1', domain='.imagebam.com')
+                self._imagebam_session_local.session = session
+
+            page = session.get(imagebam_url, timeout=5)
+            if page.status_code != 200:
+                self.log(f"ImageBam page returned {page.status_code}", "warning")
+                return None
+
+            html = page.text
+
+            # Interstitial served anyway: re-assert the cookies and ask again
+            if 'Continue to your image' in html or 'Please wait' in html:
+                for cookie_name in reversed(bypass_cookies):
+                    session.cookies.set(cookie_name, '1', domain='.imagebam.com')
+                html = session.get(imagebam_url, timeout=5).text
+
+            # Preferred: the full resolution image, stored with an _o suffix
+            full_img_pattern = r'(https?://images\d*\.imagebam\.com/[a-f0-9/]+/[A-Z0-9]+_o\.\w+)'
+            hit = re.search(full_img_pattern, html, re.IGNORECASE)
+            if hit:
+                direct_url = hit.group(1)
+                self.log(f"Extracted ImageBam direct URL: {direct_url}", "debug")
+                return direct_url
+
+            # Otherwise accept any image hosted on images*.imagebam.com
+            fallback_patterns = (
+                r'<img[^>]+src="(https?://images\d*\.imagebam\.com/[^"]+)"',
+                r'"(https?://images\d*\.imagebam\.com/[^"]+\.(?:jpg|jpeg|png|gif))"',
+            )
+            for pattern in fallback_patterns:
+                hit = re.search(pattern, html, re.IGNORECASE)
+                if hit:
+                    direct_url = hit.group(1)
+                    self.log(f"Extracted ImageBam direct URL (fallback): {direct_url}", "debug")
+                    return direct_url
+
+            self.log("No direct image URL found in ImageBam HTML", "warning")
+            return None
+
+        except requests.Timeout:
+            self.log(f"ImageBam extraction timed out for {imagebam_url}", "warning")
+            return None
+        except Exception as e:
+            self.log(f"Error extracting ImageBam URL: {e}", "error")
+            return None
+
+
+    def _download_with_gallery_dl(self, item: DownloadItem) -> DownloadResult:
+        """
+        Download using gallery-dl for supported hosts (ImageTwist, etc.)
+
+        Runs gallery-dl as a subprocess (60 second limit), asking it to write
+        into item.save_path's directory under item.save_path's file name.
+        The attempt counts as successful only when the process exits with
+        status 0 and the expected file exists afterwards.
+
+        Args:
+            item: Download descriptor; url, save_path, referer and post_date
+                are read
+
+        Returns:
+            DownloadResult with size, duration and SHA256 hash on success,
+            or success=False with an error message (gallery-dl's stderr, a
+            timeout notice, or the text of any other exception)
+        """
+        import subprocess
+        start_time = time.time()
+
+        try:
+            # Ensure parent directory exists
+            item.save_path.parent.mkdir(parents=True, exist_ok=True)
+
+            # Build gallery-dl command
+            cmd = [
+                "gallery-dl",
+                "--dest", str(item.save_path.parent),
+                "--filename", item.save_path.name,
+                "--no-skip",
+                "--no-part",
+                "--quiet"
+            ]
+
+            # Add referer if provided
+            if item.referer:
+                cmd.extend(["--header", f"Referer: {item.referer}"])
+
+            cmd.append(item.url)
+
+            # Run gallery-dl with timeout
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=60
+            )
+
+            if result.returncode != 0 or not item.save_path.exists():
+                error_msg = result.stderr or "Unknown error"
+                return DownloadResult(
+                    success=False,
+                    item=item,
+                    error=f"gallery-dl failed: {error_msg}"
+                )
+
+            file_size = item.save_path.stat().st_size
+            download_time = time.time() - start_time
+
+            # Calculate hash (SHA256 for consistency with unified database).
+            # Read in fixed-size blocks so a large file is never held in
+            # memory all at once.
+            digest = hashlib.sha256()
+            with open(item.save_path, 'rb') as f:
+                for block in iter(lambda: f.read(1024 * 1024), b''):
+                    digest.update(block)
+            file_hash = digest.hexdigest()
+
+            # Set file timestamp if we have a date
+            if item.post_date:
+                try:
+                    timestamp_unix = item.post_date.timestamp()
+                    os.utime(item.save_path, (timestamp_unix, timestamp_unix))
+                except Exception as e:
+                    self.log(f"Failed to set timestamp: {e}", "warning")
+
+            self.log(f"Downloaded via gallery-dl: {item.save_path.name}", "success")
+            return DownloadResult(
+                success=True,
+                item=item,
+                file_size=file_size,
+                download_time=download_time,
+                file_hash=file_hash
+            )
+
+        except subprocess.TimeoutExpired:
+            return DownloadResult(
+                success=False,
+                item=item,
+                error="gallery-dl timed out"
+            )
+        except Exception as e:
+            return DownloadResult(
+                success=False,
+                item=item,
+                error=str(e)
+            )
+
+
+    def _wait_for_imagetwist_slot(self, min_interval: float = 2.0):
+        """Block until min_interval seconds have passed since the last ImageTwist request"""
+        with self.download_lock:
+            # As in the original code, the wait happens while the lock is
+            # held, so concurrent workers take their turns one at a time.
+            last_request = getattr(self, '_imagetwist_last_request', 0)
+            elapsed = time.time() - last_request
+            if elapsed < min_interval:
+                time.sleep(min_interval - elapsed)
+            self._imagetwist_last_request = time.time()
+
+    def _download_from_imagetwist(self, item: DownloadItem) -> DownloadResult:
+        """
+        Download image from ImageTwist using gallery-dl for URL resolution
+
+        gallery-dl (-g) is asked for the direct image URL; if it fails or
+        returns something that does not look like an ImageTwist URL, the
+        manual page-parsing fallback is used instead. Requests are spaced at
+        least 2 seconds apart because ImageTwist returns error images if
+        requests arrive too fast.
+
+        Args:
+            item: Download descriptor; url, save_path and post_date are read
+
+        Returns:
+            DownloadResult with size, duration and SHA256 hash on success,
+            or success=False with an error message (error placeholder image,
+            HTML instead of an image, or any exception raised on the way)
+        """
+        import subprocess
+        start_time = time.time()
+
+        # Rate limiting for ImageTwist (they return error images if too fast)
+        self._wait_for_imagetwist_slot()
+
+        try:
+            # Use gallery-dl to get the actual image URL
+            result = subprocess.run(
+                ['/opt/media-downloader/venv/bin/gallery-dl', '-g', item.url],
+                capture_output=True, text=True, timeout=30
+            )
+
+            if result.returncode != 0 or not result.stdout.strip():
+                # Fallback to manual parsing
+                return self._download_from_imagetwist_fallback(item, start_time)
+
+            img_url = result.stdout.strip().split('\n')[0]
+
+            if not img_url or 'imagetwist' not in img_url:
+                return self._download_from_imagetwist_fallback(item, start_time)
+
+            # Rate limit again before actual download
+            self._wait_for_imagetwist_slot()
+
+            # Download the actual image - use imagetwist page as Referer
+            item.save_path.parent.mkdir(parents=True, exist_ok=True)
+
+            headers = {
+                'User-Agent': self.user_agent,
+                'Referer': item.url  # Use imagetwist page URL as Referer
+            }
+
+            # The context manager releases the streamed connection on every
+            # exit path, including the early returns below
+            with requests.get(img_url, headers=headers, timeout=30, stream=True) as img_response:
+                img_response.raise_for_status()
+
+                # Check for ImageTwist error placeholder (8346 bytes - rate limited or deleted)
+                content_length = img_response.headers.get('Content-Length', '')
+                if content_length == '8346':
+                    self.log(f"ImageTwist rate limited or unavailable: {item.url}", "warning")
+                    return DownloadResult(success=False, item=item, error="ImageTwist error image (rate limited)")
+
+                # Validate it's an image, not HTML
+                chunks = []
+                for chunk in img_response.iter_content(chunk_size=8192):
+                    if not chunks:  # First chunk
+                        head = chunk[:100].lower()
+                        if b'<html' in head or b'<!doctype' in head:
+                            return DownloadResult(
+                                success=False,
+                                item=item,
+                                error="Got HTML instead of image"
+                            )
+                    chunks.append(chunk)
+
+            # Save the image
+            data = b''.join(chunks)
+            with open(item.save_path, 'wb') as f:
+                f.write(data)
+
+            file_size = len(data)
+            download_time = time.time() - start_time
+
+            # Calculate hash (SHA256 for consistency with unified database);
+            # the bytes are already in memory, so the file is not re-read
+            file_hash = hashlib.sha256(data).hexdigest()
+
+            # Set file timestamp if we have a date
+            if item.post_date:
+                try:
+                    timestamp_unix = item.post_date.timestamp()
+                    os.utime(item.save_path, (timestamp_unix, timestamp_unix))
+                except Exception as e:
+                    # Best effort: the download itself succeeded
+                    self.log(f"Failed to set timestamp: {e}", "warning")
+
+            self.log(f"Downloaded ImageTwist: {item.save_path.name}", "success")
+            return DownloadResult(
+                success=True,
+                item=item,
+                file_size=file_size,
+                download_time=download_time,
+                file_hash=file_hash
+            )
+
+        except Exception as e:
+            return DownloadResult(
+                success=False,
+                item=item,
+                error=f"ImageTwist download failed: {e}"
+            )
+
+
+    def _download_from_imagetwist_fallback(self, item: DownloadItem, start_time: float) -> DownloadResult:
+        """
+        Fallback method using manual page parsing
+
+        Fetches the ImageTwist page, locates the image URL (an <img> with
+        class "pic" first, then a regex for i*.imagetwist.com/i/ links) and
+        downloads it, applying the same validation as the gallery-dl path:
+        error-placeholder detection, HTML detection and post_date timestamps.
+
+        Args:
+            item: Download descriptor; url, referer, save_path and post_date
+                are read
+            start_time: time.time() value taken when the overall ImageTwist
+                download started, used to compute download_time
+
+        Returns:
+            DownloadResult with size, duration and SHA256 hash on success,
+            or success=False with an error message
+        """
+        from bs4 import BeautifulSoup
+        import re
+
+        try:
+            headers = {
+                'User-Agent': self.user_agent,
+                'Referer': item.referer or 'https://forum.phun.org/'
+            }
+
+            response = requests.get(item.url, headers=headers, timeout=30)
+            response.raise_for_status()
+
+            page_content = response.text
+            img_url = None
+
+            # Method 1: Look for pic class
+            soup = BeautifulSoup(page_content, 'html.parser')
+            pic_img = soup.find('img', class_='pic')
+            if pic_img and pic_img.get('src'):
+                img_url = pic_img['src']
+
+            # Method 2: Regex for i*.imagetwist.com/i/ pattern
+            if not img_url:
+                match = re.search(r'(https?://i\d*(?:phun)?\.imagetwist\.com/i/[^"\'>\s]+)', page_content)
+                if match:
+                    img_url = match.group(1)
+
+            if not img_url:
+                return DownloadResult(
+                    success=False,
+                    item=item,
+                    error="Could not find direct image URL on ImageTwist page"
+                )
+
+            # Download the actual image
+            item.save_path.parent.mkdir(parents=True, exist_ok=True)
+
+            # The context manager releases the streamed connection on every
+            # exit path, including the early returns below
+            with requests.get(img_url, headers=headers, timeout=30, stream=True) as img_response:
+                img_response.raise_for_status()
+
+                # Same error placeholder check as the gallery-dl path
+                # (8346 bytes - rate limited or deleted)
+                if img_response.headers.get('Content-Length', '') == '8346':
+                    self.log(f"ImageTwist rate limited or unavailable: {item.url}", "warning")
+                    return DownloadResult(success=False, item=item, error="ImageTwist error image (rate limited)")
+
+                chunks = []
+                for chunk in img_response.iter_content(chunk_size=8192):
+                    if not chunks:
+                        # Validate it's an image, not HTML
+                        head = chunk[:100].lower()
+                        if b'<html' in head or b'<!doctype' in head:
+                            return DownloadResult(success=False, item=item, error="Got HTML instead of image")
+                    chunks.append(chunk)
+
+            data = b''.join(chunks)
+            with open(item.save_path, 'wb') as f:
+                f.write(data)
+
+            file_size = len(data)
+            download_time = time.time() - start_time
+
+            # SHA256 of the bytes already in memory; no need to re-read the file
+            file_hash = hashlib.sha256(data).hexdigest()
+
+            # Set file timestamp if we have a date, like the other download paths
+            if item.post_date:
+                try:
+                    timestamp_unix = item.post_date.timestamp()
+                    os.utime(item.save_path, (timestamp_unix, timestamp_unix))
+                except Exception as e:
+                    self.log(f"Failed to set timestamp: {e}", "warning")
+
+            self.log(f"Downloaded ImageTwist (fallback): {item.save_path.name}", "success")
+            return DownloadResult(success=True, item=item, file_size=file_size, download_time=download_time, file_hash=file_hash)
+
+        except Exception as e:
+            return DownloadResult(success=False, item=item, error=f"ImageTwist fallback failed: {e}")
+
+
+    def _download_with_playwright(self, item: DownloadItem) -> DownloadResult:
+        """
+        Download using Playwright for authenticated sessions
+
+        Opens a new page in the stored Playwright context, navigates to
+        item.url and saves the response body to item.save_path. Falls back
+        to _download_with_requests when no context has been set.
+
+        Args:
+            item: Download descriptor; url, headers, referer, save_path and
+                post_date are read. item.headers is not modified.
+
+        Returns:
+            DownloadResult with size, duration and SHA256 hash on success,
+            or success=False with an error message (HTTP status, HTML
+            received instead of a file, or the text of any exception)
+        """
+        if not self.playwright_context:
+            return self._download_with_requests(item)
+
+        start_time = time.time()
+
+        try:
+            page = self.playwright_context.new_page()
+            try:
+                # Set headers - on a copy, so adding the Referer does not
+                # leak into the caller's item.headers dict
+                headers = dict(item.headers or {})
+                if item.referer:
+                    headers['Referer'] = item.referer
+                if headers:
+                    page.set_extra_http_headers(headers)
+
+                # Direct download (pixhost should already be processed)
+                response = page.goto(item.url, wait_until='networkidle',
+                                     timeout=self.timeout * 1000)
+
+                if not (response and response.ok):
+                    return DownloadResult(
+                        success=False,
+                        item=item,
+                        error=f"HTTP {response.status if response else 'No response'}"
+                    )
+
+                content = response.body()
+
+                # Check for HTML error pages
+                head = content[:1000].lower()
+                if b'<!doctype' in head or b'<html' in head:
+                    return DownloadResult(
+                        success=False,
+                        item=item,
+                        error="Got HTML instead of expected file"
+                    )
+
+                # Save file
+                item.save_path.parent.mkdir(parents=True, exist_ok=True)
+                item.save_path.write_bytes(content)
+
+                # Calculate hash (SHA256 for consistency with unified database)
+                file_hash = hashlib.sha256(content).hexdigest()
+
+                # Update timestamps if we have a date
+                if item.post_date:
+                    try:
+                        timestamp_unix = item.post_date.timestamp()
+                        os.utime(item.save_path, (timestamp_unix, timestamp_unix))
+                        self.log(f"Set timestamp to {item.post_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
+                    except Exception as e:
+                        self.log(f"Failed to set timestamp: {e}", "warning")
+
+                download_time = time.time() - start_time
+
+                return DownloadResult(
+                    success=True,
+                    item=item,
+                    file_size=len(content),
+                    download_time=download_time,
+                    file_hash=file_hash
+                )
+
+            finally:
+                # Always release the page, whatever the outcome
+                page.close()
+
+        except Exception as e:
+            return DownloadResult(
+                success=False,
+                item=item,
+                error=str(e)
+            )
+
+    
+    def _download_with_requests(self, item: DownloadItem) -> DownloadResult:
+        """
+        Download using requests library
+
+        Streams the response into memory, rejects it if the first chunk
+        looks like an HTML page, and only then writes it to item.save_path.
+        Cookies captured by set_playwright_context are sent when present.
+
+        Args:
+            item: Download descriptor; url, headers, referer, save_path and
+                post_date are read. item.headers is not modified.
+
+        Returns:
+            DownloadResult with size, duration and SHA256 hash on success,
+            or success=False with an error message. A file is removed on
+            failure only if this call had started writing it.
+        """
+        start_time = time.time()
+        write_started = False
+
+        try:
+            # Copy so User-Agent/Referer do not leak into the caller's dict
+            headers = dict(item.headers or {})
+            headers['User-Agent'] = self.user_agent
+            if item.referer:
+                headers['Referer'] = item.referer
+
+            # Use cookies if available
+            cookies = getattr(self, 'cookies', {})
+
+            item.save_path.parent.mkdir(parents=True, exist_ok=True)
+
+            # Stream download to memory first to validate content. The
+            # context manager releases the connection on every exit path.
+            buffer = bytearray()
+            first_chunk_checked = False
+            with requests.get(
+                item.url,
+                headers=headers,
+                cookies=cookies if cookies else None,
+                timeout=self.timeout,
+                stream=True
+            ) as response:
+                response.raise_for_status()
+
+                for chunk in response.iter_content(chunk_size=self.chunk_size):
+                    if not chunk:
+                        continue
+                    # Check first chunk for HTML error pages
+                    if not first_chunk_checked:
+                        first_chunk_checked = True
+                        head = chunk[:100].lower()
+                        if b'<html' in head or b'<!doctype' in head or b'<head>' in head:
+                            return DownloadResult(
+                                success=False,
+                                item=item,
+                                error="Got HTML instead of image"
+                            )
+                    # bytearray grows in place; bytes += would copy every time
+                    buffer += chunk
+
+            content = bytes(buffer)
+
+            # Save to file only after validation
+            write_started = True
+            with open(item.save_path, 'wb') as f:
+                f.write(content)
+
+            # Calculate hash (SHA256 for consistency with unified database)
+            file_hash = hashlib.sha256(content).hexdigest()
+
+            # Set file timestamp if we have a date
+            if item.post_date:
+                try:
+                    timestamp_unix = item.post_date.timestamp()
+                    os.utime(item.save_path, (timestamp_unix, timestamp_unix))
+                    self.log(f"Set timestamp to {item.post_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
+                except Exception as e:
+                    self.log(f"Failed to set timestamp: {e}", "warning")
+
+            download_time = time.time() - start_time
+
+            return DownloadResult(
+                success=True,
+                item=item,
+                file_size=len(content),
+                download_time=download_time,
+                file_hash=file_hash
+            )
+
+        except Exception as e:
+            # Clean up partial download - but never delete a file that was
+            # already on disk before this attempt touched it
+            if write_started and item.save_path.exists():
+                item.save_path.unlink()
+
+            return DownloadResult(
+                success=False,
+                item=item,
+                error=str(e)
+            )
+
+    
+    def _download_worker(self, item: DownloadItem, thread_id: int) -> DownloadResult:
+        """
+        Worker function for downloading a single item
+
+        Steps: resolve image-host page URLs (pixhost, ImageBam) to direct
+        URLs or download ImageTwist items through their dedicated path; skip
+        items that are already downloaded; otherwise rate-limit, download
+        with requests and retry on failure. Every finished download,
+        including a successful ImageTwist one, is saved to the database
+        (when enabled) and counted in self.stats.
+
+        Args:
+            item: Download descriptor; item.url may be rewritten to the
+                resolved direct URL and item.retry_count is incremented on
+                each retry
+            thread_id: Worker slot identifier used for rate limiting
+
+        Returns:
+            DownloadResult for the item. Already-downloaded items return a
+            successful result without touching the statistics.
+        """
+        result = None
+
+        # Process image hosting URLs to get direct URLs
+        if 'pixhost.to/show/' in item.url:
+            direct_url = self._extract_pixhost_direct_url(item.url)
+            if direct_url:
+                self.log(f"Converted pixhost URL to direct: {direct_url.split('/')[-1]}", "debug")
+                item.url = direct_url
+            else:
+                self.log(f"Failed to extract pixhost direct URL: {item.url}", "warning")
+
+        elif 'imagebam.com' in item.url:
+            direct_url = self._extract_imagebam_direct_url(item.url)
+            if direct_url:
+                self.log(f"Converted ImageBam URL to direct: {direct_url.split('/')[-1]}", "debug")
+                item.url = direct_url
+            else:
+                self.log(f"Failed to extract ImageBam direct URL: {item.url}", "warning")
+
+        elif 'imagetwist.com' in item.url:
+            # ImageTwist requires parsing the page to get direct image URL
+            result = self._download_from_imagetwist(item)
+            if not result.success:
+                self.log(f"ImageTwist download failed: {item.url}", "warning")
+                # Fall through to the generic path below
+                result = None
+
+        if result is None:
+            # Check if already downloaded
+            if self._is_already_downloaded(item.url, item.save_path):
+                self.log(f"Already downloaded: {item.save_path.name}", "skip")
+                return DownloadResult(
+                    success=True,
+                    item=item,
+                    file_size=item.save_path.stat().st_size if item.save_path.exists() else 0
+                )
+
+            # Apply rate limiting
+            self._apply_rate_limit(thread_id)
+
+            # Always use requests for direct image downloads (faster)
+            result = self._download_with_requests(item)
+
+            # Handle retries; the recursive call does its own bookkeeping
+            if not result.success and item.retry_count < item.max_retries:
+                item.retry_count += 1
+                self.log(f"Retrying {item.url} ({item.retry_count}/{item.max_retries})", "warning")
+                time.sleep(self.rate_limit * 2)  # Extra delay before retry
+                return self._download_worker(item, thread_id)
+
+        # Save to database if successful
+        if result.success and self.use_database:
+            self._save_to_database(result)
+
+        # Update statistics
+        with self.download_lock:
+            if result.success:
+                self.stats['successful'] += 1
+                if result.file_size:
+                    self.stats['total_bytes'] += result.file_size
+                if result.download_time:
+                    self.stats['total_time'] += result.download_time
+            else:
+                self.stats['failed'] += 1
+
+        return result
+
+    
+    def _save_to_database(self, result: DownloadResult):
+        """
+        Record a finished download in the downloads table
+
+        The row (url, file_path, file_hash, file_size, metadata) is written
+        with INSERT OR REPLACE. Metadata is stored as a JSON string, or
+        NULL when the item has none.
+
+        Args:
+            result: Download result whose item and file details are stored
+        """
+        conn = sqlite3.connect(self.db_path)
+        try:
+            item = result.item
+
+            if item.metadata:
+                import json
+                metadata_str = json.dumps(item.metadata)
+            else:
+                metadata_str = None
+
+            row = (
+                item.url,
+                str(item.save_path),
+                result.file_hash,
+                result.file_size,
+                metadata_str,
+            )
+            conn.execute('''
+                INSERT OR REPLACE INTO downloads
+                (url, file_path, file_hash, file_size, metadata)
+                VALUES (?, ?, ?, ?, ?)
+            ''', row)
+
+            conn.commit()
+        finally:
+            conn.close()
+
+    
+    def download_batch(self, items: List[DownloadItem],
+                       progress_callback: Optional[Callable] = None) -> List[DownloadResult]:
+        """
+        Download multiple items concurrently
+
+        Each item is handed to _download_worker on a thread pool of
+        self.max_workers threads. If a worker raises, the exception is
+        turned into a failed DownloadResult for that item so the rest of
+        the batch still completes.
+
+        Args:
+            items: List of DownloadItem objects
+            progress_callback: Optional callback for progress updates,
+                called as progress_callback(completed, total, result)
+
+        Returns:
+            List of DownloadResult objects, in completion order
+        """
+        self.stats['total'] = len(items)
+        results = []
+
+        self.log(f"Starting batch download of {len(items)} items with {self.max_workers} workers", "info")
+
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            # Submit all downloads
+            futures = {
+                executor.submit(self._download_worker, item, i % self.max_workers): item
+                for i, item in enumerate(items)
+            }
+
+            # Process completed downloads
+            completed = 0
+            for future in as_completed(futures):
+                try:
+                    result = future.result()
+                except Exception as e:
+                    # A crashed worker must not abort the whole batch
+                    failed_item = futures[future]
+                    self.log(f"Download worker crashed for {failed_item.url}: {e}", "error")
+                    result = DownloadResult(success=False, item=failed_item, error=str(e))
+                    with self.download_lock:
+                        self.stats['failed'] += 1
+                results.append(result)
+                completed += 1
+
+                # Progress update
+                if progress_callback:
+                    progress_callback(completed, len(items), result)
+
+                if self.show_progress:
+                    pct = (completed / len(items)) * 100
+                    status = "✓" if result.success else "✗"
+                    self.log(
+                        f"[{completed}/{len(items)}] {pct:.1f}% - {status} {result.item.save_path.name}",
+                        "success" if result.success else "error"
+                    )
+
+        # Summary
+        self.log(f"Batch complete: {self.stats['successful']} successful, {self.stats['failed']} failed", "info")
+
+        # total_time can be 0 even with successes (no measured duration),
+        # so guard the division as well
+        if self.stats['successful'] > 0 and self.stats['total_time'] > 0:
+            avg_speed = self.stats['total_bytes'] / self.stats['total_time'] / 1024 / 1024
+            self.log(f"Average speed: {avg_speed:.2f} MB/s", "info")
+
+        return results
+
+    
+    def download_urls(self, urls: List[str], base_path: Path,
+                      referer: Optional[str] = None,
+                      metadata: Optional[Dict] = None) -> List[DownloadResult]:
+        """
+        Download a list of URLs into one directory
+
+        Each file is named after the last path segment of its URL; a URL
+        without one gets "download_" plus the first 8 hex characters of the
+        URL's SHA256.
+
+        Args:
+            urls: List of URLs to download
+            base_path: Directory to save files
+            referer: Optional referer header applied to every download
+            metadata: Optional metadata attached to every download
+
+        Returns:
+            List of DownloadResult objects
+        """
+        def _filename_for(url):
+            # Prefer the name the URL itself carries
+            name = os.path.basename(urlparse(url).path)
+            if name:
+                return name
+            return f"download_{hashlib.sha256(url.encode()).hexdigest()[:8]}"
+
+        items = [
+            DownloadItem(
+                url=url,
+                save_path=base_path / _filename_for(url),
+                referer=referer,
+                metadata=metadata
+            )
+            for url in urls
+        ]
+
+        return self.download_batch(items)
+
+    
+    def get_statistics(self) -> Dict:
+        """
+        Return a snapshot of the download statistics
+
+        Returns:
+            Shallow copy of self.stats; changing it does not affect the
+            manager's own counters
+        """
+        snapshot = self.stats.copy()
+        return snapshot
+    
+    def cleanup_old_downloads(self, days: int = 30):
+        """
+        Remove old download records from database
+
+        Deletes rows of the downloads table whose download_date is more
+        than the given number of days in the past. Only the records are
+        removed; this method does not touch files on disk.
+
+        Args:
+            days: Age threshold in days
+
+        Returns:
+            Number of rows deleted (0 when the database is disabled)
+        """
+        if not self.use_database:
+            return 0
+
+        conn = sqlite3.connect(self.db_path)
+        try:
+            # -days plus ' days' forms a modifier such as '-30 days'
+            deleted = conn.execute('''
+                DELETE FROM downloads
+                WHERE download_date < datetime('now', ? || ' days')
+            ''', (-days,)).rowcount
+            conn.commit()
+        finally:
+            conn.close()
+
+        self.log(f"Cleaned up {deleted} old download records", "info")
+        return deleted
+
+
+# Example usage
+if __name__ == "__main__":
+    from pathlib import Path
+
+    # Manual smoke test: fetch a few public sample files
+    sample_urls = [
+        "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
+        "https://sample-videos.com/img/Sample-jpg-image-50kb.jpg",
+        "https://www.w3schools.com/html/img_girl.jpg"
+    ]
+
+    manager = DownloadManager(
+        max_workers=3,
+        rate_limit=0.5,
+        show_progress=True
+    )
+    results = manager.download_urls(sample_urls, Path("/tmp/test-downloads"))
+
+    # Report the outcome
+    succeeded = sum(1 for r in results if r.success)
+    logger.info(f"Downloaded {succeeded} of {len(results)} files")
+    logger.info(f"Total bytes: {manager.stats['total_bytes'] / 1024:.1f} KB")
+    logger.info(f"Total time: {manager.stats['total_time']:.2f} seconds")
--- a/modules/downloader_monitor.py
+++ b/modules/downloader_monitor.py
@@ -0,0 +1,375 @@
+#!/usr/bin/env python3
+"""
+Downloader Monitoring Module
+Tracks download success/failure and sends alerts when downloaders are consistently failing
+"""
+
+import sqlite3
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Optional, Dict, List
+from modules.universal_logger import get_logger
+
+
+class DownloaderMonitor:
+    """Monitor downloader health and send alerts on persistent failures"""
+
+    def __init__(self, unified_db=None, settings_manager=None):
+        """
+        Set up the monitor with built-in defaults, then overlay stored settings.
+
+        Args:
+            unified_db: Database object; the other methods call its get_connection()
+            settings_manager: Optional object whose get('monitoring', {}) supplies overrides
+        """
+        self.db = unified_db
+        self.settings_manager = settings_manager
+        self.logger = get_logger('DownloaderMonitor')
+
+        # Defaults used when no 'monitoring' settings are stored
+        self.config = {
+            'enabled': True,
+            'failure_window_hours': 3,
+            'min_consecutive_failures': 2,
+            'pushover': {
+                'enabled': True,
+                'priority': 1  # High priority
+            },
+            'downloaders': {
+                'fastdl': True,
+                'imginn': True,
+                'toolzu': True,
+                'instagram': True,
+                'snapchat': True,
+                'tiktok': True,
+                'forums': True
+            }
+        }
+
+        # Overlay stored settings; dict.update is shallow, so a nested dict replaces its default whole
+        if self.settings_manager:
+            try:
+                monitoring_config = self.settings_manager.get('monitoring', {})
+                if monitoring_config:
+                    self.config.update(monitoring_config)
+            except Exception as e:
+                self.logger.warning(f"Could not load monitoring config: {e}")
+
+    def log_download_attempt(self, downloader: str, username: str, success: bool,
+                            file_count: int = 0, error_message: str = None):
+        """
+        Record one download attempt and, after a failure, run the alert check.
+
+        Args:
+            downloader: Downloader name (fastdl, imginn, toolzu, etc.)
+            username: Account the download was run for
+            success: True when the download succeeded
+            file_count: How many files the attempt produced
+            error_message: Failure detail, stored as given (may be None)
+        """
+        # Nothing is recorded when monitoring is off globally or for this downloader
+        if not self.config.get('enabled', True):
+            return
+        if not self.config.get('downloaders', {}).get(downloader, True):
+            return
+
+        try:
+            with self.db.get_connection() as conn:
+                record = (
+                    downloader,
+                    username,
+                    datetime.now().isoformat(),
+                    1 if success else 0,
+                    file_count,
+                    error_message,
+                )
+                conn.cursor().execute("""
+                    INSERT INTO download_monitor
+                    (downloader, username, timestamp, success, file_count, error_message, alert_sent)
+                    VALUES (?, ?, ?, ?, ?, ?, 0)
+                """, record)
+                conn.commit()
+
+                self.logger.debug(f"Logged {downloader}/{username}: {'success' if success else 'failure'} ({file_count} files)")
+
+                # Only a failed attempt can extend a failure streak
+                if success:
+                    return
+                self._check_and_alert(downloader, username)
+
+        except Exception as e:
+            self.logger.error(f"Failed to log download attempt: {e}")
+
+    def _check_and_alert(self, downloader: str, username: str):
+        """
+        Alert when the newest attempts for a downloader/user pair are all failures.
+
+        Reads up to 10 attempts inside the configured window, counts the failure
+        streak starting at the newest one and sends at most one alert per window.
+
+        Args:
+            downloader: Downloader name
+            username: Username
+        """
+        try:
+            window_hours = self.config.get('failure_window_hours', 3)
+            min_failures = self.config.get('min_consecutive_failures', 2)
+            cutoff_time = datetime.now() - timedelta(hours=window_hours)
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+                # Get recent attempts within the window, newest first
+                cursor.execute("""
+                    SELECT timestamp, success, file_count, error_message, alert_sent
+                    FROM download_monitor
+                    WHERE downloader = ? AND username = ?
+                    AND timestamp > ?
+                    ORDER BY timestamp DESC
+                    LIMIT 10
+                """, (downloader, username, cutoff_time.isoformat()))
+
+                attempts = cursor.fetchall()
+                if not attempts:
+                    return
+
+                # Count consecutive failures from most recent, stopping at a success
+                consecutive_failures = 0
+                latest_error = None
+                last_success_time = None
+                for attempt in attempts:
+                    if attempt['success'] != 0:
+                        last_success_time = attempt['timestamp']
+                        break
+                    consecutive_failures += 1
+                    if latest_error is None and attempt['error_message']:
+                        latest_error = attempt['error_message']
+
+                if consecutive_failures < min_failures:
+                    return
+
+                # Stay quiet if an alert was already recorded inside this window
+                cursor.execute("""
+                    SELECT COUNT(*) FROM download_monitor
+                    WHERE downloader = ? AND username = ?
+                    AND alert_sent = 1
+                    AND timestamp > ?
+                """, (downloader, username, cutoff_time.isoformat()))
+                result = cursor.fetchone()
+                if result is not None and result[0]:
+                    return
+
+                # The rows above cover only the window (and at most 10 attempts), so no
+                # success there does not mean "never": look up the newest success overall.
+                if last_success_time is None:
+                    cursor.execute("""
+                        SELECT MAX(timestamp) FROM download_monitor
+                        WHERE downloader = ? AND username = ? AND success = 1
+                    """, (downloader, username))
+                    newest_success = cursor.fetchone()
+                    last_success_time = newest_success[0] if newest_success else None
+
+                self._send_alert(downloader, username, consecutive_failures, last_success_time, latest_error)
+
+                # Mark most recent failure as alerted
+                cursor.execute("""
+                    UPDATE download_monitor
+                    SET alert_sent = 1
+                    WHERE id = (
+                        SELECT id FROM download_monitor
+                        WHERE downloader = ? AND username = ?
+                        ORDER BY timestamp DESC
+                        LIMIT 1
+                    )
+                """, (downloader, username))
+                conn.commit()
+
+        except Exception as e:
+            self.logger.error(f"Failed to check for alerts: {e}")
+
+    def _send_alert(self, downloader: str, username: str, failure_count: int,
+                   last_success_time: str, error_message: str):
+        """
+        Send a Pushover alert for a failure streak; errors are logged, not raised.
+
+        Args:
+            downloader: Downloader name; underscores become spaces in the title
+            username: Username shown in the message body
+            failure_count: Number of consecutive failures
+            last_success_time: ISO timestamp of the last success; falsy shows "Never"
+            error_message: Latest error message; falsy shows "Unknown"
+        """
+        if not self.config.get('pushover', {}).get('enabled', True):
+            return
+
+        try:
+            from modules.pushover_notifier import PushoverNotifier
+
+            # Global Pushover settings; without a settings manager this stays empty and nothing is sent
+            pushover_config = {}
+            if self.settings_manager:
+                pushover_config = self.settings_manager.get('pushover', {})
+
+            if not pushover_config.get('enabled'):
+                return
+
+            notifier = PushoverNotifier(
+                api_token=pushover_config.get('api_token'),
+                user_key=pushover_config.get('user_key')
+            )
+
+            # Describe the age of the last success in whole hours, or whole days from 24 hours on
+            time_since_success = "Never"
+            if last_success_time:
+                try:
+                    last_success = datetime.fromisoformat(last_success_time)
+                    delta = datetime.now() - last_success
+                    hours = int(delta.total_seconds() / 3600)
+                    if hours < 24:
+                        time_since_success = f"{hours} hours ago"
+                    else:
+                        days = hours // 24
+                        time_since_success = f"{days} days ago"
+                except (ValueError, TypeError) as e:
+                    self.logger.warning(f"Failed to parse last_success_time '{last_success_time}': {e}")
+                    time_since_success = "Unknown (parse error)"
+
+            # Title-case the downloader name, e.g. 'my_site' -> 'My Site'
+            downloader_display = downloader.replace('_', ' ').title()
+
+            # Build message (the body lines sit at column 0 because they are part of the string)
+            title = f"🚨 {downloader_display} Failing"
+            message = f"""Downloader has been failing for {self.config.get('failure_window_hours', 3)}+ hours
+
+Username: {username}
+Consecutive Failures: {failure_count}
+Last Success: {time_since_success}
+Latest Error: {error_message or 'Unknown'}
+
+Check logs for details."""
+
+            # Priority comes from the monitor config and defaults to 1
+            notifier.send_notification(
+                title=title,
+                message=message,
+                priority=self.config.get('pushover', {}).get('priority', 1)
+            )
+
+            self.logger.warning(f"Sent alert for {downloader}/{username} ({failure_count} failures)")
+
+        except Exception as e:
+            self.logger.error(f"Failed to send alert: {e}")
+
+    def get_downloader_status(self, downloader: str = None, hours: int = 24) -> List[Dict]:
+        """
+        Summarise recent download attempts per downloader.
+
+        Args:
+            downloader: Specific downloader (None = all)
+            hours: How many hours to look back
+
+        Returns:
+            One dict per downloader, ordered by name, with the keys
+            downloader, total_attempts, successful, failed, total_files,
+            success_rate (percent, one decimal), last_success and
+            last_attempt. An empty list is returned when the query fails.
+        """
+        try:
+            cutoff = datetime.now() - timedelta(hours=hours)
+
+            # A single statement serves both cases: the downloader filter is
+            # appended only when requested. Only fixed SQL fragments are
+            # interpolated; every value is still bound as a parameter.
+            conditions = ["timestamp > ?"]
+            params = [cutoff.isoformat()]
+            if downloader:
+                conditions.append("downloader = ?")
+                params.append(downloader)
+
+            query = f"""
+                SELECT
+                    downloader,
+                    COUNT(*) as total_attempts,
+                    SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful,
+                    SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) as failed,
+                    SUM(file_count) as total_files,
+                    MAX(CASE WHEN success = 1 THEN timestamp END) as last_success,
+                    MAX(timestamp) as last_attempt
+                FROM download_monitor
+                WHERE {' AND '.join(conditions)}
+                GROUP BY downloader
+                ORDER BY downloader
+            """
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute(query, params)
+
+                results = []
+                for row in cursor.fetchall():
+                    total = row['total_attempts']
+                    successful = row['successful'] or 0
+                    results.append({
+                        'downloader': row['downloader'],
+                        'total_attempts': total,
+                        'successful': successful,
+                        'failed': row['failed'] or 0,
+                        'total_files': row['total_files'] or 0,
+                        # A grouped COUNT(*) is never 0; the guard is purely defensive
+                        'success_rate': round(successful / total * 100, 1) if total > 0 else 0,
+                        'last_success': row['last_success'],
+                        'last_attempt': row['last_attempt']
+                    })
+
+                return results
+
+        except Exception as e:
+            self.logger.error(f"Failed to get downloader status: {e}")
+            return []
+
+    def clear_old_logs(self, days: int = 30):
+        """
+        Delete monitoring rows whose timestamp is more than ``days`` days old.
+
+        Args:
+            days: Retention period in days
+        """
+        try:
+            threshold = (datetime.now() - timedelta(days=days)).isoformat()
+
+            with self.db.get_connection() as conn:
+                purge = conn.cursor()
+                purge.execute("""
+                    DELETE FROM download_monitor
+                    WHERE timestamp < ?
+                """, (threshold,))
+                deleted = purge.rowcount
+                conn.commit()
+
+                self.logger.info(f"Cleared {deleted} old monitoring logs (older than {days} days)")
+
+        except Exception as e:
+            self.logger.error(f"Failed to clear old logs: {e}")
+
+
+# Singleton instance with thread-safe initialization
+_monitor_instance = None
+_monitor_instance_lock = __import__('threading').Lock()
+
+
+def get_monitor(unified_db=None, settings_manager=None):
+    """Return the shared monitor, creating it on first use; later calls ignore the arguments."""
+    global _monitor_instance
+    if _monitor_instance is not None:
+        return _monitor_instance
+    with _monitor_instance_lock:
+        # Re-check under the lock: another thread may have built it meanwhile
+        if _monitor_instance is None:
+            if unified_db is None:
+                # No database supplied: construct the default one
+                from modules.unified_database import UnifiedDatabase
+                unified_db = UnifiedDatabase()
+            if settings_manager is None:
+                # No settings manager supplied: open the default database file
+                from modules.settings_manager import SettingsManager
+                settings_manager = SettingsManager('/opt/media-downloader/database/media_downloader.db')
+
+            _monitor_instance = DownloaderMonitor(unified_db, settings_manager)
+    return _monitor_instance
--- a/modules/easynews_client.py
+++ b/modules/easynews_client.py
@@ -0,0 +1,502 @@
+"""
+Easynews Client Module
+
+Provides a client for interacting with the Easynews API to search for and download files.
+All connections use HTTPS with HTTP Basic Auth.
+"""
+
+import re
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Any, Callable, Dict, List, Optional
+from urllib.parse import quote, urljoin
+
+import requests
+from requests.auth import HTTPBasicAuth
+
+from modules.universal_logger import get_logger
+
+logger = get_logger('EasynewsClient')
+
+
+@dataclass
+class EasynewsResult:
+    """One file from an Easynews search result list."""
+    filename: str
+    # May carry embedded credentials when built by EasynewsClient
+    download_url: str
+    size_bytes: int
+    post_date: Optional[str]
+    subject: Optional[str]
+    poster: Optional[str]
+    newsgroup: Optional[str]
+    extension: Optional[str]
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the eight fields as a plain dict keyed by field name."""
+        # Listed explicitly so the key order matches the field order above
+        keys = (
+            'filename', 'download_url',
+            'size_bytes', 'post_date',
+            'subject', 'poster',
+            'newsgroup', 'extension',
+        )
+        return {key: getattr(self, key) for key in keys}
+
+
+class EasynewsClient:
+    """
+    Client for interacting with Easynews search and download APIs.
+
+    All connections use HTTPS with HTTP Basic Auth.
+    Supports HTTP, HTTPS, SOCKS4, and SOCKS5 proxies.
+    """
+
+    BASE_URL = "https://members.easynews.com"
+    SEARCH_URL = "https://members.easynews.com/2.0/search/solr-search/advanced"
+
+    # Quality patterns (first match wins); bare aliases need non-alphanumeric neighbours so 'hdtv' is not 'hd'
+    QUALITY_PATTERNS = [
+        (r'2160p|(?<![a-z0-9])4k(?![a-z0-9])|uhd', '2160p'),
+        (r'1080p|fhd', '1080p'),
+        (r'720p|(?<![a-z0-9])hd(?![a-z0-9])', '720p'),
+        (r'480p|(?<![a-z0-9])sd(?![a-z0-9])', '480p'),
+        (r'360p', '360p'),
+    ]
+
+    # Audio codec patterns (order matters); channel counts must not sit inside a longer number such as 265.1080
+    AUDIO_PATTERNS = [
+        (r'truehd.*atmos|atmos.*truehd', 'Atmos'),
+        (r'atmos', 'Atmos'),
+        (r'truehd', 'TrueHD'),
+        (r'dts[\.\-]?hd[\.\-]?ma', 'DTS-HD'),
+        (r'dts[\.\-]?hd', 'DTS-HD'),
+        (r'dts[\.\-]?x(?![a-z0-9])', 'DTS:X'),
+        (r'dts', 'DTS'),
+        (r'(?<!\d)7[\.\-]?1(?!\d)', '7.1'),
+        (r'ddp[\.\-\s]?5[\.\-]?1|eac3|e[\.\-]?ac[\.\-]?3|dd[\.\-]?5[\.\-]?1|ac3|(?<!\d)5[\.\-]?1(?!\d)', '5.1'),
+        (r'ddp|dd\+', '5.1'),
+        (r'aac[\.\-]?5[\.\-]?1', '5.1'),
+        (r'aac', 'AAC'),
+        (r'flac', 'FLAC'),
+        (r'mp3', 'MP3'),
+    ]
+
+    # Source/release type patterns; 'cam' must be a standalone token (or 'camrip'), not part of a word
+    SOURCE_PATTERNS = [
+        (r'remux', 'Remux'),
+        (r'blu[\.\-]?ray|bdrip|brrip', 'BluRay'),
+        (r'web[\.\-]?dl', 'WEB-DL'),
+        (r'webrip', 'WEBRip'),
+        (r'web', 'WEB'),
+        (r'hdtv', 'HDTV'),
+        (r'dvdrip', 'DVDRip'),
+        (r'dvd', 'DVD'),
+        (r'hdcam|(?<![a-z0-9])cam(?:rip)?(?![a-z0-9])', 'CAM'),
+    ]
+
+    def __init__(
+        self,
+        username: str,
+        password: str,
+        proxy_enabled: bool = False,
+        proxy_type: str = 'http',
+        proxy_host: Optional[str] = None,
+        proxy_port: Optional[int] = None,
+        proxy_username: Optional[str] = None,
+        proxy_password: Optional[str] = None,
+    ):
+        """
+        Initialize the Easynews client.
+
+        Args:
+            username: Easynews username
+            password: Easynews password
+            proxy_enabled: Whether to use a proxy
+            proxy_type: Proxy type (http, https, socks4, socks5)
+            proxy_host: Proxy hostname/IP
+            proxy_port: Proxy port
+            proxy_username: Proxy auth username (optional)
+            proxy_password: Proxy auth password (optional)
+        """
+        self.username = username
+        self.password = password
+        self.auth = HTTPBasicAuth(username, password)
+
+        # One shared session carrying the Basic Auth credentials (no retry adapter is mounted here)
+        self.session = requests.Session()
+        self.session.auth = self.auth
+
+        # Configure the proxy only when it is enabled and both host and port are given
+        self.proxies = {}
+        if proxy_enabled and proxy_host and proxy_port:
+            proxy_url = self._build_proxy_url(
+                proxy_type, proxy_host, proxy_port,
+                proxy_username, proxy_password
+            )
+            self.proxies = {
+                'http': proxy_url,
+                'https': proxy_url,
+            }
+            self.session.proxies.update(self.proxies)
+            logger.info(f"Easynews client configured with {proxy_type} proxy: {proxy_host}:{proxy_port}")
+
+    def _build_proxy_url(
+        self,
+        proxy_type: str,
+        host: str,
+        port: int,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+    ) -> str:
+        """Build a proxy URL; credentials are fully percent-encoded (safe='' also escapes '/')."""
+        scheme = proxy_type.lower()
+        if scheme not in ('http', 'https', 'socks4', 'socks5'):
+            scheme = 'http'  # unknown proxy types fall back to plain HTTP
+
+        if username and password:
+            return f"{scheme}://{quote(username, safe='')}:{quote(password, safe='')}@{host}:{port}"
+        return f"{scheme}://{host}:{port}"
+
+    def test_connection(self) -> Dict[str, Any]:
+        """
+        Probe the members area using the session's stored credentials.
+
+        Problems are reported through the returned dict instead of being raised.
+
+        Returns:
+            Dict with 'success' bool and 'message' string
+        """
+        try:
+            response = self.session.get(f"{self.BASE_URL}/", timeout=30)
+            status = response.status_code
+
+            if status == 401:
+                return {
+                    'success': False,
+                    'message': 'Invalid credentials - authentication failed'
+                }
+            if status != 200:
+                return {
+                    'success': False,
+                    'message': f'Unexpected response: HTTP {response.status_code}'
+                }
+
+            # A 200 may still be the login page, so inspect the final URL and body
+            on_login_page = 'login' in response.url.lower() or 'sign in' in response.text.lower()
+            if on_login_page:
+                return {
+                    'success': False,
+                    'message': 'Invalid credentials - authentication failed'
+                }
+            return {
+                'success': True,
+                'message': 'Successfully connected to Easynews'
+            }
+        except requests.exceptions.ProxyError as e:
+            return {
+                'success': False,
+                'message': f'Proxy connection failed: {str(e)}'
+            }
+        except requests.exceptions.ConnectionError as e:
+            return {
+                'success': False,
+                'message': f'Connection failed: {str(e)}'
+            }
+        except requests.exceptions.Timeout:
+            return {
+                'success': False,
+                'message': 'Connection timed out'
+            }
+        except Exception as e:
+            logger.error(f"Easynews connection test failed: {e}")
+            return {
+                'success': False,
+                'message': f'Connection test failed: {str(e)}'
+            }
+
+    def search(
+        self,
+        query: str,
+        page: int = 1,
+        results_per_page: int = 50,
+        file_types: Optional[List[str]] = None,
+    ) -> List[EasynewsResult]:
+        """
+        Search Easynews and return the parsed results; any failure yields an empty list.
+
+        Args:
+            query: Search query string
+            page: Page number (1-indexed)
+            results_per_page: Number of results per page (capped at 250)
+            file_types: Optional values sent as the 'fty[]' parameter (default ['VIDEO'])
+
+        Returns:
+            List of EasynewsResult objects (empty on request, JSON or parsing errors)
+        """
+        try:
+            # Build search parameters
+            params = {
+                'gps': query,
+                'pby': min(results_per_page, 250),
+                'pno': page,
+                'sS': 1,  # Safe search off
+                'saession': '',  # NOTE(review): key spelling looks like a typo - confirm against the API
+                'sb': 1,  # Sort by date
+                'sbj': 1,  # Subject search
+                'fly': 2,  # File type filter mode
+                'fex': 'mkv,mp4',  # Only mkv and mp4 files
+            }
+
+            # NOTE(review): file_types feeds 'fty[]' while 'fex' above stays 'mkv,mp4' - confirm intent
+            if file_types:
+                params['fty[]'] = file_types
+            else:
+                # Default to video file types
+                params['fty[]'] = ['VIDEO']
+
+            response = self.session.get(
+                self.SEARCH_URL,
+                params=params,
+                timeout=60,
+            )
+            response.raise_for_status()
+
+            # An empty or whitespace-only body is treated as "no results"
+            if not response.content or not response.content.strip():
+                logger.warning(f"Easynews search for '{query}' returned empty response (HTTP {response.status_code})")
+                return []
+
+            try:
+                data = response.json()
+            except (ValueError, Exception) as json_err:
+                logger.warning(f"Easynews search for '{query}' returned invalid JSON (HTTP {response.status_code}, body: {response.text[:200]}): {json_err}")
+                return []
+            results = []
+
+            # Results are expected as a list under the 'data' key; other shapes give no results
+            if 'data' in data and isinstance(data['data'], list):
+                for item in data['data']:
+                    result = self._parse_search_result(item)
+                    if result:
+                        results.append(result)
+
+            logger.info(f"Easynews search for '{query}' returned {len(results)} results")
+            return results
+
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Easynews search failed: {e}")
+            return []
+        except Exception as e:
+            logger.error(f"Error parsing Easynews search results: {e}")
+            return []
+
+    def _parse_search_result(self, item: Dict[str, Any]) -> Optional[EasynewsResult]:
+        """
+        Convert one raw API item into an EasynewsResult.
+
+        Returns None when the item has no filename or no usable download URL, or
+        when parsing raises. The extension always carries a leading dot.
+        """
+        try:
+            filename = item.get('fn', '') or item.get('0', '')
+            if not filename:
+                return None
+
+            # Format: https://username:password@members.easynews.com/dl/{hash}/{filename}
+            file_hash = item.get('hash', '') or item.get('0', '')
+            sig = item.get('sig', '')
+
+            if file_hash and sig:
+                # safe='' also escapes '/', so a slash in the credentials cannot start the URL path
+                user = quote(self.username, safe='')
+                secret = quote(self.password, safe='')
+                download_path = f"/dl/{file_hash}/{quote(filename)}?sig={sig}"
+                download_url = f"https://{user}:{secret}@members.easynews.com{download_path}"
+            else:
+                download_url = item.get('url', '') or item.get('rawURL', '')
+                if download_url and not download_url.startswith('http'):
+                    download_url = urljoin(self.BASE_URL, download_url)
+
+            if not download_url:
+                return None
+
+            size_bytes = 0
+            size_str = item.get('rawSize', '') or item.get('size', '')
+            if isinstance(size_str, (int, float)):
+                size_bytes = int(size_str)
+            elif isinstance(size_str, str):
+                size_bytes = self._parse_size(size_str)
+
+            # Dates are passed through as text, only trimmed
+            post_date = item.get('date', '') or item.get('d', '')
+            if isinstance(post_date, str):
+                post_date = post_date.strip()
+
+            # Prefer the API's extension field, else the filename; both get the same dotted form
+            extension = item.get('extension', '') or item.get('11', '') or item.get('2', '')
+            if not extension:
+                extension = self._get_extension(filename)
+            if extension and not extension.startswith('.'):
+                extension = '.' + extension
+
+            return EasynewsResult(
+                filename=filename,
+                download_url=download_url,
+                size_bytes=size_bytes,
+                post_date=post_date if post_date else None,
+                subject=item.get('subject', '') or item.get('s', ''),
+                poster=item.get('poster', '') or item.get('p', ''),
+                newsgroup=item.get('newsgroup', '') or item.get('ng', ''),
+                extension=extension if extension else None,
+            )
+        except Exception as e:
+            logger.debug(f"Failed to parse search result: {e}")
+            return None
+
+    def _parse_size(self, size_str: str) -> int:
+        """Parse a size string like '1.5 GB' (1024-based units) to bytes; 0 if unparseable."""
+        try:
+            size_str = size_str.strip().upper()
+            # Longest suffixes first: every unit ends in 'B', so the bare 'B' must be tested last
+            multipliers = {
+                'TB': 1024 ** 4,
+                'GB': 1024 ** 3,
+                'MB': 1024 ** 2,
+                'KB': 1024,
+                'B': 1,
+            }
+
+            for suffix, multiplier in multipliers.items():
+                if size_str.endswith(suffix):
+                    return int(float(size_str[:-len(suffix)].strip()) * multiplier)
+
+            # No unit suffix: treat the text as a plain byte count
+            return int(float(size_str))
+        except (AttributeError, TypeError, ValueError, OverflowError):
+            return 0
+
+    def _get_extension(self, filename: str) -> Optional[str]:
+        """Return the lower-cased text after the last '.', or None if there is no dot."""
+        if '.' not in filename:
+            return None
+        return filename.rpartition('.')[2].lower()
+
+    @staticmethod
+    def detect_quality(filename: str) -> Optional[str]:
+        """Return the label of the first QUALITY_PATTERNS entry found in the name, else None."""
+        lowered = filename.lower()
+        hits = (
+            label for regex, label in EasynewsClient.QUALITY_PATTERNS if re.search(regex, lowered)
+        )
+        return next(hits, None)
+
+    @staticmethod
+    def detect_audio(filename: str) -> Optional[str]:
+        """Return the label of the first AUDIO_PATTERNS entry found in the name, else None."""
+        lowered = filename.lower()
+        hits = (
+            label for regex, label in EasynewsClient.AUDIO_PATTERNS if re.search(regex, lowered)
+        )
+        return next(hits, None)
+
+    @staticmethod
+    def detect_source(filename: str) -> Optional[str]:
+        """Return the label of the first SOURCE_PATTERNS entry found in the name, else None."""
+        lowered = filename.lower()
+        hits = (
+            label for regex, label in EasynewsClient.SOURCE_PATTERNS if re.search(regex, lowered)
+        )
+        return next(hits, None)
+
+    def download_file(
+        self,
+        url: str,
+        dest_path: str,
+        progress_callback: Optional[Callable[[int, int], None]] = None,
+        chunk_size: int = 8192,
+    ) -> Dict[str, Any]:
+        """
+        Stream a file from Easynews to disk.
+
+        Args:
+            url: Download URL (with authentication embedded or using session)
+            dest_path: Destination file path
+            progress_callback: Optional callback(downloaded_bytes, total_bytes)
+            chunk_size: Download chunk size in bytes
+
+        Returns:
+            Dict with 'success' True plus 'path' and 'size', or 'success' False plus 'message'
+        """
+        try:
+            # The with-block closes the streamed response on every exit path, including errors
+            with self.session.get(
+                url,
+                stream=True,
+                timeout=30,
+            ) as response:
+                response.raise_for_status()
+
+                total_size = int(response.headers.get('content-length', 0))
+                downloaded = 0
+
+                with open(dest_path, 'wb') as f:
+                    for chunk in response.iter_content(chunk_size=chunk_size):
+                        if chunk:
+                            f.write(chunk)
+                            downloaded += len(chunk)
+                            if progress_callback:
+                                progress_callback(downloaded, total_size)
+
+            logger.info(f"Downloaded file to {dest_path} ({downloaded} bytes)")
+            return {
+                'success': True,
+                'path': dest_path,
+                'size': downloaded,
+            }
+
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Download failed: {e}")
+            return {
+                'success': False,
+                'message': f'Download failed: {str(e)}'
+            }
+        except IOError as e:
+            logger.error(f"Failed to write file: {e}")
+            return {
+                'success': False,
+                'message': f'Failed to write file: {str(e)}'
+            }
+        except Exception as e:
+            logger.error(f"Unexpected error during download: {e}")
+            return {
+                'success': False,
+                'message': f'Download error: {str(e)}'
+            }
+
+    def get_file_info(self, url: str) -> Dict[str, Any]:
+        """
+        Look up a file's metadata with a HEAD request.
+
+        Args:
+            url: File URL
+
+        Returns:
+            'success' True plus 'size', 'content_type' and 'last_modified'
+            taken from the response headers, or 'success' False plus 'message'.
+        """
+        try:
+            response = self.session.head(url, timeout=30)
+            response.raise_for_status()
+            headers = response.headers
+
+            # Absent headers fall back to 0 / empty strings
+            info: Dict[str, Any] = {'success': True}
+            info['size'] = int(headers.get('content-length', 0))
+            info['content_type'] = headers.get('content-type', '')
+            info['last_modified'] = headers.get('last-modified', '')
+            return info
+        except Exception as e:
+            logger.error(f"Failed to get file info: {e}")
+            failure = {'success': False, 'message': str(e)}
+            return failure
--- a/modules/easynews_monitor.py
+++ b/modules/easynews_monitor.py
--- a/modules/face_recognition_module.py
+++ b/modules/face_recognition_module.py
--- a/modules/fastdl_module.py
+++ b/modules/fastdl_module.py
--- a/modules/filename_parser.py
+++ b/modules/filename_parser.py
@@ -0,0 +1,382 @@
+#!/usr/bin/env python3
+"""
+Filename Parser Module for Manual Import
+Parses filenames based on configurable patterns to extract metadata
+"""
+
+import re
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+
+
+class FilenameParser:
+    """
+    Parse filenames using configurable patterns to extract metadata.
+
+    Supported pattern tokens:
+    - {username} - Username/source (alphanumeric, underscores, periods)
+    - {YYYYMMDD} - Date as 8 digits (20251127)
+    - {HHMMSS} - Time as 6 digits (172753)
+    - {YYYYMMDD_HHMMSS} - Combined date_time with underscore
+    - {id} - Media ID (any characters until next separator)
+    - {description} - Text content (any characters until next separator)
+    - {num} - Sequence number (digits)
+    - {ext} - File extension (optional, auto-handled)
+
+    Example patterns:
+    - Instagram Stories: "{username}_{YYYYMMDD}_{HHMMSS}_{id}"
+    - Instagram Posts: "{username}_{YYYYMMDD}_{HHMMSS}_{id}"
+    - TikTok: "{YYYYMMDD}_{description}_{id}_{num}"
+    """
+
+    # Token definitions: token_name -> (regex_pattern, is_greedy)
+    TOKEN_PATTERNS = {
+        'username': (r'[a-zA-Z0-9_.]+', False),
+        'YYYYMMDD': (r'\d{8}', False),
+        'HHMMSS': (r'\d{6}', False),
+        'YYYYMMDD_HHMMSS': (r'\d{8}_\d{6}', False),
+        'id': (r'.+', True),  # Greedy - matches everything until separator
+        'description': (r'.+', True),  # Greedy
+        'num': (r'\d+', False),
+        'ext': (r'\.[a-zA-Z0-9]+', False),
+    }
+
+    def __init__(self, pattern: str):
+        """
+        Initialize parser with a filename pattern.
+
+        Args:
+            pattern: Pattern string like "{username}-{YYYYMMDD}_{HHMMSS}-{id}"
+        """
+        self.pattern = pattern
+        self.regex, self.token_order = self._compile_pattern(pattern)
+
+    def _compile_pattern(self, pattern: str) -> tuple:
+        """
+        Convert pattern string to compiled regex.
+
+        Returns:
+            Tuple of (compiled_regex, list_of_token_names)
+        """
+        # Find all tokens in the pattern
+        token_regex = r'\{(\w+)\}'
+        tokens = re.findall(token_regex, pattern)
+
+        # Build regex pattern
+        regex_pattern = pattern
+
+        # Escape special regex characters in the pattern (except our tokens)
+        # First, temporarily replace tokens
+        for i, token in enumerate(tokens):
+            regex_pattern = regex_pattern.replace(f'{{{token}}}', f'__TOKEN_{i}__', 1)
+
+        # Escape special chars
+        regex_pattern = re.escape(regex_pattern)
+
+        # Replace tokens back with their regex patterns
+        for i, token in enumerate(tokens):
+            if token in self.TOKEN_PATTERNS:
+                token_pattern, is_greedy = self.TOKEN_PATTERNS[token]
+                # Use non-greedy for greedy tokens when there's a separator after
+                if is_greedy:
+                    # Make it non-greedy so it stops at the next separator
+                    token_pattern = r'.+?'
+                regex_pattern = regex_pattern.replace(f'__TOKEN_{i}__', f'({token_pattern})', 1)
+            else:
+                # Unknown token - treat as any characters
+                regex_pattern = regex_pattern.replace(f'__TOKEN_{i}__', r'(.+?)', 1)
+
+        # Handle the last greedy token specially - it should be truly greedy
+        # Find the last greedy token and make it greedy
+        for token in reversed(tokens):
+            if token in self.TOKEN_PATTERNS:
+                _, is_greedy = self.TOKEN_PATTERNS[token]
+                if is_greedy:
+                    # The last occurrence of .+? for this token should be .+
+                    # We need to be more careful here - just make the whole pattern work
+                    break
+
+        # Add start anchor, but allow extension at end
+        regex_pattern = '^' + regex_pattern + r'(?:\.[a-zA-Z0-9]+)?$'
+
+        try:
+            compiled = re.compile(regex_pattern)
+        except re.error as e:
+            raise ValueError(f"Invalid pattern '{pattern}': {e}")
+
+        return compiled, tokens
+
+    def parse(self, filename: str) -> Dict[str, Any]:
+        """
+        Parse a filename and extract metadata.
+
+        Args:
+            filename: Filename to parse (with or without extension)
+
+        Returns:
+            Dictionary with extracted metadata:
+            - username: str or None
+            - datetime: datetime object or None
+            - media_id: str or None
+            - description: str or None
+            - num: int or None
+            - extension: str or None
+            - valid: bool
+            - error: str or None (if valid is False)
+        """
+        result = {
+            'username': None,
+            'datetime': None,
+            'media_id': None,
+            'description': None,
+            'num': None,
+            'extension': None,
+            'valid': False,
+            'error': None,
+            'raw_values': {}
+        }
+
+        # Extract extension
+        path = Path(filename)
+        extension = path.suffix.lower() if path.suffix else None
+        basename = path.stem
+        result['extension'] = extension
+
+        # Try to match the pattern
+        match = self.regex.match(basename) or self.regex.match(filename)
+
+        if not match:
+            result['error'] = f"Filename doesn't match pattern: {self.pattern}"
+            return result
+
+        # Extract values for each token
+        groups = match.groups()
+        for i, token in enumerate(self.token_order):
+            if i < len(groups):
+                value = groups[i]
+                result['raw_values'][token] = value
+
+                # Map tokens to result fields
+                if token == 'username':
+                    result['username'] = value.lower()
+                elif token == 'id':
+                    result['media_id'] = value
+                elif token == 'description':
+                    result['description'] = value
+                elif token == 'num':
+                    try:
+                        result['num'] = int(value)
+                    except ValueError:
+                        result['num'] = value
+
+        # Parse datetime from date/time tokens
+        result['datetime'] = self._parse_datetime(result['raw_values'])
+
+        result['valid'] = True
+        return result
+
+    def _parse_datetime(self, raw_values: Dict[str, str]) -> Optional[datetime]:
+        """
+        Parse datetime from extracted raw values.
+
+        Supports:
+        - YYYYMMDD_HHMMSS combined
+        - YYYYMMDD + HHMMSS separate
+        - YYYYMMDD only (time defaults to 00:00:00)
+        """
+        try:
+            if 'YYYYMMDD_HHMMSS' in raw_values:
+                dt_str = raw_values['YYYYMMDD_HHMMSS']
+                return datetime.strptime(dt_str, '%Y%m%d_%H%M%S')
+
+            if 'YYYYMMDD' in raw_values:
+                date_str = raw_values['YYYYMMDD']
+
+                if 'HHMMSS' in raw_values:
+                    time_str = raw_values['HHMMSS']
+                    return datetime.strptime(f'{date_str}_{time_str}', '%Y%m%d_%H%M%S')
+                else:
+                    # Date only, no time
+                    return datetime.strptime(date_str, '%Y%m%d')
+
+            return None
+        except ValueError:
+            return None
+
+    def validate_pattern(self) -> tuple:
+        """
+        Validate the pattern is properly formed.
+
+        Returns:
+            Tuple of (is_valid: bool, error_message: str or None)
+        """
+        try:
+            # Check for at least one recognized token
+            token_regex = r'\{(\w+)\}'
+            tokens = re.findall(token_regex, self.pattern)
+
+            if not tokens:
+                return False, "Pattern must contain at least one token"
+
+            # Check all tokens are recognized
+            unknown_tokens = [t for t in tokens if t not in self.TOKEN_PATTERNS]
+            if unknown_tokens:
+                return False, f"Unknown tokens: {', '.join(unknown_tokens)}"
+
+            return True, None
+        except Exception as e:
+            return False, str(e)
+
+
+def create_parser(pattern: str) -> FilenameParser:
+    """
+    Factory function to create a FilenameParser.
+
+    Args:
+        pattern: Pattern string
+
+    Returns:
+        FilenameParser instance
+    """
+    return FilenameParser(pattern)
+
+
+def parse_with_fallbacks(filename: str, patterns: List[str]) -> Dict[str, Any]:
+    """
+    Try parsing a filename with multiple patterns, return first successful match.
+
+    Args:
+        filename: Filename to parse
+        patterns: List of pattern strings to try in order
+
+    Returns:
+        Dictionary with extracted metadata (same as FilenameParser.parse)
+    """
+    last_error = None
+    for pattern in patterns:
+        try:
+            parser = FilenameParser(pattern)
+            result = parser.parse(filename)
+            if result['valid']:
+                result['matched_pattern'] = pattern
+                return result
+            last_error = result.get('error')
+        except Exception as e:
+            last_error = str(e)
+
+    # Return failure with last error
+    return {
+        'username': None,
+        'datetime': None,
+        'media_id': None,
+        'description': None,
+        'num': None,
+        'extension': Path(filename).suffix.lower() if Path(filename).suffix else None,
+        'valid': False,
+        'error': last_error or f"Filename doesn't match any of {len(patterns)} patterns",
+        'raw_values': {}
+    }
+
+
+# Instagram has many filename formats from different download sources.
+# Each entry is a FilenameParser pattern string. List order is significant
+# wherever the list is tried first-match-wins (see parse_with_fallbacks).
+#
+# NOTE(review): with first-match-wins ordering, the first two generic patterns
+# appear to shadow the later "_n" and leading-underscore variants. {id} is
+# free text and {username} accepts underscores, so e.g.
+# "user_20251127_172753_123_n" already matches the first pattern with
+# id "123_n". Confirm whether the more specific patterns should come first.
+INSTAGRAM_PATTERNS = [
+    # Standard gallery-dl formats
+    '{username}_{YYYYMMDD}_{HHMMSS}_{id}',      # gallery-dl default (underscores)
+    '{username}-{YYYYMMDD}_{HHMMSS}-{id}',      # alternative format (dashes around date)
+    # Formats with _n suffix (common from some scrapers)
+    '{username}_{YYYYMMDD}_{HHMMSS}_{id}_n',    # with _n suffix
+    '{username}-{YYYYMMDD}_{HHMMSS}-{id}_n',    # dashes + _n suffix
+    # Formats with hl=en language parameter (imginn/instaloader variants)
+    '{username}_hl=en-{YYYYMMDD}_{HHMMSS}-{id}_n',   # language tag + _n suffix
+    '{username}_hl=en-{YYYYMMDD}_{HHMMSS}-{id}',     # language tag, no _n suffix
+    # Formats with leading underscore (some scrapers prefix underscore)
+    '_{username}_{YYYYMMDD}_{HHMMSS}_{id}_n',   # leading underscore + _n suffix
+    '_{username}_hl=en-{YYYYMMDD}_{HHMMSS}-{id}_n',  # leading underscore + lang + _n
+    # Formats with media shortcode before date (some browser extensions / save tools)
+    '{username}-video-{id}-{YYYYMMDD}_{HHMMSS}_{description}',  # username-video-shortcode-date_hash
+    '{username}-photo-{id}-{YYYYMMDD}_{HHMMSS}_{description}',  # username-photo-shortcode-date_hash
+    '{username}-{id}-{YYYYMMDD}_{HHMMSS}_{description}',        # username-shortcode-date_hash (no type prefix, must be last)
+]
+
+
+# Predefined patterns for common platforms, keyed by preset id.
+# Each preset is a dict with:
+#   name          - human-readable label
+#   pattern       - primary FilenameParser pattern string
+#   alt_patterns  - (Instagram presets only) the shared INSTAGRAM_PATTERNS list
+#                   object, not a copy; presumably used as fallbacks - verify
+#                   against the caller
+#   example       - sample filename for the pattern
+#   platform      - platform identifier string
+#   content_type  - content category string
+#   use_ytdlp     - (YouTube only) flag; its consumer is not visible here
+PRESET_PATTERNS = {
+    'instagram_stories': {
+        'name': 'Instagram Stories',
+        'pattern': '{username}_{YYYYMMDD}_{HHMMSS}_{id}',
+        'alt_patterns': INSTAGRAM_PATTERNS,
+        'example': 'evalongoria_20251127_172753_AQOGOcCUbrMy...',
+        'platform': 'instagram',
+        'content_type': 'stories'
+    },
+    'instagram_posts': {
+        'name': 'Instagram Posts',
+        'pattern': '{username}_{YYYYMMDD}_{HHMMSS}_{id}',
+        'alt_patterns': INSTAGRAM_PATTERNS,
+        'example': 'evalongoria_20251127_172753_18538674661006538',
+        'platform': 'instagram',
+        'content_type': 'posts'
+    },
+    'instagram_reels': {
+        'name': 'Instagram Reels',
+        'pattern': '{username}_{YYYYMMDD}_{HHMMSS}_{id}',
+        'alt_patterns': INSTAGRAM_PATTERNS,
+        'example': 'evalongoria_20251127_172753_18538674661006538',
+        'platform': 'instagram',
+        'content_type': 'reels'
+    },
+    'tiktok_videos': {
+        'name': 'TikTok Videos',
+        'pattern': '{YYYYMMDD}_{description}_{id}_{num}',
+        'example': '20251127_beautiful_sunset_1234567890_1',
+        'platform': 'tiktok',
+        'content_type': 'videos'
+    },
+    'snapchat_stories': {
+        'name': 'Snapchat Stories',
+        'pattern': '{username}_{YYYYMMDD}_{HHMMSS}_{id}',
+        'example': 'username_20251127_172753_story123',
+        'platform': 'snapchat',
+        'content_type': 'stories'
+    },
+    'youtube_videos': {
+        'name': 'YouTube Videos',
+        'pattern': '{id}',
+        'example': 'dQw4w9WgXcQ',
+        'platform': 'youtube',
+        'content_type': 'videos',
+        'use_ytdlp': True
+    }
+}
+
+
+def get_preset_patterns() -> Dict[str, Dict]:
+    """Get all predefined filename patterns."""
+    return PRESET_PATTERNS.copy()
+
+
+# Test/demo function
+if __name__ == '__main__':
+    # Test with the user's example
+    test_pattern = '{username}-{YYYYMMDD}_{HHMMSS}-{id}'
+    test_filename = 'tiannahcgarcia-20251127_172753-AQOGOcCUbrMyAL0VXcQjnpHr6aY6U25C1SbaREqFJv7_MVXNVUvBd290MwlNFmwOTK5PuLx6DtK9cYoot0c5Y6a4vuDtOaug2heLank.jpg'
+
+    parser = FilenameParser(test_pattern)
+    result = parser.parse(test_filename)
+
+    print(f"Pattern: {test_pattern}")
+    print(f"Filename: {test_filename}")
+    print(f"Result: {result}")
+    print()
+
+    # Test Instagram post format
+    test_pattern2 = '{username}_{YYYYMMDD}_{HHMMSS}_{id}'
+    test_filename2 = 'evalongoria_20251027_155842_18538674661006538.jpg'
+
+    parser2 = FilenameParser(test_pattern2)
+    result2 = parser2.parse(test_filename2)
+
+    print(f"Pattern: {test_pattern2}")
+    print(f"Filename: {test_filename2}")
+    print(f"Result: {result2}")
--- a/modules/forum_db_adapter.py
+++ b/modules/forum_db_adapter.py
@@ -0,0 +1,485 @@
+#!/usr/bin/env python3
+"""
+Forum Database Adapter for Unified Database
+Provides compatibility layer for forum_downloader to use UnifiedDatabase
+"""
+
+import sqlite3
+import json
+import hashlib
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+import time
+from modules.universal_logger import get_logger
+
+logger = get_logger('ForumAdapter')
+
+class ForumDatabaseAdapter:
+    """
+    Adapter to allow forum_downloader to use UnifiedDatabase
+    Mimics the original forum database interface
+    """
+    
+    def __init__(self, unified_db, db_path=None):
+        """
+        Initialize the adapter
+
+        Args:
+            unified_db: UnifiedDatabase instance
+            db_path: Ignored - kept for compatibility
+        """
+        self.unified_db = unified_db
+        self.db_path = db_path  # Keep for compatibility but not used
+
+    def get_file_hash(self, file_path: str) -> Optional[str]:
+        """Calculate SHA256 hash of a file (delegates to UnifiedDatabase)"""
+        from modules.unified_database import UnifiedDatabase
+        return UnifiedDatabase.get_file_hash(file_path)
+
+    def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
+        """Get download record by file hash (delegates to UnifiedDatabase)"""
+        return self.unified_db.get_download_by_file_hash(file_hash)
+
+    def __enter__(self):
+        return self
+        
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass
+    
+    def _get_connection(self):
+        """Get a connection from unified database"""
+        return self.unified_db.get_connection(for_write=True)
+
+    def _execute_with_retry(self, operation, retries: int = 3, for_write: bool = False):
+        """
+        Execute a database operation with retry logic for lock/deadlock errors.
+
+        Args:
+            operation: A callable that takes a connection and returns a result
+            retries: Number of retry attempts
+            for_write: Whether this is a write operation
+
+        Returns:
+            The result of the operation
+
+        Raises:
+            sqlite3.OperationalError: If operation fails after all retries
+        """
+        for attempt in range(retries):
+            try:
+                with self.unified_db.get_connection(for_write=for_write) as conn:
+                    return operation(conn)
+            except sqlite3.OperationalError as e:
+                if ("locked" in str(e) or "deadlock" in str(e).lower()) and attempt < retries - 1:
+                    delay = 1 + attempt * 2  # Exponential backoff
+                    logger.debug(f"Database locked, retrying in {delay} seconds...")
+                    time.sleep(delay)
+                    continue
+                else:
+                    logger.error(f"Database operation failed after {attempt + 1} attempts: {e}")
+                    raise
+        # This point should never be reached due to the raise above,
+        # but raise explicitly to satisfy type checkers
+        raise sqlite3.OperationalError("Database operation failed after all retries")
+    
+    def db_add_thread(self, thread_id: str, forum_name: str, thread_url: str,
+                     thread_title: str = None, monitor_until: datetime = None) -> bool:
+        """Add a forum thread to tracking"""
+        def operation(conn):
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT OR IGNORE INTO forum_threads
+                (thread_id, forum_name, thread_url, thread_title,
+                 created_date, last_checked, status, monitor_until)
+                VALUES (?, ?, ?, ?, ?, ?, 'active', ?)
+            ''', (thread_id, forum_name, thread_url, thread_title,
+                  datetime.now(), datetime.now(), monitor_until))
+            conn.commit()
+            return cursor.rowcount > 0
+
+        try:
+            return self._execute_with_retry(operation, for_write=True)
+        except Exception as e:
+            logger.error(f"Error adding thread: {e}")
+            return False
+    
+    def db_update_thread(self, thread_id: str, last_post_date: datetime = None,
+                        post_count: int = None) -> bool:
+        """Update thread information"""
+        # Build updates list outside the operation for clarity
+        updates = ["last_checked = ?"]
+        params = [datetime.now()]
+
+        if last_post_date:
+            updates.append("last_post_date = ?")
+            params.append(last_post_date)
+
+        if post_count is not None:
+            updates.append("post_count = ?")
+            params.append(post_count)
+
+        params.append(thread_id)
+
+        # Pre-build the SQL query to avoid f-string inside operation
+        sql = f'UPDATE forum_threads SET {", ".join(updates)} WHERE thread_id = ?'
+
+        def operation(conn):
+            cursor = conn.cursor()
+            cursor.execute(sql, params)
+            conn.commit()
+            return cursor.rowcount > 0
+
+        try:
+            return self._execute_with_retry(operation, for_write=True)
+        except Exception as e:
+            logger.error(f"Error updating thread {thread_id}: {e}")
+            return False
+    
+    def db_update_thread_last_checked(self, thread_id: str) -> bool:
+        """Update the last_checked timestamp for a forum thread"""
+        def operation(conn):
+            cursor = conn.cursor()
+            cursor.execute('''
+                UPDATE forum_threads
+                SET last_checked = ?
+                WHERE thread_id = ?
+            ''', (datetime.now(), thread_id))
+            conn.commit()
+            return cursor.rowcount > 0
+
+        try:
+            return self._execute_with_retry(operation, for_write=True)
+        except Exception as e:
+            logger.error(f"Error updating last_checked for thread {thread_id}: {e}")
+            return False
+    
+    def db_get_thread(self, thread_id: str) -> Optional[Dict]:
+        """Get thread information"""
+        def operation(conn):
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT * FROM forum_threads WHERE thread_id = ?",
+                (thread_id,)
+            )
+            row = cursor.fetchone()
+            return dict(row) if row else None
+
+        try:
+            return self._execute_with_retry(operation, for_write=False)
+        except Exception as e:
+            logger.error(f"Error getting thread {thread_id}: {e}")
+            return None
+    
+    def db_add_post(self, post_id: str, thread_id: str, post_url: str = None,
+                   author: str = None, post_date: datetime = None,
+                   has_images: bool = False) -> bool:
+        """Add a forum post"""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            try:
+                content_hash = hashlib.sha256(f"{thread_id}:{post_id}".encode()).hexdigest()
+                cursor.execute('''
+                    INSERT INTO forum_posts
+                    (post_id, thread_id, post_url, author, post_date,
+                     content_hash, has_images)
+                    VALUES (?, ?, ?, ?, ?, ?, ?)
+                    ON CONFLICT (post_id) DO UPDATE SET
+                        thread_id = EXCLUDED.thread_id,
+                        post_url = EXCLUDED.post_url,
+                        author = EXCLUDED.author,
+                        post_date = EXCLUDED.post_date,
+                        content_hash = EXCLUDED.content_hash,
+                        has_images = EXCLUDED.has_images
+                ''', (post_id, thread_id, post_url, author, post_date,
+                      content_hash, has_images))
+                conn.commit()
+                return True
+            except Exception as e:
+                logger.error(f"Error adding post: {e}")
+                return False
+    
+    def db_get_image_id(self, img_url: str) -> Optional[int]:
+        """Check if image already exists in downloads"""
+        url_hash = self.unified_db.get_url_hash(img_url)
+
+        def operation(conn):
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT id FROM downloads WHERE url_hash = ? AND platform = 'forums'",
+                (url_hash,)
+            )
+            row = cursor.fetchone()
+            return row[0] if row else None
+
+        try:
+            return self._execute_with_retry(operation, for_write=False)
+        except Exception as e:
+            logger.error(f"Error checking image existence: {e}")
+            return None
+    
+    def db_add_image(self, img_url: str, thread_id: str, post_id: str,
+                    filename: str, file_path: str, forum_name: str) -> bool:
+        """Add image to downloads"""
+        metadata = {
+            'thread_id': thread_id,
+            'post_id': post_id,
+            'forum_name': forum_name
+        }
+        
+        return self.unified_db.record_download(
+            url=img_url,
+            platform='forums',
+            source=forum_name,
+            content_type='image',
+            filename=filename,
+            file_path=file_path,
+            metadata=metadata
+        )
+    
+    def db_search_exists(self, search_id: str) -> bool:
+        """Check if search already exists"""
+        def operation(conn):
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT 1 FROM search_monitors WHERE search_id = ?",
+                (search_id,)
+            )
+            return cursor.fetchone() is not None
+
+        try:
+            return self._execute_with_retry(operation, for_write=False)
+        except Exception as e:
+            logger.error(f"Error checking search existence: {e}")
+            return False
+    
+    def db_add_search(self, search_id: str, forum_name: str, search_query: str,
+                     search_url: str = None, check_frequency_hours: int = 24) -> bool:
+        """Add or update search monitor"""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            try:
+                cursor.execute('''
+                    INSERT OR REPLACE INTO search_monitors
+                    (search_id, platform, source, search_query, search_url,
+                     last_checked, check_frequency_hours, active)
+                    VALUES (?, 'forums', ?, ?, ?, ?, ?, 1)
+                ''', (search_id, forum_name, search_query, search_url,
+                      datetime.now(), check_frequency_hours))
+                conn.commit()
+                return True
+            except Exception as e:
+                logger.error(f"Error adding search: {e}")
+                return False
+    
+    def db_update_search_results(self, search_id: str, results_count: int) -> bool:
+        """Update search results count"""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                UPDATE search_monitors
+                SET last_checked = ?, results_found = ?
+                WHERE search_id = ?
+            ''', (datetime.now(), results_count, search_id))
+            conn.commit()
+            return cursor.rowcount > 0
+    
+    def add_to_download_queue(self, url: str, referer: str = None, save_path: str = None,
+                            thread_id: str = None, post_id: str = None,
+                            forum_name: str = None, metadata: Dict = None) -> bool:
+        """Add item to download queue"""
+        # Check if already downloaded
+        if self.unified_db.is_downloaded(url, platform='forums'):
+            return False
+
+        # Check if already in queue (with retry logic)
+        def check_queue(conn):
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT status FROM download_queue WHERE url = ?",
+                (url,)
+            )
+            return cursor.fetchone()
+
+        try:
+            existing = self._execute_with_retry(check_queue, for_write=False)
+            if existing:
+                if existing[0] == 'completed':
+                    return False  # Already downloaded
+                elif existing[0] == 'pending':
+                    return False  # Already in queue
+        except Exception as e:
+            logger.error(f"Error checking download queue: {e}")
+            return False
+        
+        # Add to queue
+        queue_metadata = metadata or {}
+        queue_metadata.update({
+            'thread_id': thread_id,
+            'post_id': post_id,
+            'forum_name': forum_name
+        })
+        
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            try:
+                cursor.execute('''
+                    INSERT INTO download_queue
+                    (url, platform, source, referer, save_path, status, metadata)
+                    VALUES (?, 'forums', ?, ?, ?, 'pending', ?)
+                ''', (url, forum_name, referer, str(save_path) if save_path else None, json.dumps(queue_metadata)))
+                conn.commit()
+                return True
+            except sqlite3.IntegrityError:
+                return False  # URL already in queue
+            except Exception as e:
+                logger.error(f"Error adding to queue: {e}")
+                return False
+    
+    def is_in_download_queue(self, url: str) -> bool:
+        """Check if URL is in download queue"""
+        with self.unified_db.get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT 1 FROM download_queue WHERE url = ? AND status = 'pending'",
+                (url,)
+            )
+            return cursor.fetchone() is not None
+    
+    def is_already_downloaded(self, url: str, forum_name: str = None) -> bool:
+        """Check if thread URL is already being tracked"""
+        # For thread URLs, check the forum_threads table
+        import hashlib
+        thread_id = hashlib.sha256(url.encode()).hexdigest()
+
+        with self.unified_db.get_connection() as conn:
+            cursor = conn.cursor()
+            if forum_name:
+                # Check for specific forum
+                cursor.execute('''
+                    SELECT 1 FROM forum_threads
+                    WHERE forum_name = ? AND (thread_url = ? OR thread_id = ?)
+                    LIMIT 1
+                ''', (forum_name, url, thread_id))
+            else:
+                # Check any forum
+                cursor.execute('''
+                    SELECT 1 FROM forum_threads
+                    WHERE thread_url = ? OR thread_id = ?
+                    LIMIT 1
+                ''', (url, thread_id))
+            return cursor.fetchone() is not None
+    
+    def mark_download_complete(self, url: str, filename: str = None, 
+                              file_path: str = None) -> bool:
+        """Mark download as complete in queue"""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                UPDATE download_queue
+                SET status = 'completed', download_date = ?
+                WHERE url = ?
+            ''', (datetime.now(), url))
+            conn.commit()
+            return cursor.rowcount > 0
+    
+    def mark_download_failed(self, url: str, error_message: str = None) -> bool:
+        """Mark download as failed in queue"""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                UPDATE download_queue
+                SET status = 'failed', attempts = attempts + 1, error_message = ?
+                WHERE url = ?
+            ''', (error_message, url))
+            conn.commit()
+            return cursor.rowcount > 0
+    
+    def record_download(self, url: str, thread_id: str = None, post_id: str = None,
+                       filename: str = None, metadata: Dict = None, file_path: str = None,
+                       post_date = None) -> bool:
+        """Record a download in the unified database
+
+        Args:
+            url: URL of the downloaded content
+            thread_id: Forum thread ID
+            post_id: Forum post ID
+            filename: Name of downloaded file
+            metadata: Additional metadata dict
+            file_path: Full path to downloaded file
+            post_date: Date of the forum post (datetime or None)
+        """
+        # Extract forum name from metadata if available
+        forum_name = metadata.get('forum_name') if metadata else None
+
+        # Prepare full metadata
+        full_metadata = metadata or {}
+        if thread_id:
+            full_metadata['thread_id'] = thread_id
+        if post_id:
+            full_metadata['post_id'] = post_id
+
+        # Calculate file hash if file_path provided
+        file_hash = None
+        if file_path:
+            try:
+                from modules.unified_database import UnifiedDatabase
+                file_hash = UnifiedDatabase.get_file_hash(file_path)
+            except Exception:
+                pass  # If hash fails, continue without it
+
+        # Record in unified database
+        return self.unified_db.record_download(
+            url=url,
+            platform='forums',
+            source=forum_name or 'unknown',
+            content_type='image',
+            filename=filename,
+            file_path=file_path,
+            file_hash=file_hash,
+            post_date=post_date,
+            metadata=full_metadata
+        )
+    
+    def get_pending_downloads(self, limit: int = 100) -> List[Dict]:
+        """Get pending downloads from queue"""
+        with self.unified_db.get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT * FROM download_queue
+                WHERE platform = 'forums' AND status = 'pending'
+                ORDER BY priority, created_date
+                LIMIT ?
+            ''', (limit,))
+            return [dict(row) for row in cursor.fetchall()]
+    
+    def cleanup_old_data(self, days: int = 180):
+        """Clean up old data"""
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            
+            # Clean old downloads
+            cursor.execute('''
+                DELETE FROM downloads
+                WHERE platform = 'forums'
+                AND download_date < datetime('now', ? || ' days')
+                AND status = 'completed'
+            ''', (-days,))
+            
+            # Clean old queue items
+            cursor.execute('''
+                DELETE FROM download_queue
+                WHERE platform = 'forums'
+                AND created_date < datetime('now', ? || ' days')
+                AND status IN ('completed', 'failed')
+            ''', (-days,))
+            
+            # Expire old monitors
+            cursor.execute('''
+                UPDATE forum_threads
+                SET status = 'expired'
+                WHERE monitor_until < datetime('now')
+                AND status = 'active'
+            ''')
+            
+            conn.commit()
--- a/modules/forum_downloader.py
+++ b/modules/forum_downloader.py
--- a/modules/imginn_api_module.py
+++ b/modules/imginn_api_module.py
--- a/modules/imginn_module.py
+++ b/modules/imginn_module.py
--- a/modules/immich_face_integration.py
+++ b/modules/immich_face_integration.py
@@ -0,0 +1,410 @@
+#!/usr/bin/env python3
+"""
+Immich Face Integration Module
+
+Integrates with Immich's face recognition system to leverage its existing
+face clustering and recognition data for media-downloader files.
+
+Immich uses:
+- InsightFace with buffalo_l model (same as media-downloader)
+- DBSCAN clustering for face grouping
+- 512-dimensional face embeddings
+- PostgreSQL for storage
+
+Path mapping:
+- Media-downloader: /opt/immich/md/...
+- Immich sees: /mnt/media/md/...
+"""
+
+import os
+import json
+from pathlib import Path
+from typing import Optional, List, Dict, Any, Tuple
+from datetime import datetime
+import httpx
+from modules.universal_logger import get_logger
+
+logger = get_logger('ImmichFace')
+
+
+class ImmichFaceIntegration:
+    """Interface with Immich's face recognition system."""
+
+    # Path mapping between systems
+    LOCAL_BASE = '/opt/immich'
+    IMMICH_BASE = '/mnt/media'
+
+    def __init__(self, api_url: str = None, api_key: str = None):
+        """
+        Initialize Immich face integration.
+
+        Args:
+            api_url: Immich API URL (default: http://localhost:2283/api)
+            api_key: Immich API key
+        """
+        self.api_url = (api_url or os.getenv('IMMICH_API_URL', 'http://localhost:2283/api')).rstrip('/')
+        self.api_key = api_key or os.getenv('IMMICH_API_KEY', '')
+        self._client = None
+        self._people_cache = None
+        self._people_cache_time = None
+        self._cache_ttl = 300  # 5 minutes
+
+    @property
+    def is_configured(self) -> bool:
+        """Check if Immich integration is properly configured."""
+        return bool(self.api_key)
+
+    def _get_client(self) -> httpx.Client:
+        """Get or create HTTP client."""
+        if self._client is None:
+            self._client = httpx.Client(
+                base_url=self.api_url,
+                headers={
+                    'x-api-key': self.api_key,
+                    'Accept': 'application/json'
+                },
+                timeout=30.0
+            )
+        return self._client
+
+    def _local_to_immich_path(self, local_path: str) -> str:
+        """
+        Convert local path to Immich's path format.
+
+        Example:
+            /opt/immich/md/instagram/user/image.jpg
+            -> /mnt/media/md/instagram/user/image.jpg
+        """
+        return local_path.replace(self.LOCAL_BASE, self.IMMICH_BASE)
+
+    def _immich_to_local_path(self, immich_path: str) -> str:
+        """
+        Convert Immich's path to local path format.
+
+        Example:
+            /mnt/media/md/instagram/user/image.jpg
+            -> /opt/immich/md/instagram/user/image.jpg
+        """
+        return immich_path.replace(self.IMMICH_BASE, self.LOCAL_BASE)
+
+    def test_connection(self) -> Dict[str, Any]:
+        """
+        Test connection to Immich API.
+
+        Returns:
+            Dict with 'success', 'message', and optionally 'server_info'
+        """
+        if not self.is_configured:
+            return {
+                'success': False,
+                'message': 'Immich API key not configured'
+            }
+
+        try:
+            client = self._get_client()
+            response = client.get('/server/ping')
+
+            if response.status_code == 200:
+                # Get server info
+                info_response = client.get('/server/version')
+                server_info = info_response.json() if info_response.status_code == 200 else {}
+
+                return {
+                    'success': True,
+                    'message': 'Connected to Immich',
+                    'server_info': server_info
+                }
+            else:
+                return {
+                    'success': False,
+                    'message': f'Immich API returned status {response.status_code}'
+                }
+        except httpx.ConnectError as e:
+            return {
+                'success': False,
+                'message': f'Cannot connect to Immich at {self.api_url}: {e}'
+            }
+        except Exception as e:
+            return {
+                'success': False,
+                'message': f'Immich API error: {e}'
+            }
+
+    def get_all_people(self, force_refresh: bool = False) -> List[Dict[str, Any]]:
+        """
+        Get all people/faces from Immich.
+
+        Returns:
+            List of people with id, name, thumbnailPath, etc.
+        """
+        if not self.is_configured:
+            return []
+
+        # Check cache
+        if not force_refresh and self._people_cache is not None:
+            if self._people_cache_time:
+                age = (datetime.now() - self._people_cache_time).total_seconds()
+                if age < self._cache_ttl:
+                    return self._people_cache
+
+        try:
+            client = self._get_client()
+            response = client.get('/people')
+
+            if response.status_code == 200:
+                data = response.json()
+                # Immich returns {'people': [...], 'total': N, ...}
+                people = data.get('people', data) if isinstance(data, dict) else data
+
+                # Cache the result
+                self._people_cache = people
+                self._people_cache_time = datetime.now()
+
+                logger.info(f"Fetched {len(people)} people from Immich")
+                return people
+            else:
+                logger.error(f"Failed to get people: {response.status_code}")
+                return []
+        except Exception as e:
+            logger.error(f"Error getting people from Immich: {e}")
+            return []
+
+    def get_named_people(self) -> List[Dict[str, Any]]:
+        """
+        Get only people with names assigned in Immich.
+
+        Returns:
+            List of named people
+        """
+        people = self.get_all_people()
+        return [p for p in people if p.get('name')]
+
+    def get_asset_by_path(self, local_path: str) -> Optional[Dict[str, Any]]:
+        """
+        Find an Immich asset by its file path.
+
+        Args:
+            local_path: Local file path (e.g., /opt/immich/md/...)
+
+        Returns:
+            Asset dict or None if not found
+        """
+        if not self.is_configured:
+            return None
+
+        immich_path = self._local_to_immich_path(local_path)
+
+        try:
+            client = self._get_client()
+
+            # Search by original path
+            response = client.post('/search/metadata', json={
+                'originalPath': immich_path
+            })
+
+            if response.status_code == 200:
+                data = response.json()
+                assets = data.get('assets', {}).get('items', [])
+                if assets:
+                    return assets[0]
+
+            return None
+        except Exception as e:
+            logger.error(f"Error searching asset by path: {e}")
+            return None
+
+    def get_faces_for_asset(self, asset_id: str) -> List[Dict[str, Any]]:
+        """
+        Get all detected faces for an asset.
+
+        Args:
+            asset_id: Immich asset ID
+
+        Returns:
+            List of face data including person info and bounding boxes
+        """
+        if not self.is_configured:
+            return []
+
+        try:
+            client = self._get_client()
+            response = client.get(f'/faces', params={'id': asset_id})
+
+            if response.status_code == 200:
+                return response.json()
+            else:
+                logger.warning(f"Failed to get faces for asset {asset_id}: {response.status_code}")
+                return []
+        except Exception as e:
+            logger.error(f"Error getting faces for asset: {e}")
+            return []
+
+    def get_faces_for_file(self, local_path: str) -> Dict[str, Any]:
+        """
+        Get face recognition results for a local file using Immich.
+
+        This is the main method for integration - given a local file path,
+        it finds the asset in Immich and returns any detected faces.
+
+        Args:
+            local_path: Local file path (e.g., /opt/immich/md/...)
+
+        Returns:
+            Dict with:
+                - found: bool - whether file exists in Immich
+                - faces: list of detected faces with person names
+                - asset_id: Immich asset ID if found
+        """
+        if not self.is_configured:
+            return {
+                'found': False,
+                'error': 'Immich not configured',
+                'faces': []
+            }
+
+        # Find the asset
+        asset = self.get_asset_by_path(local_path)
+        if not asset:
+            return {
+                'found': False,
+                'error': 'File not found in Immich',
+                'faces': []
+            }
+
+        asset_id = asset.get('id')
+
+        # Get faces for the asset
+        faces_data = self.get_faces_for_asset(asset_id)
+
+        # Process faces into a more usable format
+        faces = []
+        for face in faces_data:
+            person = face.get('person', {})
+            faces.append({
+                'face_id': face.get('id'),
+                'person_id': person.get('id'),
+                'person_name': person.get('name', ''),
+                'bounding_box': {
+                    'x1': face.get('boundingBoxX1'),
+                    'y1': face.get('boundingBoxY1'),
+                    'x2': face.get('boundingBoxX2'),
+                    'y2': face.get('boundingBoxY2')
+                },
+                'image_width': face.get('imageWidth'),
+                'image_height': face.get('imageHeight')
+            })
+
+        # Filter to only named faces
+        named_faces = [f for f in faces if f['person_name']]
+
+        return {
+            'found': True,
+            'asset_id': asset_id,
+            'faces': faces,
+            'named_faces': named_faces,
+            'face_count': len(faces),
+            'named_count': len(named_faces)
+        }
+
+    def get_person_by_name(self, name: str) -> Optional[Dict[str, Any]]:
+        """
+        Find a person in Immich by name.
+
+        Args:
+            name: Person name to search for
+
+        Returns:
+            Person dict or None
+        """
+        people = self.get_all_people()
+        for person in people:
+            if person.get('name', '').lower() == name.lower():
+                return person
+        return None
+
+    def get_person_assets(self, person_id: str, limit: int = 1000) -> List[Dict[str, Any]]:
+        """
+        Get all assets containing a specific person using search API.
+
+        Args:
+            person_id: Immich person ID
+            limit: Maximum number of assets to return
+
+        Returns:
+            List of assets
+        """
+        if not self.is_configured:
+            return []
+
+        try:
+            client = self._get_client()
+            # Use the search/metadata endpoint with personIds filter
+            response = client.post('/search/metadata', json={
+                'personIds': [person_id],
+                'size': limit
+            })
+
+            if response.status_code == 200:
+                data = response.json()
+                return data.get('assets', {}).get('items', [])
+            else:
+                logger.warning(f"Failed to get assets for person {person_id}: {response.status_code}")
+                return []
+        except Exception as e:
+            logger.error(f"Error getting person assets: {e}")
+            return []
+
+    def get_statistics(self) -> Dict[str, Any]:
+        """
+        Get Immich face recognition statistics.
+
+        Returns:
+            Dict with total people, named people, etc.
+        """
+        people = self.get_all_people()
+        named = [p for p in people if p.get('name')]
+
+        return {
+            'total_people': len(people),
+            'named_people': len(named),
+            'unnamed_people': len(people) - len(named),
+            'people_by_face_count': sorted(
+                [{'name': p.get('name', 'Unnamed'), 'count': p.get('faces', 0)}
+                 for p in people if p.get('name')],
+                key=lambda x: x['count'],
+                reverse=True
+            )[:20]
+        }
+
+    def close(self):
+        """Close HTTP client."""
+        if self._client:
+            self._client.close()
+            self._client = None
+
+
+# Singleton instance
+_immich_integration = None
+
+
+def get_immich_integration(api_url: str = None, api_key: str = None) -> ImmichFaceIntegration:
+    """
+    Get or create the Immich face integration instance.
+
+    Args:
+        api_url: Optional API URL override
+        api_key: Optional API key override
+
+    Returns:
+        ImmichFaceIntegration instance
+    """
+    global _immich_integration
+
+    if _immich_integration is None:
+        _immich_integration = ImmichFaceIntegration(api_url, api_key)
+    elif api_key and api_key != _immich_integration.api_key:
+        # Recreate if API key changed
+        _immich_integration.close()
+        _immich_integration = ImmichFaceIntegration(api_url, api_key)
+
+    return _immich_integration
--- a/modules/instagram_client_module.py
+++ b/modules/instagram_client_module.py
--- a/modules/instagram_perceptual_duplicate_detector.py
+++ b/modules/instagram_perceptual_duplicate_detector.py
@@ -0,0 +1,868 @@
+"""
+Instagram Perceptual Duplicate Detector
+
+Detects visually similar Instagram content (even with text overlays, stickers, etc.)
+and keeps the cleanest + highest quality version.
+
+Priority: Clean (no overlays) > Quality (resolution/size)
+"""
+
+import os
+import gc
+import json
+import uuid
+from pathlib import Path
+from typing import Optional, Dict, Tuple, List, TYPE_CHECKING
+from datetime import datetime
+from modules.universal_logger import get_logger
+
+if TYPE_CHECKING:
+    import numpy as np
+
+try:
+    import cv2
+    import numpy as np
+    OPENCV_AVAILABLE = True
+except ImportError:
+    OPENCV_AVAILABLE = False
+    np = None  # Define np as None when not available
+
+try:
+    import imagehash
+    from PIL import Image
+    IMAGEHASH_AVAILABLE = True
+except ImportError:
+    IMAGEHASH_AVAILABLE = False
+
+# OCR disabled — not currently needed
+EASYOCR_AVAILABLE = False
+TESSERACT_AVAILABLE = False
+
+
+class InstagramPerceptualDuplicateDetector:
+    """
+    Detects perceptual duplicates in Instagram content and keeps cleanest + best quality
+    """
+
+    def __init__(self, unified_db, log_callback=None):
+        """
+        Initialize detector
+
+        Args:
+            unified_db: UnifiedDatabase instance
+            log_callback: Optional legacy callback (deprecated, uses universal logger)
+        """
+        self.db = unified_db
+        self.logger = get_logger('Perceptual_Duplicate_Detector')
+        self.easyocr_reader = None
+
+        # Initialize EasyOCR reader (lazy loading - only when needed)
+        if EASYOCR_AVAILABLE:
+            try:
+                # Suppress PyTorch pin_memory warning (we're using CPU anyway)
+                import warnings
+                warnings.filterwarnings('ignore', category=UserWarning, module='torch.utils.data.dataloader')
+
+                self.easyocr_reader = easyocr.Reader(['en'], gpu=False, verbose=False)
+                self.logger.debug("EasyOCR initialized for text overlay detection", module="Perceptual")
+            except Exception as e:
+                self.logger.warning(f"Failed to initialize EasyOCR: {e}, will use Tesseract fallback", module="Perceptual")
+                self.easyocr_reader = None
+
+        # Check dependencies
+        if not OPENCV_AVAILABLE:
+            self.logger.warning("OpenCV not available - perceptual duplicate detection disabled", module="Perceptual")
+        if not IMAGEHASH_AVAILABLE:
+            self.logger.warning("imagehash not available - perceptual duplicate detection disabled", module="Perceptual")
+        if not EASYOCR_AVAILABLE and not TESSERACT_AVAILABLE:
+            self.logger.debug("No OCR available (EasyOCR or pytesseract) - text overlay detection disabled", module="Perceptual")
+
+        self.dependencies_available = OPENCV_AVAILABLE and IMAGEHASH_AVAILABLE
+
+    def check_and_handle_duplicate(self, file_path: str, platform: str, source: str, content_type: str = None) -> Optional[str]:
+        """
+        Check if file is a perceptual duplicate and handle accordingly
+
+        The perceptual hash and scores are calculated and stored even when
+        detection is disabled (globally or for this platform) to build a
+        historical database. Duplicate detection/handling only runs when enabled.
+
+        Args:
+            file_path: Path of the new file to check
+            platform: Platform name, matched case-insensitively against the
+                'platforms' setting
+            source: Source identifier passed to the similarity search and to
+                hash storage
+            content_type: Optional content type stored with the hash
+
+        Returns:
+            - None if dependencies are missing, the hash could not be calculated,
+              detection is disabled (globally or for this platform), or an
+              exception occurred
+            - "skip" if this file should be skipped (an existing file is better
+              or of the same quality); _move_to_recycle() has been called for it
+            - file_path if this file should be kept (no similar file found, or
+              it replaced the best existing file)
+        """
+        filename = Path(file_path).name
+
+        self.logger.debug(f"[PERCEPTUAL] ENTRY: check_and_handle_duplicate called", module="Perceptual")
+        self.logger.debug(f"[PERCEPTUAL]   File: {filename}", module="Perceptual")
+        self.logger.debug(f"[PERCEPTUAL]   Platform: {platform}", module="Perceptual")
+        self.logger.debug(f"[PERCEPTUAL]   Source: {source}", module="Perceptual")
+        self.logger.debug(f"[PERCEPTUAL]   Content Type: {content_type}", module="Perceptual")
+
+        if not self.dependencies_available:
+            self.logger.warning(f"[PERCEPTUAL] SKIP: Dependencies not available (OpenCV/ImageHash missing)", module="Perceptual")
+            return None
+
+        # Get settings
+        settings = self._get_settings()
+        detection_enabled = settings.get('enabled', False)
+
+        self.logger.debug(f"[PERCEPTUAL] Settings loaded:", module="Perceptual")
+        self.logger.debug(f"[PERCEPTUAL]   Enabled: {detection_enabled}", module="Perceptual")
+        self.logger.debug(f"[PERCEPTUAL]   Platforms: {settings.get('platforms', [])}", module="Perceptual")
+        self.logger.debug(f"[PERCEPTUAL]   Threshold: {settings.get('perceptual_hash_threshold', 12)}", module="Perceptual")
+
+        try:
+            # ALWAYS calculate perceptual hash and scores (even when detection disabled)
+            # This builds the historical database for future use
+            self.logger.debug(f"[PERCEPTUAL] Calculating perceptual hash for {filename}...", module="Perceptual")
+            phash = self._calculate_perceptual_hash(file_path)
+            if not phash:
+                self.logger.error(f"[PERCEPTUAL] FAILED: Could not calculate perceptual hash for {filename}", module="Perceptual")
+                return None
+
+            self.logger.debug(f"[PERCEPTUAL] Hash calculated: {phash[:32]}...", module="Perceptual")
+
+            # Text detection defaults to on; when switched off both counts are 0
+            text_count, text_chars = self._detect_text_overlays(file_path) if settings.get('text_detection_enabled', True) else (0, 0)
+            quality_metrics = self._get_quality_metrics(file_path)
+
+            clean_score = self._calculate_clean_score(text_count, text_chars)
+            quality_score = self._calculate_quality_score(quality_metrics)
+
+            self.logger.debug(f"[PERCEPTUAL] Scores calculated:", module="Perceptual")
+            self.logger.debug(f"[PERCEPTUAL]   Clean Score: {clean_score:.2f}", module="Perceptual")
+            self.logger.debug(f"[PERCEPTUAL]   Quality Score: {quality_score:.2f}", module="Perceptual")
+            self.logger.debug(f"[PERCEPTUAL]   Text Overlays: {text_count} ({text_chars} chars)", module="Perceptual")
+
+            # If detection is disabled, just store the hash and return (no duplicate checking)
+            if not detection_enabled:
+                self.logger.debug(f"[PERCEPTUAL] SKIP: Detection disabled - storing hash only for {filename}", module="Perceptual")
+                self._store_perceptual_hash(
+                    file_path, platform, source, content_type,
+                    phash, text_count, text_chars, quality_score, clean_score, quality_metrics
+                )
+                return None  # Detection disabled, allow file to proceed
+
+            # Check if this platform is enabled for detection
+            # (only 'instagram' when the setting is absent)
+            platform_enabled = platform.lower() in [p.lower() for p in settings.get('platforms', ['instagram'])]
+            self.logger.debug(f"[PERCEPTUAL] Platform check: {platform} enabled = {platform_enabled}", module="Perceptual")
+
+            if not platform_enabled:
+                self.logger.debug(f"[PERCEPTUAL] SKIP: Platform '{platform}' not in enabled list - storing hash only", module="Perceptual")
+                self._store_perceptual_hash(
+                    file_path, platform, source, content_type,
+                    phash, text_count, text_chars, quality_score, clean_score, quality_metrics
+                )
+                return None  # Platform not enabled, allow file to proceed
+
+            # Detection is enabled - perform duplicate checking
+            self.logger.debug(f"[PERCEPTUAL] CHECKING FOR DUPLICATES: {filename}", module="Perceptual")
+            self.logger.debug(f"[PERCEPTUAL]   Platform: {platform}, Source: {source}", module="Perceptual")
+
+            self.logger.log(
+                f"[PERCEPTUAL] New file: {Path(file_path).name} | "
+                f"Hash: {phash[:16]}... | Clean: {clean_score:.2f} | Quality: {quality_score:.2f}",
+                "info"
+            )
+
+            # Find perceptual duplicates in database
+            threshold = settings.get('perceptual_hash_threshold', 12)
+            self.logger.debug(f"[PERCEPTUAL] Searching for similar files (threshold: {threshold})...", module="Perceptual")
+
+            similar_files = self._find_similar_files(
+                phash,
+                platform,
+                source,
+                threshold
+            )
+
+            self.logger.debug(f"[PERCEPTUAL] Similar files found: {len(similar_files)}", module="Perceptual")
+
+            if similar_files:
+                for i, sim in enumerate(similar_files, 1):
+                    self.logger.debug(
+                        f"[PERCEPTUAL]   #{i}: {sim['filename']} | "
+                        f"Distance: {sim['hamming_distance']} | "
+                        f"Clean: {sim['clean_score']:.2f} | "
+                        f"Quality: {sim['quality_score']:.2f}",
+                        module="Perceptual"
+                    )
+
+            if not similar_files:
+                # No duplicates found - store this file's hash and continue
+                self.logger.debug(f"[PERCEPTUAL] NO DUPLICATES FOUND - keeping {filename}", module="Perceptual")
+                self._store_perceptual_hash(
+                    file_path, platform, source, content_type,
+                    phash, text_count, text_chars, quality_score, clean_score, quality_metrics
+                )
+                return file_path  # Keep this file
+
+            # Found similar file(s) - compare and decide which to keep
+            best_existing = self._get_best_existing_file(similar_files)
+
+            self.logger.debug(f"[PERCEPTUAL] DUPLICATE DETECTED!", module="Perceptual")
+            self.logger.debug(f"[PERCEPTUAL] Best existing file: {best_existing['filename']}", module="Perceptual")
+            self.logger.debug(f"[PERCEPTUAL]   Clean: {best_existing['clean_score']:.2f}", module="Perceptual")
+            self.logger.debug(f"[PERCEPTUAL]   Quality: {best_existing['quality_score']:.2f}", module="Perceptual")
+            self.logger.debug(f"[PERCEPTUAL]   Path: {best_existing['file_path']}", module="Perceptual")
+
+            # Compare new file vs best existing
+            self.logger.debug(f"[PERCEPTUAL] Comparing new vs existing...", module="Perceptual")
+            comparison = self._compare_files(
+                new_clean=clean_score,
+                new_quality=quality_score,
+                existing_clean=best_existing['clean_score'],
+                existing_quality=best_existing['quality_score'],
+                settings=settings
+            )
+
+            self.logger.debug(f"[PERCEPTUAL] Comparison result: {comparison}", module="Perceptual")
+
+            if comparison == "new_better":
+                # New file is better - move existing to recycle, keep new
+                self.logger.info(
+                    f"[PERCEPTUAL] Replacing {best_existing['filename']} with cleaner version: {filename}",
+                    module="Perceptual"
+                )
+
+                # Move existing to recycle bin
+                self._move_to_recycle(
+                    best_existing['file_path'],
+                    reason='replaced_with_cleaner_duplicate',
+                    new_file=file_path
+                )
+
+                # Update database - replace old entry with new
+                # (only the best existing entry is replaced; other similar entries stay)
+                self._replace_perceptual_hash_entry(
+                    old_id=best_existing['id'],
+                    new_file_path=file_path,
+                    new_phash=phash,
+                    new_text_count=text_count,
+                    new_text_chars=text_chars,
+                    new_quality_score=quality_score,
+                    new_clean_score=clean_score,
+                    new_quality_metrics=quality_metrics
+                )
+
+                return file_path  # Keep new file
+
+            elif comparison == "existing_better":
+                # Existing file is better - move new to recycle, keep existing
+                self.logger.info(
+                    f"[PERCEPTUAL] Skipping {filename} (duplicate of {best_existing['filename']})",
+                    module="Perceptual"
+                )
+
+                # Move new file to recycle bin
+                self._move_to_recycle(
+                    file_path,
+                    reason='duplicate_lower_quality_or_has_overlays',
+                    kept_file=best_existing['file_path']
+                )
+
+                return "skip"  # Skip this file
+
+            else:
+                # Same quality - keep existing (default behavior)
+                self.logger.info(
+                    f"[PERCEPTUAL] Skipping {filename} (same quality as {best_existing['filename']})",
+                    module="Perceptual"
+                )
+
+                self._move_to_recycle(
+                    file_path,
+                    reason='duplicate_same_quality',
+                    kept_file=best_existing['file_path']
+                )
+
+                return "skip"
+
+        except Exception as e:
+            # Any failure is logged and treated as "not a duplicate" so the file proceeds
+            self.logger.error(f"[PERCEPTUAL] EXCEPTION: {e}", module="Perceptual")
+            import traceback
+            self.logger.error(f"[PERCEPTUAL] Traceback:\n{traceback.format_exc()}", module="Perceptual")
+            return None
+
+    def _get_settings(self) -> dict:
+        """Get Instagram perceptual duplicate settings from database"""
+        try:
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute("SELECT value FROM settings WHERE key = 'instagram_perceptual_duplicates'")
+                result = cursor.fetchone()
+                if result:
+                    return json.loads(result[0])
+        except Exception as e:
+            self.logger.debug(f"Failed to get perceptual duplicate settings: {e}", module="Perceptual")
+
+        return {'enabled': False}
+
+    def _calculate_perceptual_hash(self, file_path: str) -> Optional[str]:
+        """Calculate perceptual hash for image or video"""
+        if not IMAGEHASH_AVAILABLE:
+            return None
+
+        frame = None
+        frame_rgb = None
+        pil_image = None
+
+        try:
+            # For videos, extract middle frame
+            if file_path.lower().endswith(('.mp4', '.mov', '.avi', '.mkv')):
+                frame = self._extract_video_frame(file_path)
+                if frame is None:
+                    return None
+
+                # Convert frame to PIL Image
+                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                pil_image = Image.fromarray(frame_rgb)
+            else:
+                # For images, open directly
+                pil_image = Image.open(file_path)
+
+            # Calculate perceptual hash (dHash - difference hash)
+            phash = str(imagehash.dhash(pil_image, hash_size=16))
+            return phash
+
+        except Exception as e:
+            self.logger.debug(f"Failed to calculate perceptual hash: {e}", module="Perceptual")
+            return None
+        finally:
+            # Clean up memory
+            if pil_image is not None:
+                pil_image.close()
+                del pil_image
+            if frame_rgb is not None:
+                del frame_rgb
+            if frame is not None:
+                del frame
+            gc.collect()
+
+    def _extract_video_frame(self, video_path: str, position: float = 0.5) -> Optional['np.ndarray']:
+        """Extract a frame from video at given position (0.0 to 1.0)"""
+        if not OPENCV_AVAILABLE:
+            return None
+
+        try:
+            cap = cv2.VideoCapture(video_path)
+            if not cap.isOpened():
+                return None
+
+            # Get total frames and seek to middle
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            target_frame = int(total_frames * position)
+            cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame)
+
+            ret, frame = cap.read()
+            cap.release()
+
+            return frame if ret else None
+
+        except Exception as e:
+            self.logger.debug(f"Failed to extract video frame: {e}", module="Perceptual")
+            return None
+
+    def _detect_text_overlays(self, file_path: str) -> Tuple[int, int]:
+        """
+        Detect text overlays in image/video using EasyOCR (primary) or Tesseract (fallback)
+
+        Returns:
+            (text_region_count, total_text_characters)
+        """
+        if not self.easyocr_reader and not TESSERACT_AVAILABLE:
+            return (0, 0)
+
+        if not OPENCV_AVAILABLE:
+            return (0, 0)
+
+        image = None
+        gray = None
+
+        try:
+            text_regions = 0
+            total_chars = 0
+
+            # Load image or extract video frame
+            if file_path.lower().endswith(('.mp4', '.mov', '.avi', '.mkv')):
+                image = self._extract_video_frame(file_path)
+                if image is None:
+                    return (0, 0)
+            else:
+                image = cv2.imread(file_path)
+                if image is None:
+                    return (0, 0)
+
+            # Try EasyOCR first (better for Instagram overlays)
+            if self.easyocr_reader:
+                try:
+                    # EasyOCR works directly with image arrays
+                    results = self.easyocr_reader.readtext(image)
+
+                    # EasyOCR returns list of (bbox, text, confidence)
+                    for bbox, text, conf in results:
+                        if conf > 0.5:  # Only use detections with >50% confidence
+                            text_stripped = text.strip()
+                            if text_stripped:
+                                text_regions += 1
+                                total_chars += len(text_stripped)
+
+                    if text_regions > 0:
+                        self.logger.log(
+                            f"[OVERLAY] EasyOCR detected {text_regions} text regions, {total_chars} chars in {Path(file_path).name}",
+                            "debug"
+                        )
+                        return (text_regions, total_chars)
+
+                except Exception as e:
+                    self.logger.debug(f"EasyOCR failed: {e}, falling back to Tesseract", module="Perceptual")
+
+            # Fallback to Tesseract if EasyOCR didn't find anything or failed
+            if TESSERACT_AVAILABLE:
+                try:
+                    # Convert to grayscale for Tesseract
+                    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+
+                    # Run OCR with detailed data
+                    ocr_data = pytesseract.image_to_data(
+                        gray,
+                        output_type=pytesseract.Output.DICT,
+                        config='--psm 11'  # Sparse text mode
+                    )
+
+                    # Count text regions and characters
+                    text_regions = 0
+                    total_chars = 0
+                    confidence_threshold = 30
+
+                    for i, conf in enumerate(ocr_data['conf']):
+                        if int(conf) > confidence_threshold:
+                            text = ocr_data['text'][i].strip()
+                            if text:
+                                text_regions += 1
+                                total_chars += len(text)
+
+                    self.logger.log(
+                        f"[OVERLAY] Tesseract (fallback) detected {text_regions} text regions, {total_chars} chars in {Path(file_path).name}",
+                        "debug"
+                    )
+
+                except Exception as e:
+                    self.logger.debug(f"Tesseract OCR failed: {e}", module="Perceptual")
+
+            return (text_regions, total_chars)
+
+        except Exception as e:
+            self.logger.debug(f"Text overlay detection failed: {e}", module="Perceptual")
+            return (0, 0)
+        finally:
+            # Clean up memory - these are large numpy arrays
+            if gray is not None:
+                del gray
+            if image is not None:
+                del image
+            gc.collect()
+
+    def _get_quality_metrics(self, file_path: str) -> dict:
+        """
+        Gather the file size plus ffprobe-reported dimensions and bitrate.
+
+        Args:
+            file_path: Path of the media file to inspect
+
+        Returns:
+            Dict with 'resolution' (width * height), 'width', 'height',
+            'file_size' (bytes) and 'bitrate' (container bit_rate // 1000).
+            Values that could not be determined stay 0; failures are logged
+            at debug level and never raised.
+        """
+        import subprocess
+
+        info = dict.fromkeys(('resolution', 'width', 'height', 'file_size', 'bitrate'), 0)
+
+        try:
+            # Size comes straight from the filesystem
+            info['file_size'] = Path(file_path).stat().st_size
+
+            # Ask ffprobe for stream and container metadata as JSON
+            probe = subprocess.run(
+                ['ffprobe', '-v', 'quiet', '-print_format', 'json',
+                 '-show_format', '-show_streams', file_path],
+                capture_output=True,
+                text=True,
+                timeout=10
+            )
+            if probe.returncode != 0:
+                return info
+
+            parsed = json.loads(probe.stdout)
+
+            # Pick the first stream flagged as video, if there is one
+            video = None
+            for stream in parsed.get('streams', []):
+                if stream.get('codec_type') == 'video':
+                    video = stream
+                    break
+
+            if video:
+                info['width'] = int(video.get('width', 0))
+                info['height'] = int(video.get('height', 0))
+                info['resolution'] = info['width'] * info['height']
+
+            # Container-level bitrate is optional in ffprobe output
+            container = parsed.get('format', {})
+            if 'bit_rate' in container:
+                info['bitrate'] = int(container['bit_rate']) // 1000
+
+        except Exception as e:
+            self.logger.debug(f"Failed to get quality metrics: {e}", module="Perceptual")
+
+        return info
+
+    def _calculate_clean_score(self, text_count: int, text_chars: int) -> float:
+        """
+        Score how free of text overlays a file is (0-100, higher = cleaner).
+
+        Args:
+            text_count: Number of detected text regions
+            text_chars: Total number of detected characters
+
+        Returns:
+            100 minus both penalties, clamped to the 0-100 range
+        """
+        # 10 points per text region, capped at 50
+        region_penalty = min(text_count * 10, 50)
+
+        # 5 points per full block of 10 characters, capped at 40
+        char_penalty = min((text_chars // 10) * 5, 40)
+
+        remaining = 100.0 - region_penalty - char_penalty
+
+        # Clamp into the 0-100 range
+        return max(0.0, min(100.0, remaining))
+
+    def _calculate_quality_score(self, metrics: dict) -> float:
+        """
+        Score technical quality (0-100) from pixel count and file size.
+
+        Args:
+            metrics: Dict read for 'resolution' (pixel count) and 'file_size'
+                     (bytes); missing keys count as 0
+
+        Returns:
+            Up to 60 points for resolution plus up to 40 points for file size
+        """
+        full_hd_pixels = 2_073_600  # 1920x1080 earns the full 60 points
+        bytes_per_mb = 1024 * 1024
+
+        # Resolution part: linear in pixel count, capped at 60 (720p is roughly 27)
+        pixels = metrics.get('resolution', 0)
+        resolution_points = min((pixels / full_hd_pixels) * 60, 60) if pixels > 0 else 0
+
+        # Size part: linear in megabytes, 10MB or more earns the full 40
+        size_bytes = metrics.get('file_size', 0)
+        size_points = min(((size_bytes / bytes_per_mb) / 10) * 40, 40) if size_bytes > 0 else 0
+
+        return min(100.0, 0.0 + resolution_points + size_points)
+
+    def _find_similar_files(self, phash: str, platform: str, source: str, threshold: int) -> List[dict]:
+        """
+        Look up stored files whose perceptual hash is close to ``phash``.
+
+        Every row stored with platform 'instagram' is compared, whatever its
+        source, so reposts from other accounts are caught too. The ``platform``
+        argument is not used by the query and ``source`` only appears in the
+        debug log.
+
+        Args:
+            phash: Perceptual hash of the new file
+            platform: Unused here (the query is fixed to 'instagram')
+            source: Source/account of the new file (logged only)
+            threshold: Maximum distance that still counts as a match
+
+        Returns:
+            One dict per stored file within ``threshold`` that still exists on
+            disk, holding the selected columns plus 'hamming_distance'.
+            An empty list when nothing matches or the lookup fails.
+        """
+        columns = (
+            'id', 'file_path', 'filename', 'perceptual_hash',
+            'text_overlay_count', 'text_overlay_chars',
+            'quality_score', 'clean_score', 'resolution', 'file_size',
+        )
+
+        try:
+            with self.db.get_connection() as conn:
+                cur = conn.cursor()
+
+                # Describe the search in the debug log
+                preamble = (
+                    "[PERCEPTUAL_SEARCH] Querying database:",
+                    "[PERCEPTUAL_SEARCH]   Platform: instagram",
+                    f"[PERCEPTUAL_SEARCH]   Source: {source}",
+                    f"[PERCEPTUAL_SEARCH]   Threshold: {threshold}",
+                )
+                for line in preamble:
+                    self.logger.debug(line, module="Perceptual")
+
+                # All Instagram content is searched, regardless of download method
+                cur.execute("""
+                    SELECT id, file_path, filename, perceptual_hash,
+                           text_overlay_count, text_overlay_chars,
+                           quality_score, clean_score, resolution, file_size
+                    FROM instagram_perceptual_hashes
+                    WHERE platform = 'instagram'
+                """)
+
+                stored = cur.fetchall()
+                self.logger.debug(f"[PERCEPTUAL_SEARCH] Database returned {len(stored)} existing files (checking across all sources)", module="Perceptual")
+
+                matches = []
+                n_checked = n_close = n_missing = 0
+
+                for row in stored:
+                    stored_name = row[2]
+                    distance = self._hamming_distance(phash, row[3])
+                    n_checked += 1
+
+                    # Too different - not a candidate
+                    if distance > threshold:
+                        continue
+                    n_close += 1
+
+                    # Close enough, but the file may have been removed since it was hashed
+                    if not Path(row[1]).exists():
+                        n_missing += 1
+                        self.logger.debug(
+                            f"[PERCEPTUAL_SEARCH]   MATCH but file missing: {stored_name} (distance: {distance})",
+                            module="Perceptual"
+                        )
+                        continue
+
+                    self.logger.debug(
+                        f"[PERCEPTUAL_SEARCH]   MATCH: {stored_name} (distance: {distance})",
+                        module="Perceptual"
+                    )
+                    entry = {name: row[idx] for idx, name in enumerate(columns)}
+                    entry['hamming_distance'] = distance
+                    matches.append(entry)
+
+                self.logger.debug(
+                    f"[PERCEPTUAL_SEARCH] Checked {n_checked} hashes, "
+                    f"{n_close} within threshold, "
+                    f"{n_missing} missing files, "
+                    f"{len(matches)} valid matches",
+                    module="Perceptual"
+                )
+
+                return matches
+
+        except Exception as e:
+            self.logger.error(f"Failed to find similar files: {e}", module="Perceptual")
+            return []
+
+    def _hamming_distance(self, hash1: str, hash2: str) -> int:
+        """
+        Calculate the Hamming distance between two hash strings.
+
+        The comparison is positional and per character: each position where
+        the two strings differ counts as 1.
+
+        Args:
+            hash1: First hash string
+            hash2: Second hash string
+
+        Returns:
+            Number of differing positions, or 999 when the hashes cannot be
+            compared (either one is None, or their lengths differ).
+        """
+        # A missing hash can never match; report "incomparable" instead of
+        # raising, so one bad value does not abort a whole search loop.
+        if hash1 is None or hash2 is None:
+            return 999
+
+        if len(hash1) != len(hash2):
+            return 999  # Invalid comparison
+
+        return sum(c1 != c2 for c1, c2 in zip(hash1, hash2))
+
+    def _get_best_existing_file(self, similar_files: List[dict]) -> dict:
+        """
+        Pick the preferred entry among similar files.
+
+        Entries are ranked by 'clean_score' first and 'quality_score' second,
+        highest first; among equal entries the one listed first wins.
+
+        Args:
+            similar_files: Candidate dicts carrying both score keys
+
+        Returns:
+            The top-ranked dict, or None when there are no candidates
+        """
+        if not similar_files:
+            return None
+
+        def rank(entry):
+            # Clean score dominates; quality only breaks ties
+            return (entry['clean_score'], entry['quality_score'])
+
+        # Sort a copy so the caller's list is left untouched
+        ranked = list(similar_files)
+        ranked.sort(key=rank, reverse=True)
+        return ranked[0]
+
+    def _compare_files(self, new_clean: float, new_quality: float,
+                      existing_clean: float, existing_quality: float,
+                      settings: dict) -> str:
+        """
+        Decide whether a new file or the existing one should be kept.
+
+        Two stages:
+        1. Quality override - when both quality scores are positive and one
+           file's quality is at least ``quality_ratio_threshold`` (default 2.0)
+           times the other's, that file wins outright, provided its clean score
+           reaches ``min_acceptable_clean`` (default 30). This keeps a
+           low-resolution file from winning only because less text was detected.
+        2. Weighted comparison - clean and quality scores are combined using
+           ``clean_score_weight`` (default 3) and ``quality_score_weight``
+           (default 1); a winner is declared only when the gap is at least
+           ``min_text_difference`` (default 5) times the clean weight.
+
+        Returns: "new_better", "existing_better", or "same"
+        """
+        clean_w = settings.get('clean_score_weight', 3)
+        quality_w = settings.get('quality_score_weight', 1)
+        min_gap_points = settings.get('min_text_difference', 5)
+        clean_floor = settings.get('min_acceptable_clean', 30)
+        ratio_needed = settings.get('quality_ratio_threshold', 2.0)
+
+        # Stage 1: dramatic quality difference (needs both scores to be positive)
+        if new_quality > 0 and existing_quality > 0:
+            new_over_old = new_quality / existing_quality
+            old_over_new = existing_quality / new_quality
+
+            if new_over_old >= ratio_needed and new_clean >= clean_floor:
+                self.logger.debug(
+                    f"[PERCEPTUAL] New file wins: {new_over_old:.1f}x better quality "
+                    f"(new: Q={new_quality:.1f}/C={new_clean:.1f}, existing: Q={existing_quality:.1f}/C={existing_clean:.1f})",
+                    module="Perceptual"
+                )
+                return "new_better"
+
+            if old_over_new >= ratio_needed and existing_clean >= clean_floor:
+                self.logger.debug(
+                    f"[PERCEPTUAL] Existing file wins: {old_over_new:.1f}x better quality "
+                    f"(existing: Q={existing_quality:.1f}/C={existing_clean:.1f}, new: Q={new_quality:.1f}/C={new_clean:.1f})",
+                    module="Perceptual"
+                )
+                return "existing_better"
+
+        # Stage 2: weighted totals
+        new_total = (new_clean * clean_w) + (new_quality * quality_w)
+        old_total = (existing_clean * clean_w) + (existing_quality * quality_w)
+
+        # The required gap is scaled by the clean weight
+        significant = abs(new_total - old_total) >= min_gap_points * clean_w
+
+        if significant and new_total > old_total:
+            return "new_better"
+        if significant and old_total > new_total:
+            return "existing_better"
+        return "same"
+
+    def _store_perceptual_hash(self, file_path: str, platform: str, source: str, content_type: str,
+                               phash: str, text_count: int, text_chars: int,
+                               quality_score: float, clean_score: float, quality_metrics: dict):
+        """
+        Insert or refresh the perceptual-hash row for a file.
+
+        A row is looked up by ``file_path``. If one exists it is overwritten
+        in place (and its created_at reset); otherwise a new row with a fresh
+        UUID id is inserted. A falsy ``content_type`` is stored as 'unknown'
+        and metrics missing from ``quality_metrics`` are stored as 0.
+
+        Failures are logged at error level and not raised.
+        """
+        try:
+            with self.db.get_connection() as conn:
+                cur = conn.cursor()
+
+                # Is there already a row for this exact path?
+                cur.execute("""
+                    SELECT id FROM instagram_perceptual_hashes
+                    WHERE file_path = ?
+                """, (str(file_path),))
+
+                found = cur.fetchone()
+
+                # Column values shared by both statements, in column order
+                # (filename ... height)
+                name = Path(file_path).name
+                shared = (
+                    name,
+                    platform,
+                    source,
+                    content_type or 'unknown',
+                    phash,
+                    text_count,
+                    text_chars,
+                    quality_score,
+                    clean_score,
+                ) + tuple(
+                    quality_metrics.get(field, 0)
+                    for field in ('resolution', 'file_size', 'width', 'height')
+                )
+
+                if found:
+                    # Overwrite the existing row
+                    cur.execute("""
+                        UPDATE instagram_perceptual_hashes
+                        SET filename = ?,
+                            platform = ?,
+                            source = ?,
+                            content_type = ?,
+                            perceptual_hash = ?,
+                            text_overlay_count = ?,
+                            text_overlay_chars = ?,
+                            quality_score = ?,
+                            clean_score = ?,
+                            resolution = ?,
+                            file_size = ?,
+                            width = ?,
+                            height = ?,
+                            created_at = CURRENT_TIMESTAMP
+                        WHERE id = ?
+                    """, shared + (found[0],))
+                    self.logger.debug(f"[PERCEPTUAL] Updated hash for {name}", module="Perceptual")
+                else:
+                    # First time this path is seen: new row, new id
+                    cur.execute("""
+                        INSERT INTO instagram_perceptual_hashes
+                        (id, file_path, filename, platform, source, content_type,
+                         perceptual_hash, text_overlay_count, text_overlay_chars,
+                         quality_score, clean_score, resolution, file_size, width, height)
+                        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """, (str(uuid.uuid4()), str(file_path)) + shared)
+                    self.logger.debug(f"[PERCEPTUAL] Stored hash for {name}", module="Perceptual")
+
+                conn.commit()
+
+        except Exception as e:
+            self.logger.error(f"Failed to store perceptual hash: {e}", module="Perceptual")
+            # Note: Connection context manager handles rollback automatically on exception
+
+    def _replace_perceptual_hash_entry(self, old_id: str, new_file_path: str,
+                                       new_phash: str, new_text_count: int, new_text_chars: int,
+                                       new_quality_score: float, new_clean_score: float,
+                                       new_quality_metrics: dict):
+        """
+        Point an existing hash row at a different file.
+
+        The row identified by ``old_id`` keeps its id but has its path,
+        filename, hash, overlay counts, scores and dimensions replaced by the
+        new file's values; created_at is reset. Metrics missing from
+        ``new_quality_metrics`` are stored as 0.
+
+        Failures are logged at error level and not raised.
+        """
+        try:
+            with self.db.get_connection() as conn:
+                cur = conn.cursor()
+
+                # Values in the order of the SET clause, followed by the row id
+                replacement = (
+                    str(new_file_path),
+                    Path(new_file_path).name,
+                    new_phash,
+                    new_text_count,
+                    new_text_chars,
+                    new_quality_score,
+                    new_clean_score,
+                ) + tuple(
+                    new_quality_metrics.get(field, 0)
+                    for field in ('resolution', 'file_size', 'width', 'height')
+                ) + (old_id,)
+
+                cur.execute("""
+                    UPDATE instagram_perceptual_hashes
+                    SET file_path = ?,
+                        filename = ?,
+                        perceptual_hash = ?,
+                        text_overlay_count = ?,
+                        text_overlay_chars = ?,
+                        quality_score = ?,
+                        clean_score = ?,
+                        resolution = ?,
+                        file_size = ?,
+                        width = ?,
+                        height = ?,
+                        created_at = CURRENT_TIMESTAMP
+                    WHERE id = ?
+                """, replacement)
+
+                conn.commit()
+
+        except Exception as e:
+            self.logger.error(f"Failed to replace perceptual hash entry: {e}", module="Perceptual")
+
+    def _move_to_recycle(self, file_path: str, reason: str, **metadata):
+        """
+        Move a file to the recycle bin, deleting it outright if that fails.
+
+        Args:
+            file_path: File to remove
+            reason: Why the file is being removed (stored under 'reason')
+            **metadata: Extra key/value pairs stored alongside the reason
+
+        Never raises. If the recycle bin call fails, the file is permanently
+        deleted as a fallback; both the permanent deletion and a failure of
+        that fallback are logged as warnings so the outcome is traceable.
+        """
+        try:
+            self.db.move_to_recycle_bin(
+                file_path=file_path,
+                deleted_from='instagram_perceptual_duplicate_detection',
+                deleted_by='system',
+                metadata={
+                    'reason': reason,
+                    **metadata
+                }
+            )
+
+            self.logger.debug(f"[PERCEPTUAL] Moved to recycle: {Path(file_path).name}", module="Perceptual")
+
+        except Exception as e:
+            self.logger.warning(f"Failed to move file to recycle: {e}", module="Perceptual")
+            # Fallback to delete if recycle fails
+            try:
+                Path(file_path).unlink()
+            except Exception as unlink_error:
+                # Still best-effort, but no longer silent: the file may remain on disk
+                self.logger.warning(
+                    f"[PERCEPTUAL] Fallback delete failed for {file_path}: {unlink_error}",
+                    module="Perceptual"
+                )
+            else:
+                # Unlike a recycle, this cannot be undone - leave a trace
+                self.logger.warning(
+                    f"[PERCEPTUAL] Recycle failed; permanently deleted {Path(file_path).name}",
+                    module="Perceptual"
+                )
--- a/modules/instagram_rate_limiter.py
+++ b/modules/instagram_rate_limiter.py
@@ -0,0 +1,163 @@
+"""
+Shared Instagram API rate limiter.
+
+Tracks authenticated API calls in a rolling 1-hour window and enforces
+a configurable max rate. Both the main scraper and paid content modules
+use this to avoid exceeding Instagram's rate threshold.
+"""
+import logging
+import os
+import threading
+import time
+from collections import deque
+
+# Logs go to the logger named 'media_downloader' (a fixed name, not __name__)
+logger = logging.getLogger('media_downloader')
+
+# Absolute path of the file that persists the pause deadline. It holds a single
+# unix timestamp as text: written by pause_until(), read back by
+# _load_pause_state() and removed by resume() or once the deadline has passed.
+_PAUSE_FILE = '/opt/media-downloader/data/.ig_paused_until'
+
+
+class InstagramBlockedError(Exception):
+    """Signals that Instagram API calls are currently paused because the account is restricted."""
+
+
+class _InstagramRateLimiter:
+    """
+    Rolling-window limiter for authenticated Instagram API calls.
+
+    Call timestamps are kept in a deque. ``wait_if_needed`` sleeps once the
+    number of calls inside the window reaches the limit, and adds a shorter
+    delay from 80% of the limit upwards. A pause deadline can also be set;
+    while it is in the future ``wait_if_needed`` raises InstagramBlockedError.
+    The deadline is written to ``_PAUSE_FILE`` so it is picked up again when a
+    new instance is created.
+    """
+
+    def __init__(self, max_calls_per_hour=180, window_seconds=3600):
+        """
+        Args:
+            max_calls_per_hour: Maximum number of calls allowed inside one window
+            window_seconds: Length of the rolling window in seconds
+        """
+        self._lock = threading.Lock()  # held around every access to _timestamps
+        self._operation_lock = threading.Lock()  # Cross-module mutex
+        self._timestamps = deque()
+        self.max_calls = max_calls_per_hour
+        self.window = window_seconds
+        self._paused_until = 0  # Unix timestamp — block all calls until this time
+        self._load_pause_state()
+
+    @staticmethod
+    def _format_deadline(timestamp):
+        """Render a unix timestamp as local 'YYYY-MM-DD HH:MM:SS' for log messages."""
+        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))
+
+    def _drop_expired(self, cutoff):
+        """Discard recorded calls older than ``cutoff``. The caller must hold ``_lock``."""
+        calls = self._timestamps
+        while calls and calls[0] < cutoff:
+            calls.popleft()
+
+    def _sleep_without_lock(self, seconds):
+        """Release ``_lock`` for the duration of the sleep, then take it again."""
+        self._lock.release()
+        try:
+            time.sleep(seconds)
+        finally:
+            self._lock.acquire()
+
+    def _load_pause_state(self):
+        """
+        Restore the pause deadline from disk (survives restarts).
+
+        A deadline that has already passed is discarded and its file removed.
+        Any error while reading is ignored.
+        """
+        try:
+            if not os.path.exists(_PAUSE_FILE):
+                return
+            with open(_PAUSE_FILE) as handle:
+                deadline = float(handle.read().strip())
+
+            still_blocked = deadline > time.time()
+            if not still_blocked:
+                # Expired, clean up
+                os.remove(_PAUSE_FILE)
+                return
+
+            self._paused_until = deadline
+            logger.warning(
+                f"[IG-RateLimit] Loaded pause state — blocked until "
+                f"{self._format_deadline(deadline)}"
+            )
+        except Exception:
+            pass
+
+    def pause_until(self, timestamp: float):
+        """
+        Block all Instagram API calls until the given unix timestamp.
+
+        The deadline is also written to ``_PAUSE_FILE``; a failure to write it
+        is ignored, the in-memory pause still applies.
+        """
+        self._paused_until = timestamp
+        try:
+            os.makedirs(os.path.dirname(_PAUSE_FILE), exist_ok=True)
+            with open(_PAUSE_FILE, 'w') as handle:
+                handle.write(str(timestamp))
+        except Exception:
+            pass
+        logger.warning(
+            f"[IG-RateLimit] All Instagram API calls PAUSED until "
+            f"{self._format_deadline(timestamp)}"
+        )
+
+    def resume(self):
+        """Lift the pause: clear the in-memory deadline and remove the pause file."""
+        self._paused_until = 0
+        try:
+            stale = os.path.exists(_PAUSE_FILE)
+            if stale:
+                os.remove(_PAUSE_FILE)
+        except Exception:
+            pass
+        logger.info("[IG-RateLimit] Instagram API calls RESUMED")
+
+    @property
+    def is_paused(self):
+        """True while an in-memory pause deadline is set and still in the future."""
+        return bool(self._paused_until and time.time() < self._paused_until)
+
+    def track_call(self):
+        """Record an API call at the current time, without any waiting."""
+        with self._lock:
+            self._timestamps.append(time.time())
+
+    def wait_if_needed(self):
+        """Block if approaching rate limit. Call before each authenticated API request.
+
+        The call is recorded before returning, after any sleep.
+
+        Raises InstagramBlockedError if calls are paused due to account restriction.
+        """
+        # Kill switch first, outside the lock. When nothing is paused in
+        # memory, re-read the file: another process may have set a pause.
+        if not self._paused_until:
+            self._load_pause_state()
+        if self._paused_until:
+            current = time.time()
+            if current < self._paused_until:
+                hours = (self._paused_until - current) / 3600
+                raise InstagramBlockedError(
+                    f"Instagram API paused — account restricted. "
+                    f"Resuming in {hours:.1f}h"
+                )
+            # Restriction expired, auto-resume
+            self.resume()
+
+        with self._lock:
+            now = time.time()
+            cutoff = now - self.window
+            self._drop_expired(cutoff)
+
+            count = len(self._timestamps)
+
+            if count >= self.max_calls:
+                # At limit — wait until the oldest call in window expires
+                wait_time = self._timestamps[0] - cutoff + 1
+                logger.warning(
+                    f"[IG-RateLimit] At limit ({count}/{self.max_calls} calls/hr) — "
+                    f"waiting {wait_time:.0f}s"
+                )
+                self._sleep_without_lock(wait_time)
+            elif count >= self.max_calls * 0.8:
+                # Approaching limit (80%+) — add progressive delay
+                ratio = count / self.max_calls
+                delay = 2 + (ratio - 0.8) * 40  # 2s at 80%, 10s at 100%
+                logger.info(
+                    f"[IG-RateLimit] Approaching limit ({count}/{self.max_calls} calls/hr) — "
+                    f"adding {delay:.1f}s delay"
+                )
+                self._sleep_without_lock(delay)
+
+            self._timestamps.append(time.time())
+
+    @property
+    def operation_lock(self):
+        """Lock for serializing Instagram operations (main scraper vs paid content)."""
+        return self._operation_lock
+
+    @property
+    def calls_in_window(self):
+        """Number of recorded calls that still fall inside the rolling window."""
+        with self._lock:
+            self._drop_expired(time.time() - self.window)
+            return len(self._timestamps)
+
+
+# Module-level singleton: importing `rate_limiter` from this module gives every
+# caller the same call window, pause state and operation lock. Creating it runs
+# _load_pause_state(), so a pause deadline persisted on disk takes effect at import.
+rate_limiter = _InstagramRateLimiter(max_calls_per_hour=180)
--- a/modules/instagram_repost_detector.py
+++ b/modules/instagram_repost_detector.py
@@ -0,0 +1,782 @@
+#!/usr/bin/env python3
+"""
+Instagram Story Repost Detector Module
+
+Detects when Instagram stories are reposts/screenshots of other users' content,
+then replaces low-quality reposts with high-quality originals from the source.
+
+Features:
+- OCR-based repost detection (@username extraction)
+- ImgInn for downloading both stories and posts
+- Perceptual hash matching for content identification
+- Smart account filtering (monitored vs non-monitored)
+- Automatic cleanup of temporary downloads
+- Database tracking of all replacements
+"""
+
+import os
+import re
+import shutil
+from pathlib import Path
+from typing import Optional, Dict, List, Tuple
+from datetime import datetime, timedelta
+from modules.base_module import LoggingMixin
+from modules.universal_logger import get_logger
+
+# Module-level logger for import-time messages
+_module_logger = get_logger('RepostDetector')
+
+# Pillow is a hard requirement: this import is unguarded, so a missing install
+# raises ImportError when the module is imported
+from PIL import Image  # Always needed
+
+# OCR disabled — not currently needed.
+# Both flags are hard-coded and neither easyocr nor pytesseract is imported in
+# this module. While TESSERACT_AVAILABLE is False, check_and_replace_repost()
+# returns None at its dependency check, before any file is inspected.
+EASYOCR_AVAILABLE = False
+TESSERACT_AVAILABLE = False
+
+# Optional imports - fail gracefully (log a warning and clear the flag) if not available
+try:
+    import cv2
+    import numpy as np
+    CV2_AVAILABLE = True
+except ImportError:
+    CV2_AVAILABLE = False
+    _module_logger.warning("opencv-python not available - video processing disabled", module='RepostDetector')
+
+try:
+    import imagehash
+    IMAGEHASH_AVAILABLE = True
+except ImportError:
+    IMAGEHASH_AVAILABLE = False
+    _module_logger.warning("imagehash not available - perceptual hashing disabled", module='RepostDetector')
+
+
+class InstagramRepostDetector(LoggingMixin):
+    """
+    Detects and replaces Instagram story reposts with original content
+    """
+
+    def __init__(self, unified_db, log_callback=None):
+        """
+        Initialize the detector
+
+        Args:
+            unified_db: UnifiedDatabase instance
+            log_callback: Optional logging callback function(message, level)
+        """
+        # Initialize logging via mixin
+        self._init_logger('RepostDetector', log_callback, default_module='RepostDetector')
+
+        self.db = unified_db
+        self.temp_download_path = Path("/tmp/repost_detection")
+        self.last_original_username = None
+        self.easyocr_reader = None
+
+        # Ensure temp directory exists
+        self.temp_download_path.mkdir(parents=True, exist_ok=True)
+
+        # Initialize EasyOCR reader (lazy loading - only when needed)
+        if EASYOCR_AVAILABLE:
+            try:
+                # Suppress PyTorch pin_memory warning (we're using CPU anyway)
+                import warnings
+                warnings.filterwarnings('ignore', category=UserWarning, module='torch.utils.data.dataloader')
+
+                self.easyocr_reader = easyocr.Reader(['en'], gpu=False, verbose=False)
+                self.log("EasyOCR initialized for text detection", "info")
+            except Exception as e:
+                self.log(f"Failed to initialize EasyOCR: {e}", "warning")
+                self.easyocr_reader = None
+
+        # Check dependencies
+        self._check_dependencies()
+
+    def _check_dependencies(self):
+        """Check if all required dependencies are available"""
+        missing = []
+
+        if not TESSERACT_AVAILABLE:
+            missing.append("pytesseract/PIL (pip3 install pytesseract pillow)")
+        if not CV2_AVAILABLE:
+            missing.append("opencv-python (pip3 install opencv-python)")
+        if not IMAGEHASH_AVAILABLE:
+            missing.append("imagehash (pip3 install imagehash)")
+
+        if missing:
+            self.log(f"Missing dependencies: {', '.join(missing)}", "warning")
+            self.log("Repost detection will be disabled until dependencies are installed", "warning")
+
+    def check_and_replace_repost(self, file_path: str, source_username: str) -> Optional[str]:
+        """
+        Check if story is a repost and replace with original
+
+        Args:
+            file_path: Path to potential repost file
+            source_username: Username who posted this story (e.g., evalongoria)
+
+        Returns:
+            Path to replacement file if found, None otherwise
+        """
+        # Check dependencies
+        if not all([TESSERACT_AVAILABLE, CV2_AVAILABLE, IMAGEHASH_AVAILABLE]):
+            self.log("Cannot process - missing dependencies", "debug")
+            return None
+
+        file_path = Path(file_path)
+        if not file_path.exists():
+            self.log(f"File not found: {file_path}", "error")
+            return None
+
+        self.log(f"Checking for repost: {file_path.name}", "info")
+
+        # Step 1: OCR to extract original @username
+        original_username = self._extract_username_from_repost(str(file_path))
+        if not original_username:
+            self.log(f"No @username detected - not a repost", "debug")
+            return None
+
+        # Check if user is reposting their own content
+        if original_username.lower() == source_username.lower():
+            self.log(f"@{source_username} is reposting their own content - skipping", "debug")
+            return None
+
+        self.log(f"Detected repost from @{original_username} in @{source_username}'s story", "info")
+        self.last_original_username = original_username
+
+        # Step 2: Check if original user is monitored
+        is_monitored = self._is_monitored_account(original_username)
+
+        # Step 3: Always save repost sources permanently (for face recognition + quality)
+        # Even non-monitored accounts get saved - they were discovered via reposts
+        download_path = Path("/opt/immich/md/instagram") / original_username
+        add_to_database = True
+
+        if is_monitored:
+            self.log(f"@{original_username} is monitored - checking existing content", "info")
+        else:
+            self.log(f"@{original_username} NOT monitored - but saving permanently (discovered via repost)", "info")
+
+        # Step 4: Check if we already fetched this user's content today
+        if not self._already_fetched_today(original_username):
+            # Step 5: Download stories + recent posts
+            self.log(f"Downloading content from @{original_username} via ImgInn...", "info")
+            success = self._download_content_via_imginn(
+                username=original_username,
+                destination=download_path,
+                add_to_database=add_to_database
+            )
+
+            if not success:
+                self.log(f"Failed to download content from @{original_username}", "error")
+                return None
+        else:
+            self.log(f"Content from @{original_username} already fetched today - using cache", "info")
+
+        # Step 6: Find matching original via perceptual hash
+        original_file = self._find_matching_original(
+            repost_path=str(file_path),
+            search_dir=download_path
+        )
+
+        if not original_file:
+            self.log(f"No matching original found for {file_path.name}", "warning")
+            # Keep all downloaded files - they'll be processed by move manager (face recognition, etc.)
+            self.log(f"Keeping all downloaded content from @{original_username} for processing", "info")
+            return None
+
+        # Step 7: Replace repost with original
+        replacement = self._replace_repost_with_original(
+            repost_path=str(file_path),
+            original_path=original_file
+        )
+
+        # All files are kept permanently - move manager will process them
+        self.log(f"All content from @{original_username} saved to {download_path}", "info")
+
+        return replacement
+
+    def _extract_username_region(self, img: Image.Image) -> Image.Image:
+        """Extract just the username region (top-left) and scale up for better OCR"""
+        if not CV2_AVAILABLE:
+            # Fallback: just crop using PIL
+            width, height = img.size
+            # Crop top 8% of image where username appears
+            return img.crop((0, 0, width, int(height * 0.08)))
+
+        try:
+            # Convert PIL to OpenCV format
+            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+
+            # Instagram usernames appear in top-left corner
+            # Crop to top 8% where username text is located
+            height, width = img_cv.shape[:2]
+            username_region = img_cv[0:int(height * 0.08), :]
+
+            # Convert to grayscale for better OCR
+            gray = cv2.cvtColor(username_region, cv2.COLOR_BGR2GRAY)
+
+            # Scale up 4x for better OCR on small text
+            # Instagram story usernames are quite small
+            scaled = cv2.resize(gray, None, fx=4, fy=4, interpolation=cv2.INTER_CUBIC)
+
+            # Convert back to PIL
+            return Image.fromarray(scaled)
+        except Exception as e:
+            self.log(f"Username region extraction failed: {e}", "debug")
+            return img
+
+    def _extract_username_from_repost(self, file_path: str) -> Optional[str]:
+        """
+        Extract @username from repost overlay using OCR (EasyOCR primary, Tesseract fallback)
+
+        Handles both images and videos (multi-frame extraction for videos)
+        """
+        # Check if we have any OCR available
+        if not self.easyocr_reader and not TESSERACT_AVAILABLE:
+            self.log("No OCR engine available", "warning")
+            return None
+
+        try:
+            # For images: Use EasyOCR (much better than Tesseract for Instagram overlays)
+            if file_path.endswith(('.jpg', '.jpeg', '.png', '.webp', '.heic')):
+
+                # Try EasyOCR first (best for Instagram stories)
+                if self.easyocr_reader:
+                    try:
+                        results = self.easyocr_reader.readtext(file_path)
+
+                        # EasyOCR returns list of (bbox, text, confidence)
+                        all_text = []
+                        for bbox, text, conf in results:
+                            if conf > 0.5:  # Only use detections with >50% confidence
+                                all_text.append(text)
+
+                        text = " ".join(all_text)
+                        if text.strip():
+                            self.log(f"EasyOCR detected text: {text[:100]}", "debug")
+                    except Exception as e:
+                        self.log(f"EasyOCR failed: {e}, falling back to Tesseract", "debug")
+                        text = ""
+                else:
+                    text = ""
+
+                # Fallback to Tesseract if EasyOCR didn't find anything
+                if not text.strip() and TESSERACT_AVAILABLE:
+                    with Image.open(file_path) as img:
+                        username_region = self._extract_username_region(img)
+
+                        for config in ['--psm 7', '--psm 11', '--psm 6']:
+                            try:
+                                ocr_result = pytesseract.image_to_string(username_region, config=config)
+                                if ocr_result and len(ocr_result.strip()) > 2:
+                                    text = ocr_result
+                                    self.log(f"Tesseract (fallback) text: {text[:100]}", "debug")
+                                    break
+                            except Exception:
+                                continue
+
+            # For videos: extract multiple frames and OCR each
+            elif file_path.endswith(('.mp4', '.mov', '.avi', '.mkv', '.webm')):
+                if not CV2_AVAILABLE:
+                    self.log("OpenCV not available - cannot process video", "warning")
+                    return None
+
+                video = cv2.VideoCapture(file_path)
+                if not video.isOpened():
+                    self.log(f"Failed to open video: {file_path}", "warning")
+                    return None
+
+                frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+                if frame_count == 0:
+                    self.log(f"Video has no frames: {file_path}", "warning")
+                    return None
+
+                # Check frames at 0%, 10%, and 50% positions
+                frames_to_check = [
+                    0,
+                    max(0, int(frame_count * 0.1)),
+                    max(0, int(frame_count * 0.5))
+                ]
+
+                text = ""
+                for frame_num in frames_to_check:
+                    video.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
+                    ret, frame = video.read()
+                    if ret:
+                        # Try EasyOCR first
+                        if self.easyocr_reader:
+                            try:
+                                results = self.easyocr_reader.readtext(frame)
+                                for bbox, frame_text, conf in results:
+                                    if conf > 0.5:
+                                        text += frame_text + " "
+                            except Exception as e:
+                                self.log(f"EasyOCR video frame failed: {e}", "debug")
+
+                        # Fallback to Tesseract if needed
+                        if not text.strip() and TESSERACT_AVAILABLE:
+                            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                            img = Image.fromarray(frame_rgb)
+                            username_region = self._extract_username_region(img)
+
+                            for config in ['--psm 7', '--psm 11', '--psm 6']:
+                                try:
+                                    ocr_result = pytesseract.image_to_string(username_region, config=config)
+                                    if ocr_result and len(ocr_result.strip()) > 2:
+                                        text += ocr_result + "\n"
+                                        break
+                                except Exception:
+                                    continue
+
+                video.release()
+                self.log(f"OCR text (video, {len(frames_to_check)} frames): {text[:100]}...", "debug")
+            else:
+                self.log(f"Unsupported file format: {file_path}", "debug")
+                return None
+
+            # Parse text to find @username or just username
+            # Pattern 1: @ followed by username characters (including space which might be underscore)
+            # Instagram usernames can have underscores, but OCR sometimes reads them as spaces
+            matches = re.findall(r'@([a-zA-Z0-9._ ]+)', text)
+
+            if matches:
+                # Clean up: remove trailing spaces, convert spaces to underscores
+                username = matches[0].strip().replace(' ', '_')
+                # Remove any characters that aren't valid in Instagram usernames
+                username = re.sub(r'[^a-zA-Z0-9._]', '', username)
+                # Remove trailing dots/underscores
+                username = username.rstrip('._')
+
+                if len(username) >= 3:  # Valid Instagram username minimum
+                    self.log(f"Extracted username (with @): @{username}", "info")
+                    return username
+
+            # Pattern 2: Instagram username without @ (at least 3 chars, lowercase letters, numbers, dots, underscores)
+            # Filter out common OCR noise and make sure it's a valid Instagram username pattern
+            lines = text.split('\n')
+            for line in lines:
+                line = line.strip().lower()
+                # Match Instagram username pattern: 3-30 chars, alphanumeric + dots/underscores
+                if re.match(r'^[a-z0-9._]{3,30}$', line):
+                    # Additional filter: likely an Instagram username (not random text)
+                    # Instagram usernames don't end with dots and must contain letters
+                    if not line.endswith('.') and re.search(r'[a-z]', line):
+                        self.log(f"Extracted username (without @): @{line}", "info")
+                        return line
+
+        except Exception as e:
+            self.log(f"OCR extraction failed: {e}", "warning")
+
+        return None
+
+    def _is_monitored_account(self, username: str) -> bool:
+        """
+        Check if username is in search_monitors
+
+        Returns True if user is being actively monitored for downloads
+        """
+        try:
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+
+                # Check search_monitors table
+                cursor.execute("""
+                    SELECT 1 FROM search_monitors
+                    WHERE platform IN ('instagram', 'instaloader', 'fastdl', 'imginn')
+                    AND source = ?
+                    AND active = 1
+                    LIMIT 1
+                """, (username,))
+
+                return cursor.fetchone() is not None
+
+        except Exception as e:
+            self.log(f"Error checking monitored status: {e}", "error")
+            return False
+
+    def _already_fetched_today(self, username: str) -> bool:
+        """
+        Check if we already downloaded this user's content today
+
+        Uses repost_fetch_cache table to track fetches
+        """
+        try:
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+
+                # Create cache table if doesn't exist
+                cursor.execute("""
+                    CREATE TABLE IF NOT EXISTS repost_fetch_cache (
+                        username TEXT PRIMARY KEY,
+                        last_fetched TEXT NOT NULL,
+                        content_count INTEGER DEFAULT 0
+                    )
+                """)
+
+                # Check if fetched in last 12 hours
+                cursor.execute("""
+                    SELECT last_fetched FROM repost_fetch_cache
+                    WHERE username = ?
+                    AND datetime(last_fetched) > datetime('now', '-12 hours')
+                """, (username,))
+
+                result = cursor.fetchone()
+                return result is not None
+
+        except Exception as e:
+            self.log(f"Error checking fetch cache: {e}", "error")
+            return False
+
+    def _mark_fetched(self, username: str, content_count: int = 0):
+        """Mark that we fetched this user's content"""
+        try:
+            with self.db.get_connection(for_write=True) as conn:
+                cursor = conn.cursor()
+
+                cursor.execute("""
+                    INSERT OR REPLACE INTO repost_fetch_cache
+                    (username, last_fetched, content_count)
+                    VALUES (?, ?, ?)
+                """, (username, datetime.now().isoformat(), content_count))
+
+                self.log(f"Marked @{username} as fetched ({content_count} items)", "debug")
+
+        except Exception as e:
+            self.log(f"Error marking fetch: {e}", "error")
+
+    def _download_content_via_imginn(self, username: str, destination: Path, add_to_database: bool) -> bool:
+        """
+        Download stories AND recent posts from user via ImgInn
+
+        Args:
+            username: Instagram username
+            destination: Where to save (normal path or /tmp)
+            add_to_database: If False, skip database recording (temp processing)
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            # Import imginn module
+            from modules.imginn_module import ImgInnDownloader
+
+            # Initialize ImgInn with or without database
+            imginn = ImgInnDownloader(
+                unified_db=self.db if add_to_database else None,
+                log_callback=lambda msg, lvl: self.log(msg, lvl)
+            )
+
+            # Create destination directories
+            stories_dir = destination / "stories"
+            posts_dir = destination / "posts"
+            stories_dir.mkdir(parents=True, exist_ok=True)
+            posts_dir.mkdir(parents=True, exist_ok=True)
+
+            # Download stories
+            self.log(f"Downloading stories from @{username} via ImgInn...", "info")
+            stories_files = imginn.download_stories(
+                username=username,
+                max_stories=50,
+                output_dir=stories_dir,
+                skip_database=not add_to_database
+            )
+            stories_count = len(stories_files) if isinstance(stories_files, list) else 0
+
+            # Download recent posts (last 24 hours)
+            self.log(f"Downloading recent posts from @{username} via ImgInn...", "info")
+            posts_files = imginn.download_posts(
+                username=username,
+                max_posts=50,
+                output_dir=posts_dir,
+                max_age_hours=24,
+                skip_database=not add_to_database
+            )
+            posts_count = len(posts_files) if isinstance(posts_files, list) else 0
+
+            total_count = stories_count + posts_count
+            self.log(f"Downloaded {total_count} items ({stories_count} stories, {posts_count} posts)", "info")
+
+            # Mark this fetch in cache
+            self._mark_fetched(username, total_count)
+
+            return total_count > 0
+
+        except Exception as e:
+            self.log(f"ImgInn download failed: {e}", "error")
+            return False
+
+    def _find_matching_original(self, repost_path: str, search_dir: Path) -> Optional[str]:
+        """
+        Find matching original content using perceptual hashing
+
+        Searches both stories/ and posts/ subdirectories
+
+        Args:
+            repost_path: Path to the repost file (e.g., evalongoria story)
+            search_dir: Directory to search (e.g., /tmp/.../globalgiftfoundation/)
+
+        Returns:
+            Path to best matching original, or None
+        """
+        if not IMAGEHASH_AVAILABLE:
+            self.log("imagehash not available - cannot match", "warning")
+            return None
+
+        # Calculate hash of repost
+        repost_hash = self._get_perceptual_hash(repost_path)
+        if not repost_hash:
+            self.log(f"Failed to calculate hash for repost: {repost_path}", "warning")
+            return None
+
+        self.log(f"Repost hash: {repost_hash}", "debug")
+
+        # Search both stories and posts
+        best_match = None
+        best_distance = 999
+        threshold = 10  # Hamming distance threshold (0-64 scale)
+
+        for subdir in ["stories", "posts"]:
+            content_dir = search_dir / subdir
+            if not content_dir.exists():
+                self.log(f"Directory not found: {content_dir}", "debug")
+                continue
+
+            files = list(content_dir.rglob("*"))
+            self.log(f"Checking {len(files)} files in {content_dir}", "debug")
+
+            for file_path in files:
+                if not file_path.is_file():
+                    continue
+
+                # Skip non-media files
+                if file_path.suffix.lower() not in ['.jpg', '.jpeg', '.png', '.mp4', '.mov', '.avi', '.webp']:
+                    continue
+
+                # Calculate hash
+                candidate_hash = self._get_perceptual_hash(str(file_path))
+                if not candidate_hash:
+                    continue
+
+                # Compare (Hamming distance)
+                distance = repost_hash - candidate_hash
+
+                self.log(f"  {file_path.name}: distance={distance}", "debug")
+
+                if distance < threshold and distance < best_distance:
+                    best_distance = distance
+                    best_match = str(file_path)
+                    self.log(f"Better match found: {file_path.name} (distance: {distance})", "info")
+
+        if best_match:
+            self.log(f"✓ Found original: {Path(best_match).name} (distance: {best_distance})", "success")
+            return best_match
+        else:
+            self.log(f"✗ No matching original found for {Path(repost_path).name}", "warning")
+            return None
+
+    def _get_perceptual_hash(self, file_path: str):
+        """Calculate perceptual hash for image or video"""
+        if not IMAGEHASH_AVAILABLE:
+            return None
+
+        try:
+            # Image: direct hash
+            if file_path.endswith(('.jpg', '.jpeg', '.png', '.webp', '.heic')):
+                with Image.open(file_path) as img:
+                    return imagehash.dhash(img)  # Difference hash (good for cropped/resized)
+
+            # Video: hash middle frame
+            elif file_path.endswith(('.mp4', '.mov', '.avi', '.mkv', '.webm')):
+                if not CV2_AVAILABLE:
+                    return None
+
+                video = cv2.VideoCapture(file_path)
+                if not video.isOpened():
+                    return None
+
+                frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+                if frame_count == 0:
+                    video.release()
+                    return None
+
+                # Use middle frame
+                video.set(cv2.CAP_PROP_POS_FRAMES, frame_count // 2)
+                ret, frame = video.read()
+
+                video.release()
+
+                if ret:
+                    # Convert BGR to RGB
+                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                    img = Image.fromarray(frame_rgb)
+                    result = imagehash.dhash(img)
+                    img.close()
+                    return result
+
+        except Exception as e:
+            self.log(f"Hash calculation failed for {Path(file_path).name}: {e}", "debug")
+
+        return None
+
+    def _replace_repost_with_original(self, repost_path: str, original_path: str) -> str:
+        """
+        Replace repost file with original high-quality file
+
+        Workflow:
+        1. Move repost to recycle bin (preserves it, not deleted)
+        2. Return path to ORIGINAL file with its original filename/metadata
+        3. Move module processes original as if it was downloaded directly
+
+        Args:
+            repost_path: Path to repost (e.g., evalongoria_story6.mp4)
+            original_path: Path to original (e.g., globalgiftfoundation_20251109_100000.jpg)
+
+        Returns:
+            Path to original file (keeps original filename and metadata)
+        """
+        import os
+        repost_file = Path(repost_path)
+        original_file = Path(original_path)
+
+        # Move repost to recycle bin (not delete - can recover if mistake)
+        if self.db:
+            try:
+                recycle_id = self.db.move_to_recycle_bin(
+                    file_path=str(repost_file),
+                    deleted_from='repost_detection',
+                    deleted_by='system',
+                    metadata={
+                        'reason': 'replaced_with_original',
+                        'original_source': str(original_file),
+                        'original_username': self.last_original_username
+                    }
+                )
+                if recycle_id:
+                    self.log(f"Moved repost to recycle bin: {repost_file.name} (ID: {recycle_id[:8]}...)", "info")
+                else:
+                    self.log(f"Failed to move repost to recycle bin, will delete instead", "warning")
+                    # Fallback: delete if recycle bin fails
+                    repost_file.unlink()
+            except Exception as e:
+                self.log(f"Recycle bin failed: {e}, deleting repost", "warning")
+                try:
+                    repost_file.unlink()
+                except Exception:
+                    pass
+        else:
+            # No database - just delete
+            try:
+                repost_file.unlink()
+                self.log(f"Deleted repost: {repost_file.name}", "debug")
+            except Exception as e:
+                self.log(f"Failed to delete repost: {e}", "warning")
+
+        # Return path to ORIGINAL file with its original filename and metadata
+        # Move module will process it as if it was downloaded directly from the original source
+        self.log(f"Replacing repost with original: {repost_file.name} → {original_file.name}", "info")
+
+        # Update database to track replacement
+        self._record_repost_replacement(
+            repost_path=str(repost_file),
+            original_path=str(original_file),
+            replacement_path=str(original_file)  # Same as original - keeps original filename
+        )
+
+        return str(original_file)
+
+    def _record_repost_replacement(self, repost_path: str, original_path: str, replacement_path: str):
+        """
+        Track repost replacements in database
+
+        Creates repost_replacements table to track what was replaced
+        """
+        try:
+            with self.db.get_connection(for_write=True) as conn:
+                cursor = conn.cursor()
+
+                # Create tracking table
+                cursor.execute("""
+                    CREATE TABLE IF NOT EXISTS repost_replacements (
+                        id INTEGER PRIMARY KEY AUTOINCREMENT,
+                        repost_path TEXT NOT NULL,
+                        repost_filename TEXT NOT NULL,
+                        repost_source TEXT NOT NULL,
+                        original_path TEXT NOT NULL,
+                        original_username TEXT NOT NULL,
+                        replacement_path TEXT NOT NULL,
+                        detected_at TEXT DEFAULT CURRENT_TIMESTAMP,
+                        hash_distance INTEGER
+                    )
+                """)
+
+                # Extract usernames
+                repost_source = Path(repost_path).parent.name
+                original_username = self.last_original_username or "unknown"
+
+                # Insert record
+                cursor.execute("""
+                    INSERT INTO repost_replacements
+                    (repost_path, repost_filename, repost_source, original_path, original_username, replacement_path)
+                    VALUES (?, ?, ?, ?, ?, ?)
+                """, (
+                    repost_path,
+                    Path(repost_path).name,
+                    repost_source,
+                    original_path,
+                    original_username,
+                    replacement_path
+                ))
+
+                self.log(f"Recorded replacement: {repost_source} → @{original_username}", "debug")
+
+        except Exception as e:
+            self.log(f"Failed to record replacement: {e}", "error")
+
+    def _cleanup_temp_downloads(self, temp_dir: Path, keep_file: str = None):
+        """
+        Clean up temporary downloads for non-monitored accounts
+
+        Args:
+            temp_dir: Directory to clean (e.g., /tmp/repost_detection/username/)
+            keep_file: Optional file to preserve (the matched original)
+        """
+        if not temp_dir.exists():
+            return
+
+        keep_path = Path(keep_file) if keep_file else None
+        deleted_count = 0
+
+        try:
+            # Delete all files except the keeper
+            for file_path in temp_dir.rglob("*"):
+                if not file_path.is_file():
+                    continue
+
+                if keep_path and file_path == keep_path:
+                    continue  # Skip the matched file
+
+                try:
+                    file_path.unlink()
+                    deleted_count += 1
+                except Exception as e:
+                    self.log(f"Failed to delete temp file {file_path.name}: {e}", "debug")
+
+            # Remove empty directories
+            for subdir in [temp_dir / "stories", temp_dir / "posts"]:
+                if subdir.exists() and not any(subdir.iterdir()):
+                    subdir.rmdir()
+
+            if not any(temp_dir.iterdir()):
+                temp_dir.rmdir()
+
+            self.log(f"Cleaned up {deleted_count} temporary files", "info")
+
+        except Exception as e:
+            self.log(f"Failed to cleanup directories: {e}", "debug")
+
+
+# Running the file directly only prints a notice and the state of the
+# module-level dependency flags; no detection is performed.
+if __name__ == "__main__":
+    print("Instagram Repost Detector Module")
+    print("This module should be imported, not run directly")
+    print("\nDependencies:")
+    print(f"  - pytesseract/PIL: {'✓' if TESSERACT_AVAILABLE else '✗'}")
+    print(f"  - opencv-python: {'✓' if CV2_AVAILABLE else '✗'}")
+    print(f"  - imagehash: {'✓' if IMAGEHASH_AVAILABLE else '✗'}")
--- a/modules/instagram_utils.py
+++ b/modules/instagram_utils.py
@@ -0,0 +1,461 @@
+#!/usr/bin/env python3
+"""
+Instagram Utilities Module
+
+Shared utility functions for Instagram downloaders (imginn, fastdl, toolzu, instaloader).
+Centralizes common functionality like media ID extraction to avoid code duplication.
+"""
+
+import re
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, Set, Dict, Any
+
+
+def extract_instagram_media_id(filename_or_id: str) -> str:
+    """Extract the actual Instagram media ID from a filename or ID string.
+
+    Instagram image filenames follow the pattern:
+    {user_id}_{media_id}_{post_id}_n.ext
+    Where media_id is a 17-18 digit number starting with 18xxxxxxx
+
+    For video stories with AQ... format, these are story keys and
+    we use the whole key as the media ID.
+
+    Args:
+        filename_or_id: A filename like '591164014_18551181784006538_2284814566270897032_n'
+                       or just a media ID string
+
+    Returns:
+        The extracted Instagram media ID (17-18 digit number) or the original string
+        if no pattern matches
+
+    Examples:
+        >>> extract_instagram_media_id('591164014_18551181784006538_2284814566270897032_n')
+        '18551181784006538'
+        >>> extract_instagram_media_id('18551181784006538')
+        '18551181784006538'
+        >>> extract_instagram_media_id('AQOOlj6M4PlGHBuYl02KzwUXefsdiou9q3ooFiNF4cUy3DEY6QKxROoUe9BKCeVJA4UF5BiVPIuqXheCU')
+        'AQOOlj6M4PlGHBuYl02KzwUXefsdiou9q3ooFiNF4cUy3DEY6QKxROoUe9BKCeVJA4UF5BiVPIuqXheCU'
+    """
+    if not filename_or_id:
+        return filename_or_id
+
+    # Pattern 1: Standard Instagram image format with underscore separators
+    # {user_id}_{media_id}_{post_id}_n
+    # Media ID is the 17-18 digit number starting with 18
+    # Use underscore or start/end as boundaries (not \b which doesn't work with underscores)
+    ig_media_id_pattern = r'(?:^|_)(18\d{15,17})(?:_|$)'
+    match = re.search(ig_media_id_pattern, filename_or_id)
+    if match:
+        return match.group(1)
+
+    # Pattern 2: If it's already a valid media ID (17-18 digits starting with 18)
+    if re.match(r'^18\d{15,17}$', filename_or_id):
+        return filename_or_id
+
+    # Pattern 3: Story key format (AQ... encoded string) - use as-is
+    if filename_or_id.startswith('AQ') and len(filename_or_id) > 50:
+        return filename_or_id
+
+    # Pattern 4: Short post code format (like DRkaDSFD-U2) - use as-is
+    if re.match(r'^[A-Za-z0-9_-]{10,15}$', filename_or_id):
+        return filename_or_id
+
+    # No pattern matched - return original string
+    return filename_or_id
+
+
+def extract_media_id_from_url(url: str) -> Optional[str]:
+    """Extract Instagram media ID from a CDN URL.
+
+    Instagram CDN URLs contain media IDs in patterns like:
+    561378837_18538674661006538_479694548187839800_n.jpg
+
+    The second number (18538674661006538) is the Instagram media ID.
+
+    Args:
+        url: Instagram CDN URL string
+
+    Returns:
+        Media ID string or None if not found
+    """
+    if not url:
+        return None
+
+    # Pattern: number_MEDIAID_number_n.jpg or .mp4
+    pattern = r'(\d+)_(\d{17,19})_\d+_n\.(jpg|mp4|jpeg|png)'
+    match = re.search(pattern, url)
+    if match:
+        return match.group(2)  # Return the media ID
+
+    return None
+
+
+def extract_media_ids_from_url(url: str) -> list:
+    """Extract all Instagram media IDs from a URL.
+
+    Similar to extract_media_id_from_url but returns all matches as a list.
+
+    Args:
+        url: URL string that may contain Instagram media IDs
+
+    Returns:
+        List of media IDs found in the URL
+    """
+    if not url:
+        return []
+
+    # Pattern: number_MEDIAID_number_n.jpg
+    pattern = r'(\d+)_(\d{17,19})_\d+_n\.(jpg|mp4|jpeg|png)'
+    matches = re.findall(pattern, url)
+
+    if matches:
+        # Return the media ID (second capture group) from each match
+        return [match[1] for match in matches]
+
+    return []
+
+
+def extract_post_shortcode(url: str) -> Optional[str]:
+    """Extract Instagram post shortcode from a URL.
+
+    Args:
+        url: Instagram URL like https://www.instagram.com/p/ABC123/
+
+    Returns:
+        Shortcode string or None if not found
+    """
+    if not url:
+        return None
+
+    match = re.search(r'/p/([^/]+)/?', url)
+    if match:
+        return match.group(1)
+
+    return None
+
+
+def media_id_to_shortcode(media_id: str) -> str:
+    """Convert Instagram media ID to shortcode.
+
+    Args:
+        media_id: Numeric media ID string
+
+    Returns:
+        Instagram shortcode string
+    """
+    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
+
+    try:
+        media_id_int = int(media_id)
+    except (ValueError, TypeError):
+        return media_id  # Return as-is if not a valid number
+
+    shortcode = ''
+    while media_id_int > 0:
+        remainder = media_id_int % 64
+        media_id_int = media_id_int // 64
+        shortcode = alphabet[remainder] + shortcode
+
+    return shortcode or 'A'
+
+
+def scan_existing_files_for_media_ids(output_dir: Path, profile_name: str = None,
+                                      min_file_size: int = 0, recursive: bool = True) -> Set[str]:
+    """Scan existing files and extract media IDs for duplicate detection.
+
+    Scans image and video files in the output directory, extracts both the
+    full media ID string and the normalized Instagram media ID (18-digit number).
+
+    Args:
+        output_dir: Directory to scan for existing files
+        profile_name: Optional profile name to filter files
+        min_file_size: Minimum file size in bytes (skip smaller files as corrupted)
+        recursive: If True, search subdirectories (rglob), otherwise only top level (glob)
+
+    Returns:
+        Set of media IDs (both full and normalized) found in existing files
+    """
+    media_ids = set()
+
+    if not output_dir.exists():
+        return media_ids
+
+    glob_func = output_dir.rglob if recursive else output_dir.glob
+
+    for pattern in ["*.jpg", "*.jpeg", "*.png", "*.heic", "*.mp4", "*.mov"]:
+        for filepath in glob_func(pattern):
+            # Skip files smaller than min_file_size (likely corrupted/incomplete)
+            if min_file_size > 0:
+                try:
+                    if filepath.stat().st_size < min_file_size:
+                        continue
+                except OSError:
+                    continue
+
+            filename = filepath.stem
+
+            # Format is: profile_YYYYMMDD_HHMMSS_mediaid
+            # Split into parts: [profile, date, time, ...rest is media_id]
+            parts = filename.split('_', 3)
+
+            if len(parts) >= 4:
+                # Check profile name if provided
+                if profile_name and parts[0] != profile_name:
+                    continue
+                media_id_full = parts[3]
+            elif len(parts) > 1:
+                media_id_full = parts[-1]
+            else:
+                media_id_full = filename
+
+            if media_id_full:
+                # Add the full media ID string
+                media_ids.add(media_id_full)
+
+                # Also add the normalized Instagram media ID (18-digit number)
+                normalized_id = extract_instagram_media_id(media_id_full)
+                if normalized_id and normalized_id != media_id_full:
+                    media_ids.add(normalized_id)
+
+    return media_ids
+
+
+def parse_instagram_filename(filename: str) -> dict:
+    """Parse an Instagram filename into its components.
+
+    Args:
+        filename: Filename like 'evalongoria_20251205_120406_591164014_18551181784006538_2284814566270897032_n_story1.jpg'
+
+    Returns:
+        Dictionary with parsed components:
+        - username: str or None
+        - date: str or None (YYYYMMDD format)
+        - time: str or None (HHMMSS format)
+        - media_id_full: str or None (full ID after date/time)
+        - media_id: str or None (normalized 18-digit Instagram media ID)
+        - suffix: str or None (e.g., 'story1')
+        - extension: str or None
+    """
+    result = {
+        'username': None,
+        'date': None,
+        'time': None,
+        'media_id_full': None,
+        'media_id': None,
+        'suffix': None,
+        'extension': None
+    }
+
+    if not filename:
+        return result
+
+    # Get extension
+    path = Path(filename)
+    result['extension'] = path.suffix.lower() if path.suffix else None
+    basename = path.stem
+
+    # Split into parts
+    parts = basename.split('_')
+
+    if len(parts) >= 4:
+        result['username'] = parts[0]
+
+        # Check if parts[1] and parts[2] look like date/time
+        if len(parts[1]) == 8 and parts[1].isdigit():
+            result['date'] = parts[1]
+        if len(parts[2]) == 6 and parts[2].isdigit():
+            result['time'] = parts[2]
+
+        # Everything after date/time is the media ID (possibly with suffix)
+        media_id_full = '_'.join(parts[3:])
+        result['media_id_full'] = media_id_full
+
+        # Check for story suffix
+        if '_story' in media_id_full:
+            media_part, suffix_part = media_id_full.rsplit('_story', 1)
+            result['media_id_full'] = media_part
+            result['suffix'] = f'story{suffix_part}'
+
+        # Extract normalized media ID
+        result['media_id'] = extract_instagram_media_id(result['media_id_full'])
+
+    return result
+
+
+def record_instagram_download(db, media_id: str, username: str, content_type: str,
+                              filename: str, url: str = None, download_url: str = None,
+                              post_date: datetime = None, file_path: str = None,
+                              method: str = None, extra_metadata: Dict[str, Any] = None) -> bool:
+    """Record an Instagram download in the database with normalized media_id.
+
+    This is the centralized function for recording Instagram downloads across all
+    Instagram downloader modules (imginn, fastdl, toolzu, instaloader). It ensures
+    the media_id is always normalized for cross-module duplicate detection.
+
+    Args:
+        db: Database instance (UnifiedDatabase or adapter with record_download method)
+        media_id: The media ID (will be normalized automatically)
+        username: Instagram username
+        content_type: Type of content (posts, stories, reels, highlights)
+        filename: Filename of the downloaded file
+        url: Original Instagram URL (e.g., https://instagram.com/p/ABC123/)
+        download_url: Direct download URL (CDN URL)
+        post_date: Post date/time
+        file_path: Full file path on disk
+        method: Download method (imginn, fastdl, toolzu, instaloader)
+        extra_metadata: Additional metadata to include
+
+    Returns:
+        True if successfully recorded, False otherwise
+    """
+    if not db:
+        return False
+
+    # Normalize the media_id for consistent cross-module detection
+    normalized_media_id = extract_instagram_media_id(media_id) if media_id else media_id
+
+    # Build metadata with normalized media_id
+    metadata = {
+        'media_id': normalized_media_id,
+        'original_media_id': media_id if media_id != normalized_media_id else None,
+    }
+
+    # Add extra metadata if provided
+    if extra_metadata:
+        metadata.update(extra_metadata)
+
+    # Remove None values
+    metadata = {k: v for k, v in metadata.items() if v is not None}
+
+    # Determine URL for database (use download_url or construct from media_id)
+    db_url = url or download_url or f"instagram://{normalized_media_id}"
+
+    # Calculate file hash if file_path provided
+    file_hash = None
+    if file_path:
+        try:
+            from modules.unified_database import UnifiedDatabase
+            file_hash = UnifiedDatabase.get_file_hash(file_path)
+        except Exception:
+            pass
+
+    try:
+        # Try to use the db's record_download method directly
+        if hasattr(db, 'record_download'):
+            return db.record_download(
+                url=db_url,
+                platform='instagram',
+                source=username,
+                content_type=content_type,
+                filename=filename,
+                file_path=file_path,
+                file_hash=file_hash,
+                post_date=post_date,
+                metadata=metadata,
+                method=method
+            )
+        # Fallback for adapter-style databases
+        elif hasattr(db, 'mark_downloaded'):
+            return db.mark_downloaded(
+                username=username,
+                url=db_url,
+                filename=filename,
+                post_date=post_date,
+                metadata=metadata,
+                file_path=file_path,
+                content_type=content_type
+            )
+        else:
+            return False
+    except Exception:
+        return False
+
+
+def is_instagram_downloaded(db, media_id: str, username: str = None) -> bool:
+    """Check if Instagram content is already downloaded by media_id.
+
+    Checks for both the original and normalized media_id to ensure cross-module
+    duplicate detection works correctly.
+
+    Args:
+        db: Database instance (UnifiedDatabase or adapter)
+        media_id: The media ID to check (will check both original and normalized)
+        username: Optional username to scope the check
+
+    Returns:
+        True if already downloaded, False otherwise
+    """
+    if not db or not media_id:
+        return False
+
+    # Normalize the media_id
+    normalized_media_id = extract_instagram_media_id(media_id)
+
+    # Check if this looks like a shortcode (10-15 alphanumeric chars, no 18xxx pattern)
+    is_shortcode = (normalized_media_id == media_id and
+                    re.match(r'^[A-Za-z0-9_-]{10,15}$', media_id) and
+                    not re.match(r'^18\d{15,17}$', media_id))
+
+    try:
+        # Check if db has get_connection (UnifiedDatabase) - query directly
+        if hasattr(db, 'get_connection'):
+            with db.get_connection() as conn:
+                cursor = conn.cursor()
+                # Check both normalized and original media_id
+                # Also verify file_path is set (download was actually completed)
+                if normalized_media_id != media_id:
+                    cursor.execute('''
+                        SELECT 1 FROM downloads
+                        WHERE platform = 'instagram'
+                        AND (media_id = ? OR media_id = ?)
+                        AND file_path IS NOT NULL AND file_path != ''
+                        LIMIT 1
+                    ''', (normalized_media_id, media_id))
+                else:
+                    cursor.execute('''
+                        SELECT 1 FROM downloads
+                        WHERE platform = 'instagram'
+                        AND media_id = ?
+                        AND file_path IS NOT NULL AND file_path != ''
+                        LIMIT 1
+                    ''', (normalized_media_id,))
+                if cursor.fetchone() is not None:
+                    return True
+
+                # For shortcodes, also check the metadata JSON column
+                if is_shortcode:
+                    cursor.execute('''
+                        SELECT 1 FROM downloads
+                        WHERE platform = 'instagram'
+                        AND metadata LIKE ?
+                        AND file_path IS NOT NULL AND file_path != ''
+                        LIMIT 1
+                    ''', (f'%"shortcode": "{media_id}"%',))
+                    if cursor.fetchone() is not None:
+                        return True
+
+                # Check recycle bin — files previously downloaded then deleted
+                # should not be re-downloaded
+                cursor.execute('''
+                    SELECT 1 FROM recycle_bin
+                    WHERE original_filename LIKE ?
+                    LIMIT 1
+                ''', (f'%{normalized_media_id}%',))
+                if cursor.fetchone() is not None:
+                    return True
+
+                return False
+
+        # Fallback for adapters with is_already_downloaded method
+        elif hasattr(db, 'is_already_downloaded'):
+            if db.is_already_downloaded(normalized_media_id):
+                return True
+            # Also check original if different
+            if normalized_media_id != media_id and db.is_already_downloaded(media_id):
+                return True
+
+        return False
+    except Exception:
+        return False
--- a/modules/instaloader_module.py
+++ b/modules/instaloader_module.py
--- a/modules/media_identifier.py
+++ b/modules/media_identifier.py
@@ -0,0 +1,535 @@
+"""
+Media Identifier Module
+
+Parses media filenames using guessit and matches them against TMDB for metadata enrichment.
+Generates organized file paths for TV Shows and Movies.
+"""
+
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+import requests
+
+from modules.universal_logger import get_logger
+
+logger = get_logger('MediaIdentifier')
+
+# Try to import guessit, but gracefully handle if not installed
+try:
+    import guessit
+    GUESSIT_AVAILABLE = True
+except ImportError:
+    GUESSIT_AVAILABLE = False
+    logger.warning("guessit not installed - filename parsing will be limited")
+
+
+@dataclass
+class ParsedMedia:
+    """Represents parsed media information from a filename."""
+    title: str
+    media_type: str  # 'movie' or 'episode' (TV)
+    year: Optional[int] = None
+    season: Optional[int] = None
+    episode: Optional[int] = None
+    quality: Optional[str] = None
+    source: Optional[str] = None
+    codec: Optional[str] = None
+    release_group: Optional[str] = None
+    original_filename: str = ""
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'title': self.title,
+            'media_type': self.media_type,
+            'year': self.year,
+            'season': self.season,
+            'episode': self.episode,
+            'quality': self.quality,
+            'source': self.source,
+            'codec': self.codec,
+            'release_group': self.release_group,
+            'original_filename': self.original_filename,
+        }
+
+
+@dataclass
+class TMDBMatch:
+    """Represents a TMDB match for parsed media."""
+    tmdb_id: int
+    title: str
+    original_title: Optional[str]
+    media_type: str  # 'movie' or 'tv'
+    year: Optional[int] = None
+    poster_path: Optional[str] = None
+    overview: Optional[str] = None
+    # For TV episodes
+    season_number: Optional[int] = None
+    episode_number: Optional[int] = None
+    episode_title: Optional[str] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'tmdb_id': self.tmdb_id,
+            'title': self.title,
+            'original_title': self.original_title,
+            'media_type': self.media_type,
+            'year': self.year,
+            'poster_path': self.poster_path,
+            'overview': self.overview,
+            'season_number': self.season_number,
+            'episode_number': self.episode_number,
+            'episode_title': self.episode_title,
+        }
+
+
+class MediaIdentifier:
+    """
+    Identifies media from filenames and matches against TMDB.
+
+    Uses guessit for filename parsing and TMDB API for metadata enrichment.
+    """
+
+    TMDB_BASE_URL = "https://api.themoviedb.org/3"
+    TMDB_IMAGE_BASE = "https://image.tmdb.org/t/p/w500"
+
+    # Quality normalization patterns
+    QUALITY_MAP = {
+        '2160p': '2160p',
+        '4k': '2160p',
+        'uhd': '2160p',
+        '1080p': '1080p',
+        'fullhd': '1080p',
+        'fhd': '1080p',
+        '720p': '720p',
+        'hd': '720p',
+        '480p': '480p',
+        'sd': '480p',
+        '360p': '360p',
+    }
+
+    def __init__(self, tmdb_api_key: str):
+        """
+        Initialize the MediaIdentifier.
+
+        Args:
+            tmdb_api_key: TMDB API key for lookups
+        """
+        self.api_key = tmdb_api_key
+        self.session = requests.Session()
+
+    def parse_filename(self, filename: str) -> Optional[ParsedMedia]:
+        """
+        Parse a media filename to extract metadata.
+
+        Args:
+            filename: The filename to parse (without path)
+
+        Returns:
+            ParsedMedia object with extracted information, or None if parsing fails
+        """
+        if not filename:
+            return None
+
+        # Strip path if present
+        filename = Path(filename).name
+
+        if GUESSIT_AVAILABLE:
+            return self._parse_with_guessit(filename)
+        else:
+            return self._parse_fallback(filename)
+
+    def _parse_with_guessit(self, filename: str) -> Optional[ParsedMedia]:
+        """Parse filename using guessit library."""
+        try:
+            result = guessit.guessit(filename)
+
+            # Determine media type
+            media_type = result.get('type', 'movie')
+            if media_type == 'episode':
+                media_type = 'episode'
+            else:
+                media_type = 'movie'
+
+            # Extract title
+            title = result.get('title', '')
+            if not title:
+                return None
+
+            # Extract quality
+            quality = None
+            screen_size = result.get('screen_size')
+            if screen_size:
+                quality = self.QUALITY_MAP.get(str(screen_size).lower(), str(screen_size))
+
+            return ParsedMedia(
+                title=title,
+                media_type=media_type,
+                year=result.get('year'),
+                season=result.get('season'),
+                episode=result.get('episode'),
+                quality=quality,
+                source=result.get('source'),
+                codec=result.get('video_codec'),
+                release_group=result.get('release_group'),
+                original_filename=filename,
+            )
+        except Exception as e:
+            logger.error(f"guessit parsing failed for '{filename}': {e}")
+            return self._parse_fallback(filename)
+
+    def _parse_fallback(self, filename: str) -> Optional[ParsedMedia]:
+        """
+        Fallback parser when guessit is not available.
+        Uses regex patterns to extract common media info.
+        """
+        try:
+            # Remove extension
+            name = Path(filename).stem
+
+            # Replace common separators with spaces
+            name = re.sub(r'[._]', ' ', name)
+
+            # Try to extract TV show pattern: Show Name S01E02 or Show.Name.1x02
+            tv_pattern = r'^(.+?)[\s\.]+[Ss](\d{1,2})[Ee](\d{1,2})'
+            tv_match = re.match(tv_pattern, name)
+
+            if tv_match:
+                title = tv_match.group(1).strip()
+                season = int(tv_match.group(2))
+                episode = int(tv_match.group(3))
+
+                # Extract quality
+                quality = self._extract_quality(name)
+
+                return ParsedMedia(
+                    title=title,
+                    media_type='episode',
+                    season=season,
+                    episode=episode,
+                    quality=quality,
+                    original_filename=filename,
+                )
+
+            # Try alternative TV pattern: 1x02 format
+            alt_tv_pattern = r'^(.+?)[\s\.]+(\d{1,2})x(\d{1,2})'
+            alt_match = re.match(alt_tv_pattern, name)
+
+            if alt_match:
+                title = alt_match.group(1).strip()
+                season = int(alt_match.group(2))
+                episode = int(alt_match.group(3))
+
+                quality = self._extract_quality(name)
+
+                return ParsedMedia(
+                    title=title,
+                    media_type='episode',
+                    season=season,
+                    episode=episode,
+                    quality=quality,
+                    original_filename=filename,
+                )
+
+            # Assume movie - extract title and year
+            # Pattern: Movie Title (2023) or Movie.Title.2023
+            movie_pattern = r'^(.+?)[\s\.]+\(?(\d{4})\)?'
+            movie_match = re.match(movie_pattern, name)
+
+            if movie_match:
+                title = movie_match.group(1).strip()
+                year = int(movie_match.group(2))
+            else:
+                # Just use the name as title
+                title = name.split()[0] if name.split() else name
+                year = None
+
+            quality = self._extract_quality(name)
+
+            return ParsedMedia(
+                title=title,
+                media_type='movie',
+                year=year,
+                quality=quality,
+                original_filename=filename,
+            )
+
+        except Exception as e:
+            logger.error(f"Fallback parsing failed for '{filename}': {e}")
+            return None
+
+    def _extract_quality(self, text: str) -> Optional[str]:
+        """Extract quality from text."""
+        text_lower = text.lower()
+        for pattern, quality in self.QUALITY_MAP.items():
+            if pattern in text_lower:
+                return quality
+        return None
+
+    def match_tmdb(self, parsed: ParsedMedia) -> Optional[TMDBMatch]:
+        """
+        Match parsed media against TMDB.
+
+        Args:
+            parsed: ParsedMedia object from parse_filename
+
+        Returns:
+            TMDBMatch object if found, None otherwise
+        """
+        if not parsed:
+            return None
+
+        try:
+            if parsed.media_type == 'episode':
+                return self._match_tv_show(parsed)
+            else:
+                return self._match_movie(parsed)
+        except Exception as e:
+            logger.error(f"TMDB matching failed for '{parsed.title}': {e}")
+            return None
+
+    def _match_tv_show(self, parsed: ParsedMedia) -> Optional[TMDBMatch]:
+        """Match a TV show episode against TMDB."""
+        try:
+            # Search for the TV show
+            search_url = f"{self.TMDB_BASE_URL}/search/tv"
+            params = {
+                'api_key': self.api_key,
+                'query': parsed.title,
+                'page': 1,
+            }
+            if parsed.year:
+                params['first_air_date_year'] = parsed.year
+
+            response = self.session.get(search_url, params=params, timeout=30)
+            response.raise_for_status()
+            data = response.json()
+
+            results = data.get('results', [])
+            if not results:
+                logger.debug(f"No TMDB results for TV show: {parsed.title}")
+                return None
+
+            # Use the first (best) result
+            show = results[0]
+            show_id = show['id']
+
+            # Get episode details if we have season/episode
+            episode_title = None
+            if parsed.season and parsed.episode:
+                episode_url = f"{self.TMDB_BASE_URL}/tv/{show_id}/season/{parsed.season}/episode/{parsed.episode}"
+                ep_params = {'api_key': self.api_key}
+                try:
+                    ep_response = self.session.get(episode_url, params=ep_params, timeout=30)
+                    if ep_response.status_code == 200:
+                        ep_data = ep_response.json()
+                        episode_title = ep_data.get('name')
+                except Exception:
+                    pass
+
+            # Parse year from first_air_date
+            year = None
+            first_air_date = show.get('first_air_date', '')
+            if first_air_date and len(first_air_date) >= 4:
+                try:
+                    year = int(first_air_date[:4])
+                except ValueError:
+                    pass
+
+            return TMDBMatch(
+                tmdb_id=show_id,
+                title=show.get('name', parsed.title),
+                original_title=show.get('original_name'),
+                media_type='tv',
+                year=year,
+                poster_path=show.get('poster_path'),
+                overview=show.get('overview'),
+                season_number=parsed.season,
+                episode_number=parsed.episode,
+                episode_title=episode_title,
+            )
+
+        except Exception as e:
+            logger.error(f"TMDB TV show matching failed: {e}")
+            return None
+
+    def _match_movie(self, parsed: ParsedMedia) -> Optional[TMDBMatch]:
+        """Match a movie against TMDB."""
+        try:
+            # Search for the movie
+            search_url = f"{self.TMDB_BASE_URL}/search/movie"
+            params = {
+                'api_key': self.api_key,
+                'query': parsed.title,
+                'page': 1,
+            }
+            if parsed.year:
+                params['year'] = parsed.year
+
+            response = self.session.get(search_url, params=params, timeout=30)
+            response.raise_for_status()
+            data = response.json()
+
+            results = data.get('results', [])
+            if not results:
+                logger.debug(f"No TMDB results for movie: {parsed.title}")
+                return None
+
+            # Use the first (best) result
+            movie = results[0]
+
+            # Parse year from release_date
+            year = None
+            release_date = movie.get('release_date', '')
+            if release_date and len(release_date) >= 4:
+                try:
+                    year = int(release_date[:4])
+                except ValueError:
+                    pass
+
+            return TMDBMatch(
+                tmdb_id=movie['id'],
+                title=movie.get('title', parsed.title),
+                original_title=movie.get('original_title'),
+                media_type='movie',
+                year=year,
+                poster_path=movie.get('poster_path'),
+                overview=movie.get('overview'),
+            )
+
+        except Exception as e:
+            logger.error(f"TMDB movie matching failed: {e}")
+            return None
+
+    def get_organized_path(
+        self,
+        match: TMDBMatch,
+        base_path: str,
+        original_filename: str,
+    ) -> str:
+        """
+        Generate an organized file path for the matched media.
+
+        Args:
+            match: TMDBMatch object with TMDB metadata
+            base_path: Base directory for media storage
+            original_filename: Original filename (for extension)
+
+        Returns:
+            Full organized path for the file
+        """
+        base = Path(base_path)
+
+        # Get extension from original filename
+        ext = Path(original_filename).suffix
+
+        # Sanitize title for filesystem
+        safe_title = self._sanitize_filename(match.title)
+
+        if match.media_type == 'tv':
+            # TV: {base}/TV Shows/{Show}/Season {XX}/{Show} - S{XX}E{XX} - {Episode Title}.{ext}
+            show_dir = base / "TV Shows" / safe_title
+
+            if match.season_number is not None:
+                season_dir = show_dir / f"Season {match.season_number:02d}"
+            else:
+                season_dir = show_dir / "Season 01"
+
+            # Build filename
+            if match.season_number is not None and match.episode_number is not None:
+                ep_part = f"S{match.season_number:02d}E{match.episode_number:02d}"
+            else:
+                ep_part = "S01E01"
+
+            if match.episode_title:
+                safe_ep_title = self._sanitize_filename(match.episode_title)
+                filename = f"{safe_title} - {ep_part} - {safe_ep_title}{ext}"
+            else:
+                filename = f"{safe_title} - {ep_part}{ext}"
+
+            return str(season_dir / filename)
+
+        else:
+            # Movie: {base}/Movies/{Title} ({Year})/{Title} ({Year}).{ext}
+            if match.year:
+                movie_folder = f"{safe_title} ({match.year})"
+            else:
+                movie_folder = safe_title
+
+            movie_dir = base / "Movies" / movie_folder
+            filename = f"{movie_folder}{ext}"
+
+            return str(movie_dir / filename)
+
+    def _sanitize_filename(self, name: str) -> str:
+        """
+        Sanitize a string for use as a filename.
+
+        Removes/replaces characters that are invalid in filenames.
+        """
+        if not name:
+            return "Unknown"
+
+        # Replace problematic characters
+        name = re.sub(r'[<>:"/\\|?*]', '', name)
+        name = re.sub(r'\s+', ' ', name)
+        name = name.strip()
+
+        # Limit length
+        if len(name) > 100:
+            name = name[:100].strip()
+
+        return name if name else "Unknown"
+
+    def identify_and_match(
+        self,
+        filename: str,
+        base_path: str = "/media",
+    ) -> Dict[str, Any]:
+        """
+        Convenience method to parse, match, and get organized path in one call.
+
+        Args:
+            filename: The media filename to process
+            base_path: Base directory for organized media
+
+        Returns:
+            Dict with parsed info, TMDB match, and organized path
+        """
+        result = {
+            'success': False,
+            'filename': filename,
+            'parsed': None,
+            'match': None,
+            'organized_path': None,
+            'error': None,
+        }
+
+        try:
+            # Parse filename
+            parsed = self.parse_filename(filename)
+            if not parsed:
+                result['error'] = 'Failed to parse filename'
+                return result
+
+            result['parsed'] = parsed.to_dict()
+
+            # Match against TMDB
+            match = self.match_tmdb(parsed)
+            if match:
+                result['match'] = match.to_dict()
+
+                # Get organized path
+                organized_path = self.get_organized_path(match, base_path, filename)
+                result['organized_path'] = organized_path
+                result['success'] = True
+            else:
+                result['error'] = 'No TMDB match found'
+
+            return result
+
+        except Exception as e:
+            result['error'] = str(e)
+            logger.error(f"identify_and_match failed for '{filename}': {e}")
+            return result
--- a/modules/monitor_wrapper.py
+++ b/modules/monitor_wrapper.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""
+Helper wrapper to integrate monitoring with downloaders
+"""
+
+from functools import wraps
+from modules.downloader_monitor import get_monitor
+
+
+def monitor_download(downloader_name):
+    """
+    Decorator to monitor download attempts
+
+    Usage:
+        @monitor_download('fastdl')
+        def download_function(username, ...):
+            ...
+            return count
+    """
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            # Extract username from args or kwargs
+            username = kwargs.get('username') or (args[0] if args else 'unknown')
+
+            try:
+                # Call the actual download function
+                result = func(*args, **kwargs)
+
+                # Determine success based on result
+                if isinstance(result, int):
+                    count = result
+                    success = count > 0
+                elif isinstance(result, dict):
+                    count = result.get('count', 0)
+                    success = result.get('success', count > 0)
+                else:
+                    count = 0
+                    success = False
+
+                # Log to monitor
+                monitor = get_monitor()
+                monitor.log_download_attempt(
+                    downloader=downloader_name,
+                    username=username,
+                    success=success,
+                    file_count=count,
+                    error_message=None
+                )
+
+                return result
+
+            except Exception as e:
+                # Log failure
+                monitor = get_monitor()
+                monitor.log_download_attempt(
+                    downloader=downloader_name,
+                    username=username,
+                    success=False,
+                    file_count=0,
+                    error_message=str(e)
+                )
+                raise
+
+        return wrapper
+    return decorator
+
+
+def log_download_result(downloader: str, username: str, count: int, error: str = None):
+    """
+    Report one finished download attempt to the monitor.
+
+    The attempt is recorded as successful exactly when ``error`` is None.
+
+    Args:
+        downloader: Downloader name (fastdl, imginn, etc.)
+        username: Username
+        count: Number of files downloaded
+        error: Error message if failed
+    """
+    succeeded = error is None
+    get_monitor().log_download_attempt(
+        downloader=downloader,
+        username=username,
+        success=succeeded,
+        file_count=count,
+        error_message=error,
+    )
--- a/modules/move_module.py
+++ b/modules/move_module.py
--- a/modules/paid_content/init.py
+++ b/modules/paid_content/init.py
@@ -0,0 +1,36 @@
+"""
+Paid Content Module
+
+Downloads and organizes content from subscription-based creator platforms
+(OnlyFans, Fansly, Patreon, Fanbox, etc.) via the Coomer.party and Kemono.party archival APIs.
+Also supports YouTube channels and Twitch clips via yt-dlp.
+"""
+
+from .scraper import PaidContentScraper
+from .api_client import PaidContentAPIClient
+from .db_adapter import PaidContentDBAdapter
+from .file_host_downloader import FileHostDownloader
+from .embed_downloader import EmbedDownloader
+from .youtube_client import YouTubeClient
+from .twitch_client import TwitchClient, TwitchThumbnailCache
+from .fansly_direct_client import FanslyDirectClient
+from .onlyfans_client import OnlyFansClient
+from .xhamster_client import XHamsterClient
+from .tiktok_client import TikTokClient
+from .instagram_adapter import InstagramAdapter
+
+# Names exported by `from <package> import *`; mirrors the imports above.
+__all__ = [
+    'PaidContentScraper',
+    'PaidContentAPIClient',
+    'PaidContentDBAdapter',
+    'FileHostDownloader',
+    'EmbedDownloader',
+    'YouTubeClient',
+    'TwitchClient',
+    'TwitchThumbnailCache',
+    'FanslyDirectClient',
+    'OnlyFansClient',
+    'XHamsterClient',
+    'TikTokClient',
+    'InstagramAdapter',
+]
--- a/modules/paid_content/api_client.py
+++ b/modules/paid_content/api_client.py
@@ -0,0 +1,311 @@
+"""
+Unified API client for Coomer.party and Kemono.party
+Both services share the same API structure (Kemono fork)
+"""
+
+import aiohttp
+import asyncio
+from typing import List, Optional, Dict, Any
+
+from modules.base_module import LoggingMixin, RateLimitMixin
+from .models import Creator, Post, Attachment
+
+
+class PaidContentAPIClient(LoggingMixin, RateLimitMixin):
+    """
+    API client for Coomer and Kemono archival services
+
+    API Endpoints (relative to {base_url}/api/v1):
+    - GET /creators - List all creators
+    - GET /{platform}/user/{creator_id}/profile - Get creator info
+    - GET /{platform}/user/{creator_id}/posts - Get creator's posts (paginated with ?o=offset)
+    - GET /{platform}/user/{creator_id}/post/{post_id} - Get single post
+    """
+
+    # Fallback URLs if database doesn't have them configured
+    DEFAULT_SERVICE_URLS = {
+        'coomer': 'https://coomer.party',
+        'kemono': 'https://kemono.party'
+    }
+
+    SUPPORTED_PLATFORMS = {
+        'coomer': ['onlyfans', 'fansly', 'candfans'],
+        'kemono': ['patreon', 'fanbox', 'gumroad', 'subscribestar', 'discord']
+    }
+
+    def __init__(self, service_id: str, session_cookie: str = None, base_url: str = None, log_callback=None):
+        self._init_logger('PaidContent', log_callback, default_module='API')
+        self._init_rate_limiter(min_delay=0.5, max_delay=2.0, batch_delay_min=1, batch_delay_max=3)
+
+        self.service_id = service_id
+
+        # Use provided base_url, or fall back to defaults
+        if base_url:
+            # If base_url includes /api/v1, extract just the base
+            if '/api/v1' in base_url:
+                self.base_url = base_url.replace('/api/v1', '').rstrip('/')
+            else:
+                self.base_url = base_url.rstrip('/')
+        else:
+            self.base_url = self.DEFAULT_SERVICE_URLS.get(service_id)
+
+        self.api_url = f"{self.base_url}/api/v1"
+        self.session_cookie = session_cookie
+        self._session: Optional[aiohttp.ClientSession] = None
+
+    async def _get_session(self) -> aiohttp.ClientSession:
+        """Get or create aiohttp session"""
+        if self._session is None or self._session.closed:
+            # Note: Coomer/Kemono require 'Accept: text/css' header as anti-scraping measure
+            # Despite this, they still return JSON responses
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+                'Accept': 'text/css',
+                'Accept-Encoding': 'gzip, deflate, br',
+                'Referer': self.base_url
+            }
+            cookies = {}
+            if self.session_cookie:
+                cookies['session'] = self.session_cookie
+
+            timeout = aiohttp.ClientTimeout(total=30)
+            self._session = aiohttp.ClientSession(headers=headers, cookies=cookies, timeout=timeout)
+        return self._session
+
+    async def close(self):
+        """Close the aiohttp session"""
+        if self._session and not self._session.closed:
+            await self._session.close()
+            self._session = None
+
+    async def __aenter__(self):
+        """Enter `async with`: return the client itself."""
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Leave `async with`: close the HTTP session (exceptions are not suppressed)."""
+        await self.close()
+
+    async def check_health(self) -> Dict[str, Any]:
+        """Check API health status"""
+        import time
+        try:
+            session = await self._get_session()
+            start = time.time()
+            async with session.get(f"{self.api_url}/creators", timeout=aiohttp.ClientTimeout(total=10)) as resp:
+                elapsed = time.time() - start
+                if resp.status == 200:
+                    # content_type=None allows parsing JSON regardless of response content-type
+                    await resp.json(content_type=None)
+                    return {'status': 'healthy', 'response_time': round(elapsed, 3)}
+                elif resp.status == 429:
+                    return {'status': 'rate_limited', 'response_code': 429}
+                else:
+                    return {'status': 'degraded', 'response_code': resp.status}
+        except asyncio.TimeoutError:
+            return {'status': 'timeout', 'error': 'Request timed out'}
+        except Exception as e:
+            return {'status': 'down', 'error': str(e)}
+
+    async def get_all_creators(self) -> List[Dict]:
+        """Get list of all available creators (for search)"""
+        self._delay_between_items()
+        try:
+            session = await self._get_session()
+            async with session.get(f"{self.api_url}/creators") as resp:
+                if resp.status == 200:
+                    return await resp.json(content_type=None)
+                self.log(f"Failed to get creators list: HTTP {resp.status}", 'warning')
+                return []
+        except Exception as e:
+            self.log(f"Error getting creators list: {e}", 'error')
+            return []
+
+    async def get_creator(self, platform: str, creator_id: str) -> Optional[Creator]:
+        """Get creator info"""
+        self._delay_between_items()
+        try:
+            session = await self._get_session()
+
+            # First try to get creator profile
+            url = f"{self.api_url}/{platform}/user/{creator_id}/profile"
+            async with session.get(url) as resp:
+                if resp.status == 200:
+                    data = await resp.json(content_type=None)
+                    return Creator.from_api(data, self.service_id, platform, self.base_url)
+
+            # Fallback: get first post to extract creator info
+            url = f"{self.api_url}/{platform}/user/{creator_id}/posts"
+            async with session.get(url) as resp:
+                if resp.status == 200:
+                    posts = await resp.json(content_type=None)
+                    if posts and len(posts) > 0:
+                        # Extract creator info from first post
+                        first_post = posts[0]
+                        # Construct image URLs - use .st instead of .party
+                        from urllib.parse import urlparse
+                        parsed = urlparse(self.base_url)
+                        # Convert .party to .st for image URLs (coomer.party/kemono.party images are at .st)
+                        netloc = parsed.netloc.replace('.party', '.st')
+                        img_domain = f"img.{netloc}"
+                        profile_image_url = f"https://{img_domain}/icons/{platform}/{creator_id}"
+                        banner_image_url = f"https://{img_domain}/banners/{platform}/{creator_id}"
+                        return Creator(
+                            creator_id=creator_id,
+                            service_id=self.service_id,
+                            platform=platform,
+                            username=first_post.get('user', creator_id),
+                            display_name=first_post.get('user', creator_id),
+                            profile_image_url=profile_image_url,
+                            banner_image_url=banner_image_url
+                        )
+
+            self.log(f"Creator not found: {platform}/{creator_id}", 'warning')
+            return None
+
+        except Exception as e:
+            self.log(f"Error getting creator {platform}/{creator_id}: {e}", 'error')
+            return None
+
+    async def get_creator_posts(self, platform: str, creator_id: str, offset: int = 0) -> List[Post]:
+        """Get creator's posts (50 per page by default)"""
+        self._delay_between_items()
+        try:
+            session = await self._get_session()
+
+            url = f"{self.api_url}/{platform}/user/{creator_id}/posts"
+            params = {'o': offset} if offset > 0 else {}
+
+            async with session.get(url, params=params) as resp:
+                if resp.status == 200:
+                    data = await resp.json(content_type=None)
+                    return [Post.from_api(p, self.service_id, platform, creator_id, self.base_url) for p in data]
+                elif resp.status == 404:
+                    self.log(f"Creator not found: {platform}/{creator_id}", 'warning')
+                else:
+                    self.log(f"Failed to get posts: HTTP {resp.status}", 'warning')
+                return []
+
+        except Exception as e:
+            self.log(f"Error getting posts for {platform}/{creator_id}: {e}", 'error')
+            return []
+
+    async def get_all_creator_posts(self, platform: str, creator_id: str,
+                                     since_date: str = None, max_posts: int = None,
+                                     progress_callback=None) -> List[Post]:
+        """Fetch all posts with pagination"""
+        all_posts = []
+        offset = 0
+        page = 0
+
+        self.log(f"Fetching posts for {platform}/{creator_id}", 'info')
+
+        while True:
+            posts = await self.get_creator_posts(platform, creator_id, offset)
+            if not posts:
+                break
+
+            for post in posts:
+                # Stop if we've reached posts we've already seen
+                if since_date and post.published_at and post.published_at <= since_date:
+                    self.log(f"Reached already-seen post date: {post.published_at}", 'debug')
+                    return all_posts
+
+                all_posts.append(post)
+
+                if max_posts and len(all_posts) >= max_posts:
+                    self.log(f"Reached max posts limit: {max_posts}", 'debug')
+                    return all_posts
+
+            page += 1
+            offset += 50
+
+            if progress_callback:
+                progress_callback(page, len(all_posts))
+
+            self._delay_between_batches()
+
+        self.log(f"Fetched {len(all_posts)} posts for {platform}/{creator_id}", 'info')
+        return all_posts
+
+    async def get_post(self, platform: str, creator_id: str, post_id: str) -> Optional[Post]:
+        """Get single post by ID"""
+        self._delay_between_items()
+        try:
+            session = await self._get_session()
+
+            url = f"{self.api_url}/{platform}/user/{creator_id}/post/{post_id}"
+            async with session.get(url) as resp:
+                if resp.status == 200:
+                    data = await resp.json(content_type=None)
+                    # Single post endpoint wraps response in {"post": {...}}
+                    if isinstance(data, dict) and 'post' in data:
+                        data = data['post']
+                    return Post.from_api(data, self.service_id, platform, creator_id, self.base_url)
+                return None
+
+        except Exception as e:
+            self.log(f"Error getting post {post_id}: {e}", 'error')
+            return None
+
+    async def search_creators(self, query: str, platform: str = None) -> List[Dict]:
+        """Search for creators by name"""
+        self._delay_between_items()
+        try:
+            # Get all creators and filter locally (API doesn't have search endpoint)
+            all_creators = await self.get_all_creators()
+
+            query_lower = query.lower()
+            results = []
+
+            for creator in all_creators:
+                if platform and creator.get('service') != platform:
+                    continue
+
+                name = (creator.get('name') or '').lower()
+                if query_lower in name:
+                    results.append({
+                        'id': creator.get('id'),
+                        'name': creator.get('name'),
+                        'service': creator.get('service'),
+                        'indexed': creator.get('indexed'),
+                        'updated': creator.get('updated'),
+                        'favorited': creator.get('favorited', 0)
+                    })
+
+            # Sort by favorited count (popularity)
+            results.sort(key=lambda x: x.get('favorited', 0), reverse=True)
+            return results[:50]  # Limit results
+
+        except Exception as e:
+            self.log(f"Error searching creators: {e}", 'error')
+            return []
+
+    def get_attachment_url(self, server_path: str) -> str:
+        """Convert server path to full download URL"""
+        if not server_path:
+            return ''
+        if server_path.startswith('http'):
+            return server_path
+        return f"{self.base_url}/data{server_path}"
+
+    def get_thumbnail_url(self, server_path: str) -> str:
+        """Get thumbnail URL for an attachment"""
+        if not server_path:
+            return ''
+        if server_path.startswith('http'):
+            return server_path
+        return f"{self.base_url}/thumbnail/data{server_path}"
+
+    @classmethod
+    def get_supported_platforms(cls, service_id: str) -> List[str]:
+        """Get list of supported platforms for a service"""
+        return cls.SUPPORTED_PLATFORMS.get(service_id, [])
+
+    @classmethod
+    def is_valid_service(cls, service_id: str) -> bool:
+        """Check if service ID is valid"""
+        return service_id in cls.SERVICE_URLS
+
+    @classmethod
+    def get_service_ids(cls) -> List[str]:
+        """Get list of all service IDs"""
+        return list(cls.SERVICE_URLS.keys())
--- a/modules/paid_content/bellazon_client.py
+++ b/modules/paid_content/bellazon_client.py
@@ -0,0 +1,389 @@
+"""
+Bellazon Forum Thread Client for Paid Content
+
+Scrapes Bellazon forum threads (Invision Power Suite) treating each thread
+as a "creator" and each reply with media as a post.
+
+Only bellazon-hosted uploads are captured (external image host links are
+unreliable/ephemeral). Video attachments (attachment.php) are also captured.
+"""
+
+import asyncio
+import html
+import json
+import re
+from datetime import datetime, timezone
+from typing import Dict, List, Optional, Set
+from urllib.parse import urlparse
+
+import aiohttp
+
+from modules.base_module import LoggingMixin
+from .models import Post, Attachment
+
+
+class BellazonClient(LoggingMixin):
+    """Client for scraping Bellazon forum threads."""
+
+    SERVICE_ID = 'bellazon'
+    PLATFORM = 'bellazon'
+    BASE_URL = 'https://www.bellazon.com/main'
+
+    HEADERS = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
+                       '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.9',
+    }
+
+    # Extensions considered images
+    IMAGE_EXTS = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff'}
+    # Extensions considered videos
+    VIDEO_EXTS = {'mp4', 'mov', 'avi', 'mkv', 'webm', 'm4v', 'wmv', 'flv'}
+
+    def __init__(self, log_callback=None):
+        """Set up logging for the Bellazon client.
+
+        Args:
+            log_callback: Optional callback handed to the logging mixin.
+        """
+        self._init_logger('PaidContent', log_callback, default_module='Bellazon')
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    async def get_profile_info(self, topic_id: str) -> Optional[Dict]:
+        """Fetch first page of a thread and return profile-like info.
+
+        Returns dict with: username (slug), display_name, post_count, topic_url
+        """
+        # Bellazon requires a slug in the URL but redirects to the correct one
+        url = f'{self.BASE_URL}/topic/{topic_id}-x/'
+        timeout = aiohttp.ClientTimeout(total=30)
+
+        try:
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                async with session.get(url, headers=self.HEADERS, allow_redirects=True) as resp:
+                    if resp.status != 200:
+                        self.log(f"Bellazon topic {topic_id} returned HTTP {resp.status}", 'warning')
+                        return None
+                    final_url = str(resp.url)
+                    page_html = await resp.text()
+        except Exception as e:
+            self.log(f"Failed to fetch Bellazon topic {topic_id}: {e}", 'error')
+            return None
+
+        # Extract slug from final URL: /topic/{id}-{slug}/
+        slug = self._extract_slug(final_url, topic_id)
+
+        # Extract thread title from <h1>
+        title = self._extract_title(page_html)
+
+        # Extract page count from "Page X of Y"
+        page_count = self._extract_page_count(page_html)
+
+        # Count comments on this page to estimate total
+        comment_ids = re.findall(r'data-commentid="(\d+)"', page_html)
+        per_page = len(comment_ids) or 20
+        estimated_comments = per_page * page_count
+
+        return {
+            'username': slug,
+            'display_name': title or slug,
+            'post_count': estimated_comments,
+            'page_count': page_count,
+            'topic_url': final_url.split('?')[0].rstrip('/'),
+        }
+
+    async def get_posts(self, topic_id: str, topic_url: str,
+                        known_post_ids: Optional[Set[str]] = None,
+                        progress_callback=None) -> List[Post]:
+        """Scrape all pages of a thread and return posts with media."""
+        known = known_post_ids or set()
+        posts: List[Post] = []
+
+        # Fetch page 1 to get page count
+        page1_url = f'{topic_url}/page/1/'
+        timeout = aiohttp.ClientTimeout(total=30)
+
+        try:
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                page_html = await self._fetch_page(session, page1_url)
+                if page_html is None:
+                    return posts
+
+                page_count = self._extract_page_count(page_html)
+                self.log(f"Thread has {page_count} pages", 'info')
+
+                # Parse page 1
+                page_posts = self._parse_page(page_html, topic_id, known)
+                posts.extend(page_posts)
+
+                if progress_callback:
+                    progress_callback(len(posts))
+
+                # Parse remaining pages
+                for page_num in range(2, page_count + 1):
+                    page_url = f'{topic_url}/page/{page_num}/'
+                    await asyncio.sleep(1)  # Rate limit
+
+                    page_html = await self._fetch_page(session, page_url)
+                    if page_html is None:
+                        self.log(f"Failed to fetch page {page_num}, stopping", 'warning')
+                        break
+
+                    page_posts = self._parse_page(page_html, topic_id, known)
+                    posts.extend(page_posts)
+
+                    if progress_callback:
+                        progress_callback(len(posts))
+
+                    self.log(f"Page {page_num}/{page_count}: {len(page_posts)} posts with media", 'debug')
+
+        except Exception as e:
+            self.log(f"Error scraping Bellazon thread: {e}", 'error')
+
+        self.log(f"Total: {len(posts)} posts with media from {page_count} pages", 'info')
+        return posts
+
+    # ------------------------------------------------------------------
+    # HTML parsing helpers
+    # ------------------------------------------------------------------
+
+    def _parse_page(self, page_html: str, topic_id: str, known: Set[str]) -> List[Post]:
+        """Parse a single page of HTML and return Post objects for comments with media."""
+        posts: List[Post] = []
+
+        # Split HTML into comment blocks using data-commentid markers
+        # Each comment starts with data-commentid="..." and contains a content block
+        comment_pattern = re.compile(
+            r'data-commentid="(\d+)"\s+data-quotedata="([^"]*)"',
+            re.DOTALL
+        )
+
+        matches = list(comment_pattern.finditer(page_html))
+        if not matches:
+            return posts
+
+        for i, match in enumerate(matches):
+            comment_id = match.group(1)
+            post_id = f"comment_{comment_id}"
+
+            if post_id in known:
+                continue
+
+            quotedata_raw = match.group(2)
+
+            # Parse quote data for username and timestamp
+            username, timestamp = self._parse_quotedata(quotedata_raw)
+
+            # Extract the content block for this comment
+            start = match.end()
+            end = matches[i + 1].start() if i + 1 < len(matches) else len(page_html)
+            content_block = page_html[start:end]
+
+            # Find the actual content within data-role="commentContent"
+            # The closing pattern is </div> followed by blank lines then </div>
+            content_match = re.search(
+                r'data-role="commentContent"[^>]*>(.*?)</div>\s*\n\s*\n\s*</div>',
+                content_block, re.DOTALL
+            )
+            if not content_match:
+                # Fallback: grab everything from commentContent to ipsEntry__foot
+                content_match = re.search(
+                    r'data-role="commentContent"[^>]*>(.*?)(?=ipsEntry__foot)',
+                    content_block, re.DOTALL
+                )
+            if not content_match:
+                continue
+
+            content_html = content_match.group(1)
+
+            # Extract media from content
+            attachments = self._extract_media(content_html)
+
+            if not attachments:
+                continue  # Skip text-only replies
+
+            # Build published_at from timestamp
+            published_at = None
+            if timestamp:
+                try:
+                    dt = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                    published_at = dt.isoformat()
+                except (ValueError, OSError):
+                    pass
+
+            post = Post(
+                post_id=post_id,
+                service_id=self.SERVICE_ID,
+                platform=self.PLATFORM,
+                creator_id=topic_id,
+                title='',
+                content=f"Posted by {username}" if username else '',
+                published_at=published_at,
+                attachments=attachments,
+            )
+            posts.append(post)
+            known.add(post_id)
+
+        return posts
+
+    def _extract_media(self, content_html: str) -> List[Attachment]:
+        """Extract image and video attachments from a comment's HTML content."""
+        attachments: List[Attachment] = []
+        seen_urls: set = set()
+
+        # 1. Bellazon-hosted images: <a class="ipsAttachLink ipsAttachLink_image" href="...full..."><img src="...thumb...">
+        for m in re.finditer(
+            r'ipsAttachLink_image"\s+href="([^"]+)"[^>]*><img[^>]*src="([^"]+)"',
+            content_html
+        ):
+            full_url = self._normalize_url(m.group(1))
+            if full_url in seen_urls:
+                continue
+            # Skip thumbnails as the full URL
+            if '_thumb.' in full_url or '.thumb.' in full_url:
+                continue
+            seen_urls.add(full_url)
+            attachments.append(self._make_attachment(full_url, 'image'))
+
+        # 2. Direct image/video links from bellazon uploads not caught by pattern 1
+        for m in re.finditer(
+            r'href="([^"]*bellazon\.com/main/uploads/[^"]+)"',
+            content_html
+        ):
+            url = self._normalize_url(m.group(1))
+            if url in seen_urls:
+                continue
+            if '_thumb.' in url or '.thumb.' in url:
+                continue
+            ext = self._get_extension(url)
+            if ext in self.IMAGE_EXTS or ext in self.VIDEO_EXTS:
+                seen_urls.add(url)
+                file_type = 'image' if ext in self.IMAGE_EXTS else 'video'
+                attachments.append(self._make_attachment(url, file_type))
+
+        # 3. Video <source> tags: <source src="//www.bellazon.com/main/uploads/...MP4" type="video/mp4">
+        for m in re.finditer(
+            r'<source\s+src="([^"]+)"[^>]*type="video/',
+            content_html
+        ):
+            url = self._normalize_url(m.group(1))
+            if url in seen_urls:
+                continue
+            seen_urls.add(url)
+            name = self._filename_from_url(url)
+            attachments.append(self._make_attachment(url, 'video', name=name))
+
+        # 4. Video/file attachments: <a href="...attachment.php?id=XXX">filename.MP4</a>
+        # These are protocol-relative URLs like //www.bellazon.com/main/applications/...
+        for m in re.finditer(
+            r'href="([^"]*attachment\.php\?id=\d+[^"]*)"[^>]*>([^<]+)',
+            content_html
+        ):
+            att_url = self._normalize_url(m.group(1))
+            filename = m.group(2).strip()
+            if att_url in seen_urls:
+                continue
+            ext = self._get_extension(filename)
+            if ext in self.VIDEO_EXTS or ext in self.IMAGE_EXTS:
+                seen_urls.add(att_url)
+                file_type = 'video' if ext in self.VIDEO_EXTS else 'image'
+                attachments.append(self._make_attachment(att_url, file_type, name=filename))
+
+        return attachments
+
+    def _make_attachment(self, url: str, file_type: str, name: str = None) -> Attachment:
+        """Create an Attachment from a URL."""
+        if name is None:
+            name = self._filename_from_url(url)
+        ext = self._get_extension(name)
+
+        return Attachment(
+            name=name,
+            file_type=file_type,
+            extension=ext if ext else None,
+            server_path=url,  # Used as dedup key
+            download_url=url,
+        )
+
+    # ------------------------------------------------------------------
+    # Utility helpers
+    # ------------------------------------------------------------------
+
+    async def _fetch_page(self, session: aiohttp.ClientSession, url: str) -> Optional[str]:
+        """Fetch a single page, return HTML or None."""
+        try:
+            async with session.get(url, headers=self.HEADERS, allow_redirects=True) as resp:
+                if resp.status != 200:
+                    self.log(f"HTTP {resp.status} for {url}", 'warning')
+                    return None
+                return await resp.text()
+        except Exception as e:
+            self.log(f"Error fetching {url}: {e}", 'warning')
+            return None
+
+    @staticmethod
+    def _extract_slug(url: str, topic_id: str) -> str:
+        """Extract slug from URL like /topic/39089-india-reynolds/"""
+        m = re.search(rf'/topic/{re.escape(topic_id)}-([^/?#]+)', url)
+        if m:
+            return m.group(1).strip('/')
+        return topic_id
+
+    @staticmethod
+    def _extract_title(page_html: str) -> Optional[str]:
+        """Extract thread title from <h1>."""
+        m = re.search(r'<h1[^>]*>([^<]+)</h1>', page_html)
+        if m:
+            return html.unescape(m.group(1).strip())
+        m = re.search(r'<title>([^<]+)</title>', page_html, re.IGNORECASE)
+        if m:
+            title = html.unescape(m.group(1).strip())
+            # Remove site suffix
+            title = re.sub(r'\s*[-–—]\s*Bellazon.*$', '', title, flags=re.IGNORECASE).strip()
+            return title
+        return None
+
+    @staticmethod
+    def _extract_page_count(page_html: str) -> int:
+        """Extract total page count from 'Page X of Y'."""
+        m = re.search(r'Page\s+\d+\s+of\s+(\d+)', page_html)
+        if m:
+            return int(m.group(1))
+        return 1
+
+    @staticmethod
+    def _parse_quotedata(raw: str) -> tuple:
+        """Parse HTML-encoded JSON quotedata, return (username, unix_timestamp)."""
+        try:
+            decoded = html.unescape(raw)
+            data = json.loads(decoded)
+            return data.get('username', ''), data.get('timestamp')
+        except (json.JSONDecodeError, ValueError):
+            return '', None
+
+    @staticmethod
+    def _normalize_url(url: str) -> str:
+        """Normalize a URL: handle protocol-relative, decode HTML entities, make absolute."""
+        url = html.unescape(url)  # &amp; → &
+        if url.startswith('//'):
+            url = 'https:' + url
+        elif url.startswith('/'):
+            url = 'https://www.bellazon.com' + url
+        elif not url.startswith('http'):
+            url = 'https://www.bellazon.com/main/' + url
+        return url
+
+    @staticmethod
+    def _get_extension(filename_or_url: str) -> str:
+        """Get lowercase file extension from a filename or URL."""
+        # Strip query params
+        clean = filename_or_url.split('?')[0].split('#')[0]
+        if '.' in clean.split('/')[-1]:
+            return clean.rsplit('.', 1)[-1].lower()
+        return ''
+
+    @staticmethod
+    def _filename_from_url(url: str) -> str:
+        """Extract filename from URL path."""
+        path = urlparse(url).path
+        name = path.rstrip('/').split('/')[-1]
+        return name if name else 'unnamed'
--- a/modules/paid_content/besteyecandy_client.py
+++ b/modules/paid_content/besteyecandy_client.py
@@ -0,0 +1,468 @@
+"""
+BestEyeCandy.com Client for Paid Content
+
+Scrapes celebrity photo galleries from BestEyeCandy.com.
+Each celeb has a unique CID and paginated photo listings.
+
+Optimization: Full-res URLs follow a predictable pattern. We visit ONE
+detail page to determine the pattern (server hostname + name format),
+then construct all remaining URLs from photo IDs found on listing pages.
+"""
+
+import asyncio
+import html
+import json
+import re
+from datetime import datetime, timezone
+from typing import Dict, List, Optional, Set
+from urllib.parse import urlparse
+
+import aiohttp
+
+from modules.base_module import LoggingMixin
+from .models import Post, Attachment
+
+
+class BestEyeCandyClient(LoggingMixin):
+    """Client for scraping BestEyeCandy.com celebrity photo galleries.
+
+    Pages are fetched with aiohttp, using cookies from the ``scrapers`` table
+    when a database is supplied. Each listing page is exposed as one Post.
+    """
+
+    SERVICE_ID = 'besteyecandy'
+    PLATFORM = 'besteyecandy'
+    BASE_URL = 'https://besteyecandy.com'
+
+    HEADERS = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
+                       '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.9',
+    }
+
+    def __init__(self, unified_db=None, log_callback=None):
+        """Set up logging and keep the optional database used for cookie lookup."""
+        self._init_logger('PaidContent', log_callback, default_module='BestEyeCandy')
+        self.unified_db = unified_db
+
+    # ------------------------------------------------------------------
+    # Cookie support
+    # ------------------------------------------------------------------
+
+    def _get_cookies(self) -> Optional[list]:
+        """Load cookies from the scrapers table for besteyecandy.
+
+        Returns the stored cookie list (or the 'cookies' entry when a dict was
+        stored), or None when no database, row or readable JSON is available.
+        """
+        if not self.unified_db:
+            return None
+
+        try:
+            with self.unified_db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute("SELECT cookies_json FROM scrapers WHERE id = ?",
+                               (self.SERVICE_ID,))
+                row = cursor.fetchone()
+                if row and row[0]:
+                    data = json.loads(row[0])
+                    if isinstance(data, dict) and 'cookies' in data:
+                        return data['cookies']
+                    elif isinstance(data, list):
+                        return data
+        except Exception as e:
+            # Best effort: any DB or JSON problem simply means "no cookies".
+            self.log(f"Could not load cookies: {e}", 'debug')
+
+        return None
+
+    def _build_cookie_jar(self, cookies_list: list) -> aiohttp.CookieJar:
+        """Build an aiohttp CookieJar from a list of cookie dicts.
+
+        Each dict may carry name, value, domain, path and secure. An entry
+        whose name is not a legal cookie key is skipped, not fatal.
+        """
+        from http.cookies import CookieError, SimpleCookie
+
+        jar = aiohttp.CookieJar(unsafe=True)
+        for cookie in cookies_list:
+            name = cookie.get('name', '')
+            sc = SimpleCookie()
+            try:
+                sc[name] = cookie.get('value', '')
+            except (CookieError, TypeError) as e:
+                self.log(f"Skipping invalid cookie {name!r}: {e}", 'debug')
+                continue
+            sc[name]['domain'] = cookie.get('domain') or ''
+            sc[name]['path'] = cookie.get('path') or '/'
+            if cookie.get('secure'):
+                sc[name]['secure'] = True
+
+            # The morsel's own domain/path scope the cookie. update_cookies()
+            # expects a yarl.URL as response_url (a urllib ParseResult is not
+            # accepted), and no URL is needed once the domain is set.
+            jar.update_cookies(sc)
+
+        return jar
+
+    def _create_session(self, timeout: aiohttp.ClientTimeout = None) -> aiohttp.ClientSession:
+        """Create an aiohttp session with cookies loaded from DB.
+
+        The total timeout defaults to 60 seconds when none is given.
+        """
+        if timeout is None:
+            timeout = aiohttp.ClientTimeout(total=60)
+
+        cookies_list = self._get_cookies()
+        if cookies_list:
+            jar = self._build_cookie_jar(cookies_list)
+            self.log(f"Loaded {len(cookies_list)} cookies for session", 'debug')
+            return aiohttp.ClientSession(timeout=timeout, cookie_jar=jar)
+        else:
+            self.log("No cookies found for besteyecandy, requests may fail", 'warning')
+            return aiohttp.ClientSession(timeout=timeout)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    async def get_profile_info(self, cid: str, celeb_slug: str) -> Optional[Dict]:
+        """Fetch page 1 of a celeb's listing and return profile-like info.
+
+        Returns a dict with username, display_name, post_count, page_count
+        and celeb_url, or None when the page cannot be fetched.
+        """
+        url = self._listing_url(cid, celeb_slug)
+
+        try:
+            async with self._create_session() as session:
+                async with session.get(url, headers=self.HEADERS,
+                                       allow_redirects=True) as resp:
+                    if resp.status != 200:
+                        self.log(f"BestEyeCandy cid {cid} returned HTTP {resp.status}",
+                                 'warning')
+                        return None
+                    page_html = await resp.text()
+        except Exception as e:
+            self.log(f"Failed to fetch BestEyeCandy cid {cid}: {e}", 'error')
+            return None
+
+        # Extract celeb name from page title or heading
+        celeb_name = self._extract_celeb_name(page_html) or celeb_slug.replace('-', ' ')
+
+        # Page size is read off page 1 itself; 48 is the fallback when no IDs parse
+        photos_per_page = len(self._extract_photo_ids(page_html)) or 48
+
+        return {
+            'username': celeb_slug,
+            'display_name': celeb_name,
+            'post_count': self._extract_total_photos(page_html),
+            'page_count': self._extract_page_count(
+                page_html, photos_per_page=photos_per_page),
+            'celeb_url': url,
+        }
+
+    async def get_posts(self, cid: str, celeb_slug: str,
+                        known_post_ids: Optional[Set[str]] = None,
+                        progress_callback=None) -> List[Post]:
+        """Scrape all listing pages and return posts with full-res image URLs.
+
+        Each listing page becomes one Post with ~48 Attachments (one per photo).
+        Post IDs are "page_N" (e.g. "page_1", "page_2", ...).
+
+        Phase 1: Fetch page 1, get first photo ID, visit detail page to learn
+                 the full-res URL pattern.
+        Phase 2: Paginate all listing pages, build one Post per page.
+
+        Pages listed in known_post_ids are still fetched (pagination needs them)
+        but produce no Post. Unexpected errors are logged and the posts built
+        so far are returned.
+        """
+        known = known_post_ids or set()
+        posts: List[Post] = []
+        total_photos = 0
+
+        try:
+            async with self._create_session() as session:
+                # -- Phase 1: Fetch page 1 and determine full-res URL pattern --
+                page_html = await self._fetch_page(
+                    session, self._listing_url(cid, celeb_slug))
+                if page_html is None:
+                    return []
+
+                # Parse page 1 once: the IDs feed the estimate, the pattern
+                # discovery and the first iteration of the paging loop.
+                page_ids = self._extract_photo_ids(page_html)
+
+                # Estimate page count for progress display
+                photos_per_page = len(page_ids) or 48
+                estimated_pages = self._extract_page_count(
+                    page_html, photos_per_page=photos_per_page)
+                self.log(f"Estimated {estimated_pages} pages of photos "
+                         f"({photos_per_page}/page)", 'info')
+
+                # Discover full-res URL pattern from first photo
+                url_pattern = None
+                if page_ids:
+                    url_pattern = await self._discover_url_pattern(
+                        session, page_ids[0], cid, celeb_slug)
+
+                if not url_pattern:
+                    self.log("Could not determine full-res URL pattern", 'error')
+                    return []
+
+                self.log(f"URL pattern: server={url_pattern['server']}, "
+                         f"name_format={url_pattern['name_format']}, "
+                         f"ext={url_pattern['ext']}", 'info')
+
+                # -- Phase 2: Paginate all pages, one Post per page --
+                page_num = 0
+                has_next = True  # start with page 1
+
+                while has_next:
+                    page_num += 1
+
+                    if page_num > 1:  # page 1 was already fetched in phase 1
+                        await asyncio.sleep(2)  # Rate limit
+
+                        page_html = await self._fetch_page(
+                            session, self._listing_url(cid, celeb_slug, page_num))
+                        if page_html is None:
+                            self.log(f"Failed to fetch page {page_num}, stopping",
+                                     'warning')
+                            break
+                        page_ids = self._extract_photo_ids(page_html)
+
+                    if not page_ids:
+                        self.log(f"Page {page_num}: no photos, stopping", 'info')
+                        break
+
+                    total_photos += len(page_ids)
+                    has_next = self._has_next_page(page_html)
+
+                    # Check if this page-post is already known
+                    post_id = f"page_{page_num}"
+                    if post_id in known:
+                        self.log(f"Page {page_num}: already known, skipping",
+                                 'debug')
+                        if progress_callback:
+                            progress_callback(
+                                f"Page {page_num}/~{estimated_pages} — "
+                                f"{total_photos} photos (skipped known)")
+                        continue
+
+                    # Build attachments for all photos on this page
+                    attachments = []
+                    for photo_id in page_ids:
+                        dl_url = self._construct_full_res_url(url_pattern, photo_id)
+                        filename = dl_url.rsplit('/', 1)[-1]
+
+                        attachments.append(Attachment(
+                            name=filename,
+                            file_type='image',
+                            extension=url_pattern.get('ext', 'jpg'),
+                            server_path=dl_url,
+                            download_url=dl_url,
+                        ))
+
+                    post = Post(
+                        post_id=post_id,
+                        service_id=self.SERVICE_ID,
+                        platform=self.PLATFORM,
+                        creator_id=cid,
+                        title=f"Page {page_num}",
+                        content=f"{len(page_ids)} photos",
+                        published_at=datetime.now(tz=timezone.utc).isoformat(),
+                        attachments=attachments,
+                    )
+                    posts.append(post)
+
+                    if progress_callback:
+                        progress_callback(
+                            f"Page {page_num}/~{estimated_pages} — "
+                            f"{total_photos} photos")
+
+                    self.log(f"Page {page_num}/~{estimated_pages}: "
+                             f"{len(page_ids)} photos", 'debug')
+
+        except Exception as e:
+            self.log(f"Error scraping BestEyeCandy: {e}", 'error')
+
+        self.log(f"Total: {len(posts)} new page-posts with "
+                 f"{total_photos} photos across all pages", 'info')
+        return posts
+
+    # ------------------------------------------------------------------
+    # URL pattern discovery
+    # ------------------------------------------------------------------
+
+    async def _discover_url_pattern(self, session: aiohttp.ClientSession,
+                                    photo_id: str, cid: str,
+                                    celeb_slug: str) -> Optional[Dict]:
+        """Visit a detail page to discover the full-res URL pattern.
+
+        Returns dict with keys: server, dir_pattern, name_format, ext,
+        example_url. Returns None when the page cannot be fetched or shows
+        no full-res URL whose path contains ``photo_id``.
+        """
+        detail_url = (f'{self.BASE_URL}/section/celeb-photogallery/'
+                      f'cid-{cid}/{celeb_slug}/photo-{photo_id}.html')
+
+        await asyncio.sleep(2)  # Rate limit
+        page_html = await self._fetch_page(session, detail_url)
+        if page_html is None:
+            return None
+
+        # Look for full-res image URL in the detail page
+        # Pattern: <img src="https://euX.besteyecandy.com/section/large-photos/area-female/besteyecandy-{ID}/{Name}_{ID}_BestEyeCandyCOM.jpg">
+        # or <a href="..."> with similar pattern
+        patterns = [
+            r'(https?://[a-z0-9]+\.besteyecandy\.com/section/large-photos/[^"\'>\s]+)',
+            r'(https?://[a-z0-9]+\.besteyecandy\.com/[^"\'>\s]*besteyecandy-' + re.escape(photo_id) + r'[^"\'>\s]*)',
+        ]
+
+        # Accept only a URL that embeds this photo's ID. A URL belonging to
+        # another image would give a template without {ID}, which would map
+        # every photo on every page to one and the same file.
+        candidates = (url for pattern in patterns
+                      for url in re.findall(pattern, page_html))
+        full_res_url = next(
+            (url for url in candidates if photo_id in urlparse(url).path), None)
+
+        if not full_res_url:
+            self.log(f"Could not find full-res URL on detail page for photo {photo_id}",
+                     'error')
+            return None
+
+        self.log(f"Found full-res URL: {full_res_url}", 'debug')
+
+        # Split the URL into host, directory and file name, e.g.
+        #   eu4.besteyecandy.com
+        #   /section/large-photos/area-female/besteyecandy-7727820
+        #   Myleene_Klass_7727820_BestEyeCandyCOM.jpg
+        parsed = urlparse(full_res_url)
+        path_dir, _, filename = parsed.path.rpartition('/')
+        name_without_ext, dot, ext = filename.rpartition('.')
+        if not dot:  # no extension in the URL: assume jpg
+            name_without_ext, ext = filename, 'jpg'
+
+        # Replace the concrete photo ID with the {ID} placeholder
+        return {
+            'server': parsed.netloc,
+            'dir_pattern': path_dir.replace(photo_id, '{ID}'),
+            'name_format': name_without_ext.replace(photo_id, '{ID}'),
+            'ext': ext,
+            'example_url': full_res_url,
+        }
+
+    def _construct_full_res_url(self, url_pattern: Dict, photo_id: str) -> str:
+        """Construct the full-res URL for a photo ID using the discovered pattern."""
+        dir_path = url_pattern['dir_pattern'].replace('{ID}', photo_id)
+        filename = url_pattern['name_format'].replace('{ID}', photo_id) + '.' + url_pattern['ext']
+        return f"https://{url_pattern['server']}{dir_path}/{filename}"
+
+    # ------------------------------------------------------------------
+    # HTML parsing helpers
+    # ------------------------------------------------------------------
+
+    def _extract_photo_ids(self, page_html: str) -> List[str]:
+        """Extract photo IDs from a listing page.
+
+        Photo links look like: href="...photo-12345.html"
+        Returns the unique IDs in the order they first appear.
+        """
+        ids = re.findall(r'href="[^"]*photo-(\d+)\.html"', page_html)
+        # dict.fromkeys de-duplicates while keeping insertion order
+        return list(dict.fromkeys(ids))
+
+    @staticmethod
+    def _extract_celeb_name(page_html: str) -> Optional[str]:
+        """Extract celebrity name from the page."""
+        # Try <title> tag: "Myleene Klass Photo Collection @ ...::: BestEyeCandy.com :::..."
+        m = re.search(r'<title>([^<]+)</title>', page_html, re.IGNORECASE)
+        if m:
+            title = html.unescape(m.group(1).strip())
+            # Remove everything from "Photo Collection" or "@" onwards
+            title = re.sub(r'\s*Photo\s+Collection.*$', '', title,
+                           flags=re.IGNORECASE).strip()
+            title = re.sub(r'\s*@.*$', '', title).strip()
+            # Fallback: remove BestEyeCandy suffix
+            title = re.sub(r'\s*[-\u2013\u2014|]?\s*\.{0,3}:{0,3}\s*BestEyeCandy.*$', '',
+                           title, flags=re.IGNORECASE).strip()
+            if title:
+                return title
+
+        # Try <h1> or <h2>
+        m = re.search(r'<h[12][^>]*>([^<]+)</h[12]>', page_html)
+        if m:
+            return html.unescape(m.group(1).strip())
+
+        return None
+
+    @staticmethod
+    def _extract_total_photos(page_html: str) -> int:
+        """Extract total photo count from the page.
+
+        Handles European format (15.660) and US format (15,660).
+        Returns 0 when no "<number> photos" text is present.
+        """
+        # Look for "N.NNN photos" or "N,NNN photos" or "NNN photos"
+        # Require leading digit to avoid matching ", photo" from keywords
+        m = re.search(r'(\d[\d.,]*)\s+photos?', page_html, re.IGNORECASE)
+        if not m:
+            return 0
+        # Dots and commas are both treated as thousands separators; what
+        # remains is digits only, so int() cannot fail here.
+        return int(m.group(1).replace('.', '').replace(',', ''))
+
+    @staticmethod
+    def _extract_page_count(page_html: str, photos_per_page: int = 48) -> int:
+        """Extract total page count from the listing page.
+
+        Uses total photo count divided by photos per page, or falls back
+        to finding the maximum page number in pagination links.
+        """
+        # Method 1: Calculate from total photos (ceiling division)
+        total = BestEyeCandyClient._extract_total_photos(page_html)
+        if total > 0 and photos_per_page > 0:
+            return (total + photos_per_page - 1) // photos_per_page
+
+        # Method 2: Largest N among all /page-N/ links found in the page
+        page_nums = [int(x) for x in re.findall(r'/page-(\d+)/', page_html)]
+        if page_nums:
+            return max(page_nums)
+
+        return 1
+
+    @staticmethod
+    def _has_next_page(page_html: str) -> bool:
+        """Check if there's a 'Next Page' link on the current page."""
+        # Plain substring test on the alt text; no HTML parsing is done
+        return 'alt="Next Page"' in page_html
+
+    # ------------------------------------------------------------------
+    # Utility helpers
+    # ------------------------------------------------------------------
+
+    def _listing_url(self, cid: str, celeb_slug: str, page_num: int = 1) -> str:
+        """Return the URL of listing page ``page_num`` (sorted by age) for a celeb."""
+        return (f'{self.BASE_URL}/section/celeb-photogallery/cid-{cid}/'
+                f'sortedby-age/page-{page_num}/{celeb_slug}.html')
+
+    async def _fetch_page(self, session: aiohttp.ClientSession,
+                          url: str) -> Optional[str]:
+        """Fetch a single page, return HTML or None.
+
+        None is returned (after a warning log) for non-200 responses and errors.
+        """
+        try:
+            async with session.get(url, headers=self.HEADERS,
+                                   allow_redirects=True) as resp:
+                if resp.status != 200:
+                    self.log(f"HTTP {resp.status} for {url}", 'warning')
+                    return None
+                return await resp.text()
+        except Exception as e:
+            self.log(f"Error fetching {url}: {e}", 'warning')
+            return None
--- a/modules/paid_content/coppermine_client.py
+++ b/modules/paid_content/coppermine_client.py
@@ -0,0 +1,622 @@
+"""
+Coppermine Gallery scraper client.
+
+Coppermine is a PHP photo gallery with a nested structure:
+  categories > sub-categories > albums > photos
+
+One album maps to one Post with N Attachments.
+Full-res URLs are derived from thumbnails by stripping the `thumb_` prefix.
+"""
+
+import asyncio
+import re
+from datetime import datetime
+from typing import Dict, List, Optional, Set
+from urllib.parse import urljoin, urlparse, parse_qs
+
+import aiohttp
+
+from modules.base_module import LoggingMixin
+from .models import Post, Attachment
+
+
+class CoppermineClient(LoggingMixin):
+    SERVICE_ID = 'coppermine'
+    PLATFORM = 'coppermine'
+    HEADERS = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
+                       '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.5',
+    }
+
+    IMAGE_EXTS = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff'}
+
+    def __init__(self, log_callback=None):  # log_callback is forwarded to _init_logger unchanged
+        self._init_logger('PaidContent', log_callback, default_module='Coppermine')
+
+    async def get_profile_info(self, gallery_url: str) -> Optional[Dict]:
+        """Fetch gallery root and extract profile metadata.
+
+        Args:
+            gallery_url: Base gallery URL (e.g. https://kylie-jenner.org/gallery)
+
+        Returns:
+            Dict with username, display_name, post_count, gallery_url or None on failure
+        """
+        # Function-scope import: this module has no top-level html import
+        from html import unescape as unescape_entities
+
+        root_url = self._build_url(gallery_url, 'index.php')
+        timeout = aiohttp.ClientTimeout(total=30)
+        try:
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                page_html = await self._fetch_page(session, root_url)
+                if not page_html:
+                    return None
+
+                # Extract site title from <title> tag
+                title_match = re.search(r'<title[^>]*>(.*?)</title>', page_html,
+                                        re.DOTALL | re.IGNORECASE)
+                site_title = title_match.group(1).strip() if title_match else 'Coppermine Gallery'
+                # Decode entities (&amp;, &#39;, &eacute;, ...) in one pass so that
+                # apostrophes and accented letters are kept instead of deleted.
+                site_title = unescape_entities(site_title).strip()
+
+                # Try to extract stats: "N files in M albums"
+                total_albums = 0
+                stats_match = re.search(
+                    r'(\d[\d,]*)\s+files?\s+in\s+(\d[\d,]*)\s+albums?',
+                    page_html, re.IGNORECASE
+                )
+                if stats_match:
+                    # Only the album count (group 2) is reported, as post_count
+                    total_albums = int(stats_match.group(2).replace(',', ''))
+
+                # Use domain as username
+                parsed = urlparse(gallery_url)
+                domain = parsed.netloc.replace('www.', '')
+
+                return {
+                    'username': domain,
+                    'display_name': site_title,
+                    'post_count': total_albums,
+                    'gallery_url': gallery_url,
+                }
+        except Exception as e:
+            self.log(f"Error fetching profile info from {gallery_url}: {e}", 'error')
+            return None
+
+    async def get_posts(self, gallery_url: str,
+                        known_post_ids: Optional[Set[str]] = None,
+                        progress_callback=None,
+                        post_callback=None):
+        """Crawl the gallery and deliver new albums as Post objects.
+
+        Phase 1: Fetch root, extract top-level category links
+        Phase 2: Recursively crawl categories until album links found
+        Phase 3: For each album, fetch thumbnails and call post_callback immediately
+
+        Args:
+            gallery_url: Base gallery URL
+            known_post_ids: Set of post IDs already in DB (album_NNN)
+            progress_callback: Called with status message strings
+            post_callback: async callable(post) — called for each album as it's fetched.
+                          If provided, posts are streamed instead of collected.
+
+        Returns:
+            List of Post objects, or None when post_callback is provided
+        """
+        known = known_post_ids or set()
+        timeout = aiohttp.ClientTimeout(total=None, sock_connect=30, sock_read=60)
+        posts_collected = [] if post_callback is None else None
+
+        try:
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                # Phase 1: Get all category links from root
+                root_url = self._build_url(gallery_url, 'index.php')
+                root_html = await self._fetch_page(session, root_url)
+                if not root_html:
+                    self.log("Failed to fetch gallery root", 'error')
+                    return posts_collected
+
+                category_ids = self._extract_category_ids(root_html)
+                self.log(f"Found {len(category_ids)} top-level categories", 'info')
+
+                if progress_callback:
+                    progress_callback(f'Found {len(category_ids)} categories, crawling...')
+
+                # Phase 2: Recursively crawl categories to find album IDs
+                album_ids = set()
+                visited_cats = set()
+                for cat_id in category_ids:
+                    new_albums = await self._crawl_category(
+                        session, gallery_url, cat_id, visited_cats, known, progress_callback
+                    )
+                    album_ids.update(new_albums)
+
+                # Filter out known albums
+                new_album_ids = {aid for aid in album_ids
+                                 if f"album_{aid}" not in known}
+
+                self.log(f"Found {len(new_album_ids)} new albums "
+                         f"({len(album_ids)} total, {len(album_ids) - len(new_album_ids)} known)",
+                         'info')
+
+                if progress_callback:
+                    progress_callback(f'Found {len(new_album_ids)} new albums, fetching photos...')
+
+                # Phase 3: Fetch each new album and deliver Post objects
+                parsed = urlparse(gallery_url)
+                domain = parsed.netloc.replace('www.', '')
+                fetched = 0
+                # IDs are digit strings: sort numerically (plain sorted() puts '10' before '9')
+                for i, album_id in enumerate(sorted(new_album_ids, key=int)):
+                    if progress_callback and (i + 1) % 5 == 0:
+                        progress_callback(
+                            f'Fetching album {i + 1}/{len(new_album_ids)}...'
+                        )
+
+                    post = await self._fetch_album(session, gallery_url, album_id, domain)
+                    if post and post.attachments:
+                        fetched += 1
+                        if post_callback:
+                            await post_callback(post)
+                        else:
+                            posts_collected.append(post)
+
+                    # Rate limit: pause 2s between album fetches
+                    await asyncio.sleep(2)
+
+                self.log(f"Fetched {fetched} albums with attachments", 'info')
+                return posts_collected
+
+        except Exception as e:
+            self.log(f"Error crawling gallery {gallery_url}: {e}", 'error')
+            return posts_collected  # keep albums gathered before the failure
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    def _build_url(self, gallery_url: str, page: str) -> str:
+        """Join the gallery base (trailing slashes dropped) and a page name with '/'."""
+        root = gallery_url.rstrip('/')
+        return '/'.join((root, page))
+
+    async def _fetch_page(self, session: aiohttp.ClientSession, url: str,
+                          max_retries: int = 3) -> Optional[str]:
+        """Fetch a page and return its HTML text, or None on failure.
+
+        Retries (delay grows linearly) on HTTP 429 and connection errors / disconnects.
+        """
+        for attempt in range(max_retries):
+            is_last = attempt == max_retries - 1
+            try:
+                async with session.get(url, headers=self.HEADERS) as resp:
+                    # On the last attempt a 429 is reported below instead of slept on
+                    if resp.status == 429 and not is_last:
+                        wait = 5 * (attempt + 1)
+                        self.log(f"Rate limited on {url}, waiting {wait}s", 'warning')
+                        await asyncio.sleep(wait)
+                        continue
+                    if resp.status != 200:
+                        self.log(f"HTTP {resp.status} fetching {url}", 'warning')
+                        return None
+                    return await resp.text()
+            except (aiohttp.ServerDisconnectedError, aiohttp.ClientOSError,
+                    aiohttp.ClientPayloadError, ConnectionResetError) as e:
+                if is_last:
+                    self.log(f"Failed after {max_retries} attempts: {url}: {e}", 'warning')
+                    return None
+                wait = 3 * (attempt + 1)
+                self.log(f"Connection error on {url}, retry {attempt + 1} in {wait}s: {e}",
+                         'warning')
+                await asyncio.sleep(wait)
+            except Exception as e:
+                self.log(f"Error fetching {url}: {e}", 'warning')
+                return None
+
+    def _extract_category_ids(self, html: str) -> List[str]:
+        """Collect the category IDs linked from an index.php page.
+
+        Category links have the form: index.php?cat=N
+
+        Returns:
+            The distinct N values as strings, in order of first appearance.
+        """
+        matches = re.findall(r'index\.php\?cat=(\d+)', html)
+        # dict keys are unique and keep insertion order, so this
+        # de-duplicates without reordering.
+        unique_in_order = dict.fromkeys(matches)
+        return list(unique_in_order)
+
+    def _extract_album_ids(self, html: str) -> List[str]:
+        """Collect the album IDs linked from a category page.
+
+        Album links have the form: thumbnails.php?album=N
+
+        Returns the distinct N values as strings, in order of first appearance.
+        """
+        already_seen = set()
+        return [
+            album_id for album_id in re.findall(r'thumbnails\.php\?album=(\d+)', html)
+            # set.add() returns None, so the right operand records the ID and stays falsy
+            if not (album_id in already_seen or already_seen.add(album_id))
+        ]
+
+    def _extract_page_count(self, html: str) -> int:
+        """Read the page total from Coppermine's pagination text.
+
+        The text looks like "53 albums on 2 page(s)" or "N files on M page(s)".
+        Returns 1 when no such text is found.
+        """
+        pager = re.search(r'on\s+(\d+)\s+page\(s\)', html, re.IGNORECASE)
+        # Fall back to 1 when the pagination text is missing
+        return int(pager.group(1)) if pager else 1
+
+    async def _crawl_category(self, session: aiohttp.ClientSession,
+                               gallery_url: str, cat_id: str,
+                               visited: Set[str], known: Set[str],
+                               progress_callback=None,
+                               depth: int = 0) -> Set[str]:
+        """Recursively crawl a category and collect every album ID beneath it.
+
+        Album links (thumbnails.php?album=N) are gathered from every page of
+        the category (index.php?cat=N&page=M); sub-category links are taken
+        from the first page only and then crawled depth-first.
+
+        Args:
+            session: aiohttp session used for all page fetches
+            gallery_url: Base gallery URL
+            cat_id: Category ID to crawl
+            visited: Category IDs already crawled; mutated in place to stop loops
+            known: Set of known post_ids (only forwarded to recursive calls, never read)
+            progress_callback: Optional callable given a one-line status string
+            depth: Current recursion depth; crawling stops once it exceeds 10
+
+        Returns:
+            Set of album ID strings (empty if already visited, too deep, or unfetchable)
+        """
+        if cat_id in visited or depth > 10:
+            return set()
+        visited.add(cat_id)
+
+        # Fetch the first page; it also carries the pagination caption
+        cat_url = self._build_url(gallery_url, f'index.php?cat={cat_id}')
+        html = await self._fetch_page(session, cat_url)
+        if not html:
+            return set()
+
+        await asyncio.sleep(2)  # fixed pause after the page request
+
+        album_ids = set(self._extract_album_ids(html))
+        sub_cat_ids = self._extract_category_ids(html)
+
+        # Pagination: pages 2..total_pages contribute further album links
+        total_pages = self._extract_page_count(html)
+        if total_pages > 1:
+            for page_num in range(2, total_pages + 1):
+                page_url = self._build_url(
+                    gallery_url, f'index.php?cat={cat_id}&page={page_num}'
+                )
+                page_html = await self._fetch_page(session, page_url)
+                if page_html:
+                    album_ids.update(self._extract_album_ids(page_html))
+                    # Sub-categories are assumed identical on every page, so not re-extracted
+                await asyncio.sleep(2)
+
+        # Drop self-links and categories already crawled (visited now holds cat_id)
+        sub_cat_ids = [c for c in sub_cat_ids if c != cat_id and c not in visited]
+
+        if progress_callback:
+            progress_callback(
+                f'Category {cat_id}: {len(album_ids)} albums, '
+                f'{len(sub_cat_ids)} sub-categories'
+                + (f' ({total_pages} pages)' if total_pages > 1 else '')
+            )
+
+        # Recurse into sub-categories, sharing the same visited set
+        for sub_id in sub_cat_ids:
+            sub_albums = await self._crawl_category(
+                session, gallery_url, sub_id, visited, known,
+                progress_callback, depth + 1
+            )
+            album_ids.update(sub_albums)
+
+        return album_ids
+
+    async def _fetch_album(self, session: aiohttp.ClientSession,
+                            gallery_url: str, album_id: str,
+                            domain: str) -> Optional[Post]:
+        """Download every page of one album and wrap the result in a Post.
+
+        The first page (thumbnails.php?album=N) supplies the title, the date
+        and the page count; follow-up pages (&page=M) only add attachments.
+
+        Args:
+            session: aiohttp session
+            gallery_url: Base gallery URL
+            album_id: Album ID to fetch
+            domain: Domain name, stored as the Post's creator_id
+
+        Returns:
+            Post carrying the album's attachments, or None when the first
+            page cannot be fetched or no attachment was found
+        """
+        # Page 1 of the album
+        first_url = self._build_url(gallery_url, f'thumbnails.php?album={album_id}')
+        first_page = await self._fetch_page(session, first_url)
+        if not first_page:
+            return None
+
+        # Title falls back to a generic label when the page yields none
+        album_title = self._extract_album_title(first_page) or f"Album {album_id}"
+
+        # Start with the first page's files, then walk pages 2..last
+        files = self._extract_attachments(first_page, gallery_url)
+        last_page = self._extract_page_count(first_page)
+        # range() is empty for a single-page album, so no explicit guard is needed
+        for page_no in range(2, last_page + 1):
+            next_url = self._build_url(
+                gallery_url, f'thumbnails.php?album={album_id}&page={page_no}'
+            )
+            next_page = await self._fetch_page(session, next_url)
+            if next_page:
+                files.extend(self._extract_attachments(next_page, gallery_url))
+            # Pause between page requests
+            await asyncio.sleep(2)
+
+        if not files:
+            return None
+
+        # Date is derived from the first page's markup plus the title
+        published = self._extract_album_date(first_page, album_title)
+
+        # title stays None; the album name travels in `content`
+        return Post(
+            post_id=f"album_{album_id}",
+            service_id=self.SERVICE_ID,
+            platform=self.PLATFORM,
+            creator_id=domain,
+            title=None,
+            content=album_title,
+            published_at=published,
+            attachments=files,
+        )
+
+    def _extract_album_title(self, html: str) -> Optional[str]:
+        """Work out the album's display name from page HTML.
+
+        Sources are tried in order and the first usable one wins:
+        the trailing breadcrumb entry, then an <h1>, <h2> or <h3> heading
+        (in that order), then the longest " - "-separated piece of <title>.
+        Returns None when none of them yields text.
+        """
+        flags = re.DOTALL | re.IGNORECASE
+
+        # 1) Breadcrumb, e.g. "Home > Category > Sub > Album Title": keep the
+        #    text after the final ">" once the markup has been blanked out.
+        crumb = re.search(
+            r'class="[^"]*breadcrumb[^"]*"[^>]*>(.*?)</(?:div|span|td|p)',
+            html, flags
+        )
+        if crumb:
+            plain = re.sub(r'<[^>]+>', ' ', crumb.group(1))
+            segments = [s for s in (raw.strip() for raw in plain.split('>')) if s]
+            if segments:
+                candidate = self._clean_text(segments[-1])
+                # A bare site-root label is not an album name
+                if candidate and candidate.lower() not in ('home', 'index', 'gallery'):
+                    return candidate
+
+        # 2) Headings, most prominent first; very short text is ignored
+        for level in ('h1', 'h2', 'h3'):
+            heading = re.search(rf'<{level}[^>]*>(.*?)</{level}>', html, flags)
+            if not heading:
+                continue
+            candidate = self._clean_text(heading.group(1))
+            if candidate and len(candidate) > 2:
+                return candidate
+
+        # 3) <title>: usually "Site Name - Album Title" or the reverse
+        page_title = re.search(r'<title[^>]*>(.*?)</title>', html, flags)
+        if page_title:
+            candidate = page_title.group(1).strip()
+            if ' - ' in candidate:
+                # Heuristic: album names tend to be longer than site names,
+                # so keep the longest piece
+                candidate = max((piece.strip() for piece in candidate.split(' - ')), key=len)
+            if candidate:
+                return self._clean_text(candidate)
+
+        return None
+
+    def _extract_album_date(self, html: str, title: str) -> str:
+        """Derive an ISO-8601 timestamp for an album.
+
+        The year is read from the page, the month and day from the title:
+
+        Breadcrumb: "Home > Candids > 2026 > January 11 - Leaving..."
+        Title: "January 11 - Leaving Golden Globes afterparty..."
+
+        Resolution order (first that works wins):
+          1. year + title month + title day
+          2. year + title month, day 1 (day missing, zero or impossible)
+          3. year alone, as January 1
+          4. "Date added=Jan 13, 2026" from a thumbnail tooltip
+          5. "last one added on Jan 13, 2026" from the album statistics
+          6. the current local time
+
+        Returns:
+            ISO-formatted datetime string; never None.
+        """
+        # Full month names as they appear at the start of album titles
+        month_numbers = {
+            'january': 1, 'february': 2, 'march': 3, 'april': 4,
+            'may': 5, 'june': 6, 'july': 7, 'august': 8,
+            'september': 9, 'october': 10, 'november': 11, 'december': 12,
+        }
+
+        # Three-letter abbreviations used by the "added" stamps
+        abbr_numbers = {
+            'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4,
+            'may': 5, 'jun': 6, 'jul': 7, 'aug': 8,
+            'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
+        }
+
+        # Year, first choice: the LAST ">YYYY</" text node in the page
+        # (breadcrumb links look like index.php?cat=155">2026</a>).
+        year_hits = re.findall(r'>\s*((?:19|20)\d{2})\s*</', html)
+        year = int(year_hits[-1]) if year_hits else None
+
+        # Year, second choice: a 20xx directory inside an albums/ path
+        if not year:
+            in_path = re.search(r'albums/[^/]+/(20\d{2})/', html)
+            if in_path:
+                year = int(in_path.group(1))
+
+        # Month/day: the title must open with "<MonthName> <1-2 digits>",
+        # e.g. "January 11 - ..." or "March 3 - ..."
+        month = day = None
+        lead = re.match(r'(\w+)\s+(\d{1,2})\b', title) if title else None
+        if lead:
+            month_key = lead.group(1).lower()
+            if month_key in month_numbers:
+                month = month_numbers[month_key]
+                day = int(lead.group(2))
+
+        # With a year in hand the result is always decided in this branch
+        if year:
+            # Most specific combination first; an impossible day such as
+            # "February 30" raises ValueError and falls through.
+            attempts = []
+            if month and day:
+                attempts.append((month, day))
+            if month:
+                attempts.append((month, 1))
+            for mon, dom in attempts:
+                try:
+                    return datetime(year, mon, dom).isoformat()
+                except ValueError:
+                    continue
+            return datetime(year, 1, 1).isoformat()
+
+        # No year anywhere: look for a complete "Mon D, YYYY" stamp instead.
+        # First pattern: thumbnail tooltips; second: the album_stat line.
+        stamp_patterns = (
+            r'Date added\s*=\s*(\w{3})\s+(\d{1,2}),?\s+(\d{4})',
+            r'last one added on\s+(\w{3})\s+(\d{1,2}),?\s+(\d{4})',
+        )
+        for pattern in stamp_patterns:
+            stamp = re.search(pattern, html)
+            if not stamp:
+                continue
+            abbr, dom, yr = stamp.groups()
+            mon = abbr_numbers.get(abbr.lower())
+            if mon is None:
+                continue
+            try:
+                return datetime(int(yr), mon, int(dom)).isoformat()
+            except ValueError:
+                continue
+
+        # Last resort: "now"
+        return datetime.now().isoformat()
+
+    def _extract_attachments(self, html: str, gallery_url: str) -> List[Attachment]:
+        """Build Attachment records for the photos shown on an album page.
+
+        Each thumbnail <img> is mapped to its full-size URL through
+        _thumb_to_fullres(); a URL is emitted once even if it is
+        referenced several times. Two markup shapes are recognised:
+
+        1. <img src="albums/path/thumb_filename.jpg" ...> or
+           <img src="albums/path/normal_filename.jpg" ...>
+        2. <a href="displayimage.php?..."><img src="..."> - tried only
+           when shape 1 produced nothing (some themes wrap thumbnails).
+
+        Returns:
+            Attachments in document order; empty list if nothing matched.
+        """
+        attachments = []
+        seen_urls = set()
+
+        def harvest(pattern: str, flags: int) -> None:
+            """Append one Attachment per new full-res URL matched by pattern."""
+            for hit in re.finditer(pattern, html, flags):
+                full_url = self._thumb_to_fullres(hit.group(1), gallery_url)
+                if not full_url or full_url in seen_urls:
+                    continue
+                seen_urls.add(full_url)
+
+                # Last path component is the file name; text after its
+                # final dot (if any) is the extension.
+                filename = full_url.rsplit('/', 1)[-1]
+                _, dot, suffix = filename.rpartition('.')
+                ext = suffix.lower() if dot else ''
+
+                attachments.append(Attachment(
+                    name=filename,
+                    server_path=full_url,  # use as dedup key
+                    file_type='image' if ext in self.IMAGE_EXTS else 'unknown',
+                    extension=ext or None,
+                    download_url=full_url,
+                ))
+
+        # Shape 1: plain thumbnails under albums/
+        harvest(
+            r'<img[^>]+src=["\']([^"\']*?albums/[^"\']*?(?:thumb_|normal_)[^"\']+)["\']',
+            re.IGNORECASE,
+        )
+
+        # Shape 2: thumbnails wrapped in displayimage.php links; here any
+        # <img src> directly inside such a link is accepted
+        if not attachments:
+            harvest(
+                r'<a[^>]+href=["\'][^"\']*displayimage\.php[^"\']*["\'][^>]*>'
+                r'\s*<img[^>]+src=["\']([^"\']+)["\']',
+                re.IGNORECASE | re.DOTALL,
+            )
+
+        return attachments
+
+    def _thumb_to_fullres(self, thumb_src: str, gallery_url: str) -> Optional[str]:
+        """Convert a thumbnail URL to a full-resolution URL.
+
+        Removes a `thumb_` or `normal_` prefix from the file name (the last
+        path component only; directory names are never altered) and
+        prepends the gallery base URL unless the result is already absolute.
+
+        Args:
+            thumb_src: Thumbnail src attribute value
+            gallery_url: Base gallery URL
+
+        Returns:
+            Full-resolution image URL, or None if thumb_src is empty
+        """
+        if not thumb_src:
+            return None
+
+        # e.g. albums/candids/2026/0111/thumb_001.jpg -> albums/candids/2026/0111/001.jpg
+        # The lookahead pins the match to the final path component, so a
+        # directory like "normal_shots/" survives; "^" covers "thumb_x.jpg".
+        fullres_path = re.sub(r'(^|/)(?:thumb_|normal_)(?=[^/?#]*(?:[?#]|$))', r'\1', thumb_src)
+        if fullres_path.startswith(('http://', 'https://')):
+            return fullres_path
+
+        # Drop leading "./", "../" and "/" segments only; str.lstrip('./')
+        # would also eat the dot of a name such as ".cache/001.jpg".
+        relative = re.sub(r'^(?:\.{0,2}/)+', '', fullres_path)
+        return f"{gallery_url.rstrip('/')}/{relative}"
+
+    def _clean_text(self, text: str) -> str:
+        """Strip markup from *text* and decode its HTML entities.
+
+        Tags are removed before decoding so escaped text such as ``&lt;3`` is
+        kept; entities (``&amp;``, ``&#39;``, ``&eacute;``) are decoded rather
+        than deleted, and non-breaking spaces become plain spaces.
+        """
+        from html import unescape  # local: sibling methods take an `html` argument
+        text = unescape(re.sub(r'<[^>]+>', '', text))
+        return text.replace('\xa0', ' ').strip()
--- a/modules/paid_content/db_adapter.py
+++ b/modules/paid_content/db_adapter.py
--- a/modules/paid_content/embed_downloader.py
+++ b/modules/paid_content/embed_downloader.py
@@ -0,0 +1,297 @@
+"""
+Embed Downloader - Downloads embedded videos from posts using yt-dlp
+Supports: YouTube, Vimeo, Dailymotion, Twitch, and many other platforms
+"""
+
+import asyncio
+import json
+import os
+import subprocess
+from pathlib import Path
+from typing import Dict, Optional
+
+from modules.base_module import LoggingMixin
+
+
+class EmbedDownloader(LoggingMixin):
+    """
+    Download embedded videos from posts using yt-dlp.
+
+    Runs the yt-dlp executable as a subprocess (it is not imported as a
+    library) and reports every outcome as a dict with a 'success' flag.
+    """
+
+    # yt-dlp "-f" selectors per preset; alternatives after "/" are fallbacks
+    QUALITY_PRESETS = {
+        'best': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
+        '1080p': 'bestvideo[height<=1080][ext=mp4]+bestaudio[ext=m4a]/best[height<=1080][ext=mp4]/best',
+        '720p': 'bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/best[height<=720][ext=mp4]/best',
+        '480p': 'bestvideo[height<=480][ext=mp4]+bestaudio[ext=m4a]/best[height<=480][ext=mp4]/best',
+        'audio': 'bestaudio[ext=m4a]/bestaudio/best',
+    }
+
+    def __init__(self, ytdlp_path: str = None, log_callback=None):
+        """Set up logging and resolve yt-dlp; an explicit path beats auto-detection."""
+        self._init_logger('PaidContent', log_callback, default_module='Embed')
+        self.ytdlp_path = ytdlp_path if ytdlp_path else self._find_ytdlp()
+        if self.ytdlp_path:
+            return
+        self.log("yt-dlp not found, embed downloading will be disabled", 'warning')
+
+    def _find_ytdlp(self) -> Optional[str]:
+        """Locate the yt-dlp executable.
+
+        Well-known install locations are probed first; if none holds an
+        executable file, the directories on PATH are searched.
+
+        Returns:
+            Path to the executable, or None when yt-dlp cannot be found.
+        """
+        # Local import: shutil is not among this module's top-level imports.
+        import shutil
+        common_paths = (
+            '/usr/local/bin/yt-dlp',
+            '/usr/bin/yt-dlp',
+            '/opt/homebrew/bin/yt-dlp',
+            os.path.expanduser('~/.local/bin/yt-dlp'),
+        )
+        for path in common_paths:
+            if os.path.isfile(path) and os.access(path, os.X_OK):
+                return path
+
+        # shutil.which is portable and needs no child process, unlike `which`.
+        return shutil.which('yt-dlp')
+
+    def is_available(self) -> bool:
+        """Tell whether a yt-dlp executable path has been resolved."""
+        return self.ytdlp_path is not None
+
+    async def download(self, url: str, output_dir: Path, quality: str = 'best',
+                      filename_template: str = None) -> Dict:
+        """
+        Download the video at *url* into *output_dir* by running yt-dlp.
+
+        Args:
+            url: Video URL to download
+            output_dir: Directory to save the video (created if missing)
+            quality: QUALITY_PRESETS key; unknown values fall back to 'best'
+            filename_template: yt-dlp output template, relative to output_dir
+
+        Returns:
+            Dict with 'success'; file/video metadata if True, 'error' if False
+        """
+        if not self.is_available():
+            return {
+                'success': False,
+                'error': 'yt-dlp not available'
+            }
+
+        try:
+            # Create the output directory, including missing parents
+            output_dir = Path(output_dir)
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            # Build the -o template; the default "embed_" prefix is what the glob fallback below looks for
+            if filename_template:
+                output_template = str(output_dir / filename_template)
+            else:
+                output_template = str(output_dir / 'embed_%(title).50s_%(id)s.%(ext)s')
+
+            # Resolve the preset to a yt-dlp format selector (default: 'best')
+            format_str = self.QUALITY_PRESETS.get(quality, self.QUALITY_PRESETS['best'])
+
+            # Build the argument list (passed to exec directly, no shell involved)
+            cmd = [
+                self.ytdlp_path,
+                '--no-playlist',
+                '--no-warnings',
+                '-f', format_str,
+                '--merge-output-format', 'mp4',
+                '-o', output_template,
+                '--print-json',  # Output JSON with video info
+                url
+            ]
+
+            self.log(f"Downloading embed: {url}", 'debug')
+
+            # Run yt-dlp and wait for it to exit (no timeout is applied here)
+            result = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+
+            stdout, stderr = await result.communicate()
+
+            if result.returncode != 0:
+                error_msg = stderr.decode('utf-8', errors='replace').strip()
+                # Map well-known failures to short messages; truncate anything else
+                if 'Video unavailable' in error_msg:
+                    error_msg = 'Video unavailable or private'
+                elif 'age-restricted' in error_msg.lower():
+                    error_msg = 'Video is age-restricted'
+                elif 'members only' in error_msg.lower():
+                    error_msg = 'Video is members-only'
+                elif len(error_msg) > 200:
+                    error_msg = error_msg[:200] + '...'
+
+                self.log(f"yt-dlp failed: {error_msg}", 'warning')
+                return {
+                    'success': False,
+                    'error': error_msg or f'yt-dlp exited with code {result.returncode}'
+                }
+
+            # --print-json output: use the first stdout line that parses as JSON
+            stdout_text = stdout.decode('utf-8', errors='replace')
+            video_info = None
+
+            for line in stdout_text.strip().split('\n'):
+                try:
+                    video_info = json.loads(line)
+                    break
+                except json.JSONDecodeError:
+                    continue
+
+            if not video_info:
+                # No JSON: take any embed_* file (NOTE(review): may be an older download, and misses custom templates)
+                files = list(output_dir.glob('embed_*'))
+                if files:
+                    file_path = files[0]
+                    return {
+                        'success': True,
+                        'file_path': str(file_path),
+                        'filename': file_path.name,
+                        'file_size': file_path.stat().st_size if file_path.exists() else None
+                    }
+                return {
+                    'success': False,
+                    'error': 'Could not parse yt-dlp output'
+                }
+
+            # Path of the written file as reported in yt-dlp's JSON
+            file_path = video_info.get('_filename') or video_info.get('filename')
+
+            # The reported path may not exist on disk (presumably when the merged file gets another extension)
+            if file_path:
+                file_path = Path(file_path)
+                if not file_path.exists():
+                    # Search output_dir for a file whose name contains the video id
+                    possible_files = list(output_dir.glob(f"*{video_info.get('id', '')}*"))
+                    if possible_files:
+                        file_path = possible_files[0]
+
+            return {
+                'success': True,
+                'file_path': str(file_path) if file_path else None,
+                'filename': file_path.name if file_path else None,
+                'file_size': file_path.stat().st_size if file_path and file_path.exists() else video_info.get('filesize'),
+                'title': video_info.get('title'),
+                'duration': video_info.get('duration'),
+                'uploader': video_info.get('uploader'),
+                'upload_date': video_info.get('upload_date'),
+                'video_id': video_info.get('id'),
+                'platform': video_info.get('extractor_key', video_info.get('extractor', 'unknown')).lower()
+            }
+
+        except asyncio.TimeoutError:  # NOTE(review): nothing above sets a timeout, so this looks unreachable - confirm
+            return {
+                'success': False,
+                'error': 'Download timed out'
+            }
+        except Exception as e:
+            self.log(f"Error downloading embed: {e}", 'error')
+            return {
+                'success': False,
+                'error': str(e)
+            }
+
+    async def get_video_info(self, url: str) -> Dict:
+        """
+        Query yt-dlp for a video's metadata without downloading it.
+
+        Args:
+            url: Video URL
+
+        Returns:
+            Dict with 'success': True plus metadata fields, or
+            'success': False and an 'error' message
+        """
+        if not self.is_available():
+            return {'success': False, 'error': 'yt-dlp not available'}
+
+        # Keys copied unchanged from yt-dlp's JSON into the result
+        copied_keys = (
+            'title', 'duration', 'uploader', 'upload_date',
+            'view_count', 'like_count', 'description', 'thumbnail',
+        )
+        argv = [
+            self.ytdlp_path,
+            '--no-playlist',
+            '--no-warnings',
+            '-j',  # Output JSON
+            '--no-download',
+            url,
+        ]
+
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                *argv,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+            raw_out, raw_err = await proc.communicate()
+
+            if proc.returncode != 0:
+                # Prefer yt-dlp's own stderr text; fall back to the exit code
+                reason = raw_err.decode('utf-8', errors='replace').strip()
+                if not reason:
+                    reason = f'yt-dlp exited with code {proc.returncode}'
+                return {'success': False, 'error': reason}
+
+            meta = json.loads(raw_out.decode('utf-8'))
+
+            report = {'success': True}
+            for key in copied_keys:
+                report[key] = meta.get(key)
+            report['video_id'] = meta.get('id')
+            extractor = meta.get('extractor_key', meta.get('extractor', 'unknown'))
+            report['platform'] = extractor.lower()
+            report['formats'] = len(meta.get('formats', []))
+            return report
+
+        except Exception as e:
+            # Covers spawn failures and unparseable output alike
+            self.log(f"Error getting video info: {e}", 'error')
+            return {
+                'success': False,
+                'error': str(e)
+            }
+
+    @staticmethod
+    def detect_platform(url: str) -> Optional[str]:
+        """Detect the video platform of *url* from its hostname.
+
+        The host must equal a known domain or be a subdomain of it, so
+        ``dropbox.com`` is not mistaken for ``x.com`` (twitter).
+        """
+        from urllib.parse import urlparse  # local: not imported by this module
+        domains = {
+            'youtube.com': 'youtube', 'youtu.be': 'youtube', 'vimeo.com': 'vimeo',
+            'dailymotion.com': 'dailymotion', 'twitch.tv': 'twitch',
+            'twitter.com': 'twitter', 'x.com': 'twitter', 'tiktok.com': 'tiktok',
+            'instagram.com': 'instagram', 'reddit.com': 'reddit',
+        }
+        target = (url or '').strip().lower()
+        if not target.split('/', 1)[0].endswith(':'):
+            target = '//' + target.lstrip('/')  # scheme-less: give urlparse a netloc
+        try:
+            host = urlparse(target).hostname or ''
+        except ValueError:  # malformed netloc, e.g. an unterminated IPv6 literal
+            return None
+        return next((name for dom, name in domains.items()
+                     if host == dom or host.endswith(f'.{dom}')), None)
+
+    @staticmethod
+    def is_supported_url(url: str) -> bool:
+        """Report whether detect_platform() recognises the URL's platform."""
+        return EmbedDownloader.detect_platform(url) is not None
--- a/modules/paid_content/fansly_direct_client.py
+++ b/modules/paid_content/fansly_direct_client.py
--- a/modules/paid_content/file_host_downloader.py
+++ b/modules/paid_content/file_host_downloader.py
@@ -0,0 +1,529 @@
+"""
+Download files from external file hosting services
+Hosts listed in SUPPORTED_HOSTS: Bunkr, Pixeldrain, Gofile, Cyberdrop, Fileditch
+"""
+
+import asyncio
+import re
+from pathlib import Path
+from typing import Dict, List, Optional
+from urllib.parse import urlparse, parse_qs
+
+import aiohttp
+
+from modules.base_module import LoggingMixin, RateLimitMixin
+
+
+class FileHostDownloader(LoggingMixin, RateLimitMixin):
+    """
+    Download files from various file hosting services
+    Used for manual import of PPV content
+    """
+
+    SUPPORTED_HOSTS = {
+        'bunkr': ['bunkr.sk', 'bunkr.si', 'bunkr.la', 'bunkrr.ru', 'bunkr.ph', 'bunkr.is', 'bunkr.ac', 'bunkr.cr'],
+        'pixeldrain': ['pixeldrain.com'],
+        'gofile': ['gofile.io'],
+        'cyberdrop': ['cyberdrop.me', 'cyberdrop.to', 'cyberdrop.cc'],
+        'fileditch': ['fileditchfiles.me', 'fileditch.me'],
+    }
+
+    # Bunkr CDN servers (food-themed) - try in order
+    BUNKR_CDNS = [
+        'i-soup.bunkr.ru',
+        'i-burger.bunkr.ru',
+        'i-pizza.bunkr.ru',
+        'i-taco.bunkr.ru',
+        'i-fries.bunkr.ru',
+        'i-hotdog.bunkr.ru',
+        'i-nachos.bunkr.ru',
+        'i-sushi.bunkr.ru',
+        'i-ramen.bunkr.ru',
+        'i-curry.bunkr.ru',
+        'i-kebab.bunkr.ru',
+        'i-pasta.bunkr.ru',
+        'i-steak.bunkr.ru',
+        'i-salad.bunkr.ru',
+        'i-sandwich.bunkr.ru',
+        'i-waffle.bunkr.ru',
+        'i-pancake.bunkr.ru',
+        'i-donut.bunkr.ru',
+        'i-cookie.bunkr.ru',
+        'i-cake.bunkr.ru',
+        'i-bacon.bunkr.ru',
+        'i-cheese.bunkr.ru',
+        'i-chicken.bunkr.ru',
+        'i-fish.bunkr.ru',
+        'i-noodle.bunkr.ru',
+        'i-rice.bunkr.ru',
+        'i-bread.bunkr.ru',
+        'burger.bunkr.ru',
+        'pizza.bunkr.ru',
+        'milkshake.bunkr.ru',
+    ]
+
+    def __init__(self, log_callback=None, progress_callback=None):
+        self._init_logger('PaidContent', log_callback, default_module='FileHost')
+        self._init_rate_limiter(min_delay=1, max_delay=3)  # delay bounds; units are defined by RateLimitMixin (presumably seconds)
+        self.progress_callback = progress_callback  # Optional hook; called with (downloaded_bytes, total_bytes, filename)
+
+    def detect_host(self, url: str) -> Optional[str]:
+        """Map a URL to its SUPPORTED_HOSTS key, or None if unsupported.
+
+        Matches on the parsed hostname (port/credentials ignored, one leading
+        ``www.`` label dropped) instead of the raw netloc.
+        """
+        try:
+            domain = re.sub(r'^www\.', '', urlparse(url).hostname or '')
+        except (AttributeError, TypeError, ValueError):  # non-string or malformed URL
+            return None
+        matches = (h for h, doms in self.SUPPORTED_HOSTS.items() if domain in doms)
+        return next(matches, None)
+
+    def is_supported_url(self, url: str) -> bool:
+        """Report whether detect_host() recognises the URL's host."""
+        return self.detect_host(url) is not None
+
+    async def download_url(self, url: str, save_dir: Path) -> Dict:
+        """Dispatch *url* to the matching ``_download_<host>`` coroutine.
+
+        Returns: {'success': bool, 'files': [paths], 'error': str}
+        """
+        def failed(reason: str) -> Dict:
+            return {'success': False, 'files': [], 'error': reason}
+        host_name = self.detect_host(url)
+        if not host_name:
+            return failed('Unsupported host')
+        downloader = getattr(self, f'_download_{host_name}', None)
+        if not downloader:
+            return failed(f'No handler for {host_name}')
+        try:
+            target_dir = Path(save_dir)
+            target_dir.mkdir(parents=True, exist_ok=True)
+            return await downloader(url, target_dir)
+        except Exception as e:
+            self.log(f"Error downloading from {host_name}: {e}", 'error')
+            return failed(str(e))
+
+    async def _download_pixeldrain(self, url: str, save_dir: Path) -> Dict:
+        """Download a Pixeldrain file (/u/FILEID) or list (/l/LISTID).
+
+        Returns: {'success': bool, 'files': [paths], 'error': str}. List
+        items that fail to download are logged and skipped; any other
+        path type is reported as an error rather than an empty success.
+        """
+        path_parts = urlparse(url).path.strip('/').split('/')
+        if len(path_parts) < 2:
+            return {'success': False, 'files': [], 'error': 'Invalid Pixeldrain URL'}
+
+        url_type, file_id = path_parts[0], path_parts[1]
+        if url_type not in ('u', 'l'):
+            # Reject up front instead of reporting success with no files
+            return {'success': False, 'files': [], 'error': f'Unsupported Pixeldrain URL type: {url_type}'}
+
+        files = []
+        timeout = aiohttp.ClientTimeout(total=300)
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            if url_type == 'u':
+                # Single file: the info endpoint supplies the stored name
+                api_url = f"https://pixeldrain.com/api/file/{file_id}/info"
+                async with session.get(api_url) as resp:
+                    if resp.status != 200:
+                        return {'success': False, 'files': [], 'error': f'API error: {resp.status}'}
+                    info = await resp.json()
+
+                download_url = f"https://pixeldrain.com/api/file/{file_id}"
+                filename = info.get('name', f'{file_id}.bin')
+                save_path = save_dir / self._sanitize_filename(filename)
+                await self._download_file(session, download_url, save_path)
+                files.append(str(save_path))
+
+            else:
+                # List (album): fetch the index, then each file in turn
+                api_url = f"https://pixeldrain.com/api/list/{file_id}"
+                async with session.get(api_url) as resp:
+                    if resp.status != 200:
+                        return {'success': False, 'files': [], 'error': f'API error: {resp.status}'}
+                    data = await resp.json()
+
+                for i, item in enumerate(data.get('files', [])):
+                    self._delay_between_items()
+                    item_id = item['id']
+                    filename = item.get('name', f'{i:03d}_{item_id}.bin')
+                    download_url = f"https://pixeldrain.com/api/file/{item_id}"
+                    save_path = save_dir / self._sanitize_filename(filename)
+                    try:
+                        await self._download_file(session, download_url, save_path)
+                        files.append(str(save_path))
+                    except Exception as e:
+                        self.log(f"Failed to download {filename}: {e}", 'warning')
+
+        return {'success': True, 'files': files, 'error': None}
+
+    async def _download_gofile(self, url: str, save_dir: Path) -> Dict:
+        """
+        Download every file of a Gofile share (https://gofile.io/d/CONTENTID).
+
+        A guest account is created to obtain an API token, the share's
+        children are listed, and each child whose type is 'file' is
+        downloaded. Children of any other type are skipped.
+
+        Args:
+            url: Gofile share URL; its path must look like /d/<content id>
+            save_dir: Directory the downloaded files are written into
+
+        Returns:
+            Dict with 'success', 'files' (saved paths as strings) and 'error'.
+            A failed individual download is only logged, so 'success' is True
+            whenever the listing itself could be retrieved.
+        """
+        def failure(message):
+            return {'success': False, 'files': [], 'error': message}
+
+        # Expected path shape: /d/<content id>
+        segments = urlparse(url).path.strip('/').split('/')
+        if not (len(segments) >= 2 and segments[0] == 'd'):
+            return failure('Invalid Gofile URL')
+        content_id = segments[1]
+
+        saved = []
+        client_timeout = aiohttp.ClientTimeout(total=300)
+
+        async with aiohttp.ClientSession(timeout=client_timeout) as session:
+            # Guest account token (the accounts endpoint must be called with POST)
+            async with session.post('https://api.gofile.io/accounts') as account_resp:
+                if account_resp.status != 200:
+                    return failure('Failed to get Gofile token')
+                account = await account_resp.json()
+                account_status = account.get('status')
+                if account_status != 'ok':
+                    return failure(f"Gofile API error: {account_status}")
+                token = account.get('data', {}).get('token')
+
+            if not token:
+                return failure('No Gofile token received')
+
+            # The website token travels as a header next to the bearer token
+            auth_headers = {
+                'Authorization': f'Bearer {token}',
+                'x-website-token': '4fd6sg89d7s6',
+            }
+            listing_url = f"https://api.gofile.io/contents/{content_id}"
+
+            async with session.get(listing_url, headers=auth_headers) as listing_resp:
+                http_status = listing_resp.status
+                if http_status == 401:
+                    return failure('Gofile authentication failed - websiteToken may have changed')
+                if http_status != 200:
+                    return failure(f'Failed to get content: {http_status}')
+                listing = await listing_resp.json()
+
+            listing_status = listing.get('status')
+            if listing_status == 'error-notPremium':
+                return failure('Gofile requires premium account for API access - try direct download')
+            if listing_status != 'ok':
+                return failure(
+                    listing.get('data', {}).get('message', listing.get('status', 'Unknown error'))
+                )
+
+            children = listing.get('data', {}).get('children', {})
+            for child_id, child in children.items():
+                if child.get('type') == 'file':
+                    self._delay_between_items()
+                    link = child.get('link')
+                    name = child.get('name', f'{child_id}.bin')
+                    target = save_dir / self._sanitize_filename(name)
+
+                    try:
+                        # The same auth headers are sent with the file request
+                        await self._download_file(session, link, target, headers=auth_headers)
+                        saved.append(str(target))
+                    except Exception as e:
+                        self.log(f"Failed to download {name}: {e}", 'warning')
+
+        return {'success': True, 'files': saved, 'error': None}
+
+    async def _download_cyberdrop(self, url: str, save_dir: Path) -> Dict:
+        """
+        Download from Cyberdrop.
+
+        URLs whose path starts with /a/ are treated as albums: the album page
+        is fetched and every href pointing at a *.cyberdrop.* host is
+        downloaded. Any other URL is downloaded directly as a single file.
+
+        Args:
+            url: Album URL (https://cyberdrop.me/a/ALBUMID), file URL or CDN link
+            save_dir: Directory the downloaded files are written into
+
+        Returns:
+            Dict with 'success', 'files' (saved paths as strings) and 'error'.
+            Failed album items are logged and skipped; a failing single-file
+            download lets the exception from _download_file propagate.
+        """
+        saved = []
+        client_timeout = aiohttp.ClientTimeout(total=300)
+
+        async with aiohttp.ClientSession(timeout=client_timeout) as session:
+            parsed = urlparse(url)
+            segments = parsed.path.strip('/').split('/')
+            is_album = len(segments) >= 2 and segments[0] == 'a'
+
+            if not is_album:
+                # Single file or direct CDN link: name comes from the last path segment
+                name = parsed.path.split('/')[-1] or 'download.bin'
+                target = save_dir / self._sanitize_filename(name)
+
+                await self._download_file(session, url, target)
+                saved.append(str(target))
+            else:
+                async with session.get(url) as resp:
+                    if resp.status != 200:
+                        return {'success': False, 'files': [], 'error': f'Failed to fetch album: {resp.status}'}
+                    page = await resp.text()
+
+                # File links look like href="https://fs-XXX.cyberdrop.to/FILE"
+                links = re.findall(r'href="(https://[a-z0-9-]+\.cyberdrop\.[a-z]+/[^"]+)"', page)
+
+                for index, link in enumerate(links):
+                    self._delay_between_items()
+                    # Last path segment without the query string; positional fallback
+                    name = link.split('/')[-1].split('?')[0] or f'{index:03d}.bin'
+                    target = save_dir / self._sanitize_filename(name)
+
+                    try:
+                        await self._download_file(session, link, target)
+                        saved.append(str(target))
+                    except Exception as e:
+                        self.log(f"Failed to download {name}: {e}", 'warning')
+
+        return {'success': True, 'files': saved, 'error': None}
+
+    async def _download_bunkr(self, url: str, save_dir: Path) -> Dict:
+        """
+        Download from Bunkr with CDN fallback support.
+
+        URLs whose path starts with /a/ are treated as albums: the album page
+        is fetched and every href="/f/..." link on it is resolved to a direct
+        URL and downloaded. Any other URL is treated as a single file page
+        (e.g. /f/FILEID or /v/VIDEOID).
+
+        Args:
+            url: Bunkr album or file page URL
+            save_dir: Directory the downloaded files are written into
+
+        Returns:
+            Dict with 'success' (True when at least one file was saved),
+            'files' (saved paths as strings) and 'error'. When items failed,
+            'failed' lists them (file page URLs for links that could not be
+            resolved, filenames for downloads that failed) and 'error' carries
+            the failure count. An album without any file link reports an
+            explicit error instead of an unexplained failure.
+
+        Raises:
+            Exception: propagated from _download_file when a single-file
+                download fails (album items are caught and recorded instead).
+        """
+        files = []
+        failed = []
+        timeout = aiohttp.ClientTimeout(total=600)  # Increased for large files
+
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+        }
+
+        async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
+            parsed = urlparse(url)
+            path_parts = parsed.path.strip('/').split('/')
+
+            if len(path_parts) >= 2 and path_parts[0] == 'a':
+                # Album page
+                async with session.get(url) as resp:
+                    if resp.status != 200:
+                        return {'success': False, 'files': [], 'error': f'Failed to fetch album: {resp.status}'}
+                    html = await resp.text()
+
+                # Parse file links from HTML - look for /f/ links
+                matches = re.findall(r'href="(/f/[^"]+)"', html)
+
+                self.log(f"Found {len(matches)} files in Bunkr album", 'info')
+
+                for i, file_path in enumerate(matches):
+                    self._delay_between_items()
+
+                    # Links are site-relative; resolve against the album's host
+                    file_url = f"https://{parsed.netloc}{file_path}"
+
+                    # Get direct download URL and file UUID
+                    direct_url, file_uuid = await self._get_bunkr_direct_url_with_uuid(session, file_url)
+                    if not direct_url:
+                        self.log(f"Could not get direct URL for {file_url}", 'warning')
+                        failed.append(file_url)
+                        continue
+
+                    # Last path segment without the query string; positional fallback
+                    filename = direct_url.split('/')[-1].split('?')[0] or f'{i:03d}.bin'
+                    save_path = save_dir / self._sanitize_filename(filename)
+
+                    try:
+                        await self._download_file(session, direct_url, save_path,
+                                                  try_cdn_fallback=True, file_uuid=file_uuid)
+                        files.append(str(save_path))
+                        self.log(f"Downloaded: {filename}", 'info')
+                    except Exception as e:
+                        self.log(f"Failed to download {filename}: {e}", 'warning')
+                        failed.append(filename)
+
+            else:
+                # Single file page
+                direct_url, file_uuid = await self._get_bunkr_direct_url_with_uuid(session, url)
+                if not direct_url:
+                    return {'success': False, 'files': [], 'error': 'Could not get direct download URL'}
+
+                filename = direct_url.split('/')[-1].split('?')[0] or 'download.bin'
+                save_path = save_dir / self._sanitize_filename(filename)
+
+                await self._download_file(session, direct_url, save_path,
+                                          try_cdn_fallback=True, file_uuid=file_uuid)
+                files.append(str(save_path))
+
+        result = {'success': len(files) > 0, 'files': files, 'error': None}
+        if failed:
+            result['failed'] = failed
+            result['error'] = f'{len(failed)} files failed to download'
+        elif not files:
+            # Only reachable for an album page that contained no /f/ links;
+            # never report a failure without saying why.
+            result['error'] = 'No files found in album'
+        return result
+
+    async def _get_bunkr_direct_url_with_uuid(self, session: aiohttp.ClientSession, page_url: str) -> tuple:
+        """
+        Resolve a Bunkr file page to a direct download URL and its file UUID.
+
+        The page HTML is searched for a UUID-style file name, then for a
+        *.bunkr.ru link in href/src/data-src attributes. If none of those
+        links is reachable and a UUID was found, each host in BUNKR_CDNS is
+        probed with that UUID.
+
+        Args:
+            session: Open aiohttp session used for the page fetch and probes
+            page_url: URL of the Bunkr file page
+
+        Returns:
+            (direct_url, file_uuid). direct_url is None when nothing reachable
+            was found; both are None when the page fetch fails or an error
+            occurs.
+        """
+        try:
+            async with session.get(page_url) as resp:
+                if resp.status != 200:
+                    return None, None
+                page = await resp.text()
+
+            # First matching pattern wins; each captures "<uuid>.<extension>"
+            uuid_patterns = (
+                r'data-v="([a-f0-9-]{36}\.[a-z0-9]+)"',
+                r'([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\.[a-z0-9]+)',
+            )
+            uuid_hit = next(
+                (hit for hit in (re.search(p, page) for p in uuid_patterns) if hit),
+                None,
+            )
+            file_uuid = uuid_hit.group(1) if uuid_hit else None
+
+            # A CDN link already present in the page is preferred when reachable
+            cdn_patterns = (
+                r'href="(https://[^"]*\.bunkr\.ru/[^"]+)"',
+                r'src="(https://[^"]*\.bunkr\.ru/[^"]+)"',
+                r'data-src="(https://[^"]*\.bunkr\.ru/[^"]+)"',
+            )
+            for pattern in cdn_patterns:
+                link_hit = re.search(pattern, page)
+                if not link_hit:
+                    continue
+                candidate = link_hit.group(1)
+                if await self._check_url_accessible(session, candidate):
+                    return candidate, file_uuid
+
+            if not file_uuid:
+                return None, file_uuid
+
+            # No usable link in the page: probe the known CDN hosts with the UUID
+            self.log(f"Found file UUID: {file_uuid}, trying CDNs...", 'debug')
+            for cdn in self.BUNKR_CDNS:
+                candidate = f"https://{cdn}/{file_uuid}"
+                if await self._check_url_accessible(session, candidate):
+                    self.log(f"Found working CDN: {cdn}", 'debug')
+                    return candidate, file_uuid
+
+            return None, file_uuid
+        except Exception as e:
+            self.log(f"Error getting Bunkr direct URL: {e}", 'warning')
+            return None, None
+
+    async def _check_url_accessible(self, session: aiohttp.ClientSession, url: str) -> bool:
+        """
+        Probe a URL with a HEAD request (redirects followed, 10 second limit).
+
+        Returns:
+            True only when the final response status is 200; any other status
+            or any exception during the request yields False.
+        """
+        reachable = False
+        try:
+            probe_timeout = aiohttp.ClientTimeout(total=10)
+            async with session.head(url, allow_redirects=True, timeout=probe_timeout) as resp:
+                reachable = resp.status == 200
+        except Exception:
+            reachable = False
+        return reachable
+
+    async def _download_fileditch(self, url: str, save_dir: Path) -> Dict:
+        """
+        Download from FileDitch (Cloudflare-protected).
+
+        The file name is taken from the `f` query parameter
+        (file.php?f=/b74/tLyJWGrzvSyRlJvBVDBa.mp4). Cloudflare cookies and the
+        matching User-Agent are obtained through CloudflareHandler /
+        FlareSolverr and then used for the actual download.
+
+        Args:
+            url: FileDitch download URL carrying an `f` query parameter
+            save_dir: Directory the downloaded file is written into
+
+        Returns:
+            Dict with 'success', 'files' (saved paths as strings) and 'error'.
+
+        Raises:
+            Exception: propagated from _download_file when the download fails.
+        """
+        from modules.cloudflare_handler import CloudflareHandler
+
+        # Extract filename from URL: file.php?f=/b74/tLyJWGrzvSyRlJvBVDBa.mp4
+        params = parse_qs(urlparse(url).query)
+        file_path = params.get('f', [''])[0]
+        if not file_path:
+            return {'success': False, 'files': [], 'error': 'Invalid FileDitch URL - no file parameter'}
+
+        # rsplit returns the whole string when there is no '/', so no special case
+        filename = file_path.rsplit('/', 1)[-1]
+        if not filename:
+            return {'success': False, 'files': [], 'error': 'Could not extract filename from URL'}
+
+        save_path = save_dir / self._sanitize_filename(filename)
+
+        # Use CloudflareHandler to get cookies via FlareSolverr
+        cf_handler = CloudflareHandler(
+            module_name='FileDitch',
+            flaresolverr_url='http://localhost:8191/v1',
+            flaresolverr_enabled=True,
+        )
+
+        self.log('Bypassing Cloudflare for FileDitch via FlareSolverr...', 'info')
+        if not cf_handler.get_cookies_via_flaresolverr(url):
+            return {'success': False, 'files': [], 'error': 'Failed to bypass Cloudflare for FileDitch'}
+
+        cookies = cf_handler.get_cookies_dict()
+        user_agent = cf_handler.get_user_agent()
+
+        # Download with the obtained cookies
+        timeout = aiohttp.ClientTimeout(total=3600)
+        headers = {'User-Agent': user_agent or 'Mozilla/5.0'}
+
+        # Hand the cookies to the session itself. CookieJar.update_cookies()
+        # expects a yarl.URL as response_url and reads attributes from it, so
+        # passing the URL as a plain string there is not supported.
+        async with aiohttp.ClientSession(timeout=timeout, cookies=cookies or None,
+                                         headers=headers) as session:
+            await self._download_file(session, url, save_path, headers=headers)
+
+        return {'success': True, 'files': [str(save_path)], 'error': None}
+
+    async def _download_file(self, session: aiohttp.ClientSession, url: str,
+                            save_path: Path, headers: Dict = None,
+                            try_cdn_fallback: bool = False, file_uuid: str = None) -> None:
+        """
+        Download a single file with streaming and optional CDN fallback.
+
+        The response is streamed into a sibling "<name>.part" file and moved
+        onto save_path only after the whole body has been written, so a failed
+        or interrupted transfer never leaves a truncated file at save_path.
+
+        Args:
+            session: Open aiohttp session used for the request(s)
+            url: Primary download URL
+            save_path: Final destination; parent directories are created
+            headers: Optional extra request headers
+            try_cdn_fallback: When True and file_uuid is given, every host in
+                BUNKR_CDNS is tried after the primary URL
+            file_uuid: File name on the CDN hosts used to build fallback URLs
+
+        Raises:
+            Exception: when none of the candidate URLs could be downloaded;
+                the message contains the last error seen.
+        """
+        save_path.parent.mkdir(parents=True, exist_ok=True)
+        part_path = save_path.with_name(save_path.name + '.part')
+
+        urls_to_try = [url]
+
+        # If CDN fallback enabled and we have a file UUID, add alternate CDNs
+        if try_cdn_fallback and file_uuid:
+            for cdn in self.BUNKR_CDNS:
+                alt_url = f"https://{cdn}/{file_uuid}"
+                if alt_url != url:
+                    urls_to_try.append(alt_url)
+
+        last_error = None
+        for try_url in urls_to_try:
+            try:
+                self.log(f"Downloading: {save_path.name} from {try_url[:60]}...", 'info')
+                async with session.get(try_url, headers=headers) as resp:
+                    if resp.status != 200:
+                        last_error = f"HTTP {resp.status}"
+                        self.log(f"Download failed: {save_path.name} - {last_error}", 'warning')
+                        continue  # Try next URL
+
+                    total_size = int(resp.headers.get('content-length', 0))
+                    downloaded = 0
+                    last_log_pct = 0
+
+                    try:
+                        with open(part_path, 'wb') as f:
+                            async for chunk in resp.content.iter_chunked(65536):  # 64KB chunks
+                                f.write(chunk)
+                                downloaded += len(chunk)
+
+                                # Log and callback progress every 2% (needs a known size)
+                                if total_size > 0:
+                                    pct = int(downloaded * 100 / total_size)
+                                    if pct >= last_log_pct + 2:
+                                        self.log(f"  {save_path.name}: {pct}% ({downloaded // (1024*1024)}MB / {total_size // (1024*1024)}MB)", 'info')
+                                        last_log_pct = pct
+                                        # Call progress callback if provided
+                                        if self.progress_callback:
+                                            try:
+                                                self.progress_callback(downloaded, total_size, save_path.name)
+                                            except Exception as callback_error:
+                                                # Don't fail download due to callback error
+                                                self.log(f"Progress callback failed: {callback_error}", 'debug')
+
+                        # Complete body received: publish it under the final name
+                        part_path.replace(save_path)
+                    finally:
+                        # Still present only when the transfer did not complete
+                        if part_path.exists():
+                            part_path.unlink()
+
+                self.log(f"Downloaded: {save_path.name} ({downloaded // (1024*1024)}MB)", 'info')
+                return  # Success
+            except Exception as e:
+                last_error = str(e)
+                self.log(f"Download error: {save_path.name} - {last_error}", 'warning')
+                # Try next CDN
+                continue
+
+        raise Exception(f"Download failed after trying {len(urls_to_try)} URLs: {last_error}")
+
+    def _sanitize_filename(self, filename: str) -> str:
+        """
+        Make a name safe to use as a single path component.
+
+        Drops the characters < > : " / \\ | ? * and ASCII control characters,
+        then trims leading/trailing dots and spaces.
+
+        Returns:
+            The cleaned name, or 'download.bin' when the input is empty or
+            nothing is left after cleaning.
+        """
+        fallback = 'download.bin'
+        if not filename:
+            return fallback
+        cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', filename).strip('. ')
+        return cleaned if cleaned else fallback
+
+    @classmethod
+    def get_supported_domains(cls) -> List[str]:
+        """Return the domains of every entry in SUPPORTED_HOSTS as one flat list"""
+        return [
+            domain
+            for host_domains in cls.SUPPORTED_HOSTS.values()
+            for domain in host_domains
+        ]
--- a/modules/paid_content/filename_parser.py
+++ b/modules/paid_content/filename_parser.py
@@ -0,0 +1,171 @@
+"""
+Filename parser for extracting dates and metadata from Fansly/paid content filenames.
+
+Supports:
+1. Fansly snowflake IDs: 871257582885416960.mp4
+2. Embedded date format: 2023-05-11_at_15-51_id_513099759796367360-zRvVUZeP.mp4
+3. Date-prefixed files: 2022-07-08.mp4 or 2022-07-08_video.mp4
+"""
+
+import re
+from datetime import datetime, timezone
+from typing import Optional, Dict, Tuple
+from pathlib import Path
+
+
+# Fansly epoch calibrated from known files
+# Based on: 513099759796367360 = 2023-05-11 15:51 UTC
+FANSLY_EPOCH_MS = 1561483337101
+
+
+def decode_fansly_snowflake(snowflake_id: str) -> Optional[datetime]:
+    """
+    Decode a Fansly snowflake ID to a datetime.
+
+    Fansly uses Twitter-style snowflake IDs where the timestamp
+    is encoded in the upper bits (shifted right by 22). The shifted value
+    is a millisecond offset from FANSLY_EPOCH_MS.
+
+    Args:
+        snowflake_id: The ID as a decimal string (anything int() accepts)
+
+    Returns:
+        Timezone-aware UTC datetime, or None when the value is not a valid
+        integer or the resulting timestamp cannot be represented.
+    """
+    try:
+        sid = int(snowflake_id)
+        # Timestamp is in upper bits
+        timestamp_ms = (sid >> 22) + FANSLY_EPOCH_MS
+        return datetime.fromtimestamp(timestamp_ms / 1000, tz=timezone.utc)
+    except (TypeError, ValueError, OverflowError, OSError):
+        # TypeError covers None / non-string input, which int() rejects
+        return None
+
+
+def parse_filename(filename: str) -> Dict:
+    """
+    Extract a date (and Fansly ID, when present) from a filename.
+
+    The stem of the name is checked against these forms, first hit wins:
+      1. Embedded date + ID: 2023-05-11_at_15-51_id_513099759796367360-...
+         (separators may be '-', '_' or space)            -> 'embedded', high
+      2. Date prefix: 2022-07-08.mp4 / 2022_07_08_video.mp4
+         (time defaults to 12:00 UTC)                     -> 'prefix', high
+      3. Stem is only a 15-20 digit ID, optionally followed by _<digits>
+                                                          -> 'snowflake', high
+      4. A 15-20 digit run anywhere in the stem           -> 'snowflake', medium
+    Snowflake dates are only accepted when the year is within 2020-2030.
+
+    Returns:
+        {
+            'original_filename': str,
+            'detected_date': datetime or None,
+            'fansly_id': str or None,
+            'date_source': str or None,  # 'snowflake', 'embedded', 'prefix', None
+            'confidence': str,  # 'high', 'medium', 'low'
+        }
+    """
+    parsed = {
+        'original_filename': filename,
+        'detected_date': None,
+        'fansly_id': None,
+        'date_source': None,
+        'confidence': 'low',
+    }
+
+    # All patterns work on the name without its extension
+    stem = Path(filename).stem
+
+    # Form 1: full date, time and ID embedded in the name
+    embedded = re.search(
+        r'(\d{4})[-_ ](\d{2})[-_ ](\d{2})[-_ ]?at[-_ ](\d{2})[-_ ](\d{2})[-_ ]?id[-_ ](\d{15,20})',
+        stem,
+        re.IGNORECASE,
+    )
+    if embedded:
+        *stamp, snowflake = embedded.groups()
+        try:
+            moment = datetime(*(int(part) for part in stamp), 0, tzinfo=timezone.utc)
+        except ValueError:
+            # Digits matched but do not form a real date/time; try the other forms
+            moment = None
+        if moment is not None:
+            parsed['detected_date'] = moment
+            parsed['fansly_id'] = snowflake
+            parsed['date_source'] = 'embedded'
+            parsed['confidence'] = 'high'
+            return parsed
+
+    # Form 2: the name starts with a calendar date
+    prefixed = re.match(r'^(\d{4})[-_](\d{2})[-_](\d{2})(?:[_\-\s]|$)', stem)
+    if prefixed:
+        try:
+            year, month, day = (int(part) for part in prefixed.groups())
+            moment = datetime(year, month, day, 12, 0, 0, tzinfo=timezone.utc)  # Default to noon
+        except ValueError:
+            moment = None
+        if moment is not None:
+            parsed['detected_date'] = moment
+            parsed['date_source'] = 'prefix'
+            parsed['confidence'] = 'high'
+            return parsed
+
+    # Forms 3 and 4: snowflake candidates, the whole-stem ID first
+    candidates = []
+    whole = re.match(r'^(\d{15,20})(?:_\d+)?$', stem)
+    if whole:
+        candidates.append((whole.group(1), 'high'))
+    candidates.extend((run, 'medium') for run in re.findall(r'(\d{15,20})', stem))
+
+    for candidate, confidence in candidates:
+        decoded = decode_fansly_snowflake(candidate)
+        # Sanity check: date should be between 2020 and 2030
+        if decoded and 2020 <= decoded.year <= 2030:
+            parsed['detected_date'] = decoded
+            parsed['fansly_id'] = candidate
+            parsed['date_source'] = 'snowflake'
+            parsed['confidence'] = confidence
+            return parsed
+
+    return parsed
+
+
+def parse_filenames(filenames: list) -> Dict:
+    """
+    Parse several filenames and summarise the dates that were found.
+
+    Returns:
+        {
+            'files': [parse_filename() result for each file, in input order],
+            'earliest_date': datetime or None,
+            'latest_date': datetime or None,
+            'suggested_date': datetime or None,  # currently the earliest date
+            'has_dates': bool,
+        }
+    """
+    parsed = list(map(parse_filename, filenames))
+    found = [entry['detected_date'] for entry in parsed if entry['detected_date']]
+
+    earliest = latest = None
+    if found:
+        earliest, latest = min(found), max(found)
+
+    return {
+        'files': parsed,
+        'earliest_date': earliest,
+        'latest_date': latest,
+        'suggested_date': earliest,  # Use earliest as default
+        'has_dates': bool(found),
+    }
+
+
+def format_date_for_display(dt: datetime) -> str:
+    """
+    Format datetime for display: 'May 11, 2023 at 3:51 PM'.
+
+    The day of month is zero-padded ('May 05, 2023'); the hour is not.
+    Returns an empty string when dt is None.
+    """
+    if dt is None:
+        return ''
+    # '%-I' (unpadded hour) is a platform-specific strftime extension, so the
+    # 12-hour value is computed here instead: 0 -> 12, 13 -> 1, ...
+    hour_12 = dt.hour % 12 or 12
+    return f"{dt.strftime('%b %d, %Y')} at {hour_12}:{dt.strftime('%M %p')}"
+
+
+def format_date_for_input(dt: datetime) -> Tuple[str, str]:
+    """
+    Split a datetime into the strings HTML date/time inputs expect.
+
+    Returns:
+        ('YYYY-MM-DD', 'HH:MM') with a 24-hour time, or ('', '') when dt is None.
+    """
+    if dt is not None:
+        date_part = dt.strftime('%Y-%m-%d')
+        time_part = dt.strftime('%H:%M')
+        return (date_part, time_part)
+    return ('', '')
--- a/modules/paid_content/hqcelebcorner_client.py
+++ b/modules/paid_content/hqcelebcorner_client.py
@@ -0,0 +1,14 @@
+"""Backwards-compatibility shim — use xenforo_forum_client instead."""
+from .xenforo_forum_client import XenForoForumClient
+
+
+class HQCelebCornerClient(XenForoForumClient):
+    """
+    Legacy alias for XenForoForumClient, pre-configured for HQCelebCorner.
+
+    Only the logging callback is configurable; service id, base URL and
+    cookie file are fixed to the HQCelebCorner values.
+    """
+
+    def __init__(self, log_callback=None):
+        site_settings = {
+            'service_id': 'hqcelebcorner',
+            'base_url': 'https://www.hqcelebcorner.net',
+            'cookie_path': '/opt/media-downloader/cookies/forum_cookies_HQCelebCorner.json',
+        }
+        super().__init__(log_callback=log_callback, **site_settings)
--- a/modules/paid_content/instagram_adapter.py
+++ b/modules/paid_content/instagram_adapter.py
--- a/modules/paid_content/models.py
+++ b/modules/paid_content/models.py
@@ -0,0 +1,312 @@
+"""
+Dataclass models for the Paid Content feature
+"""
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Dict, List, Optional, Any
+
+
+@dataclass
+class Attachment:
+    """
+    Represents a file attachment from a post.
+
+    Only `name` and `server_path` are required. `from_api` additionally
+    derives the file type, extension and download URL; the remaining fields
+    keep their defaults unless set by the caller.
+    """
+    name: str
+    server_path: str
+    file_type: Optional[str] = None  # from_api sets 'image', 'video', 'archive', 'document' or 'unknown'
+    extension: Optional[str] = None  # lower-case, without the dot
+    download_url: Optional[str] = None
+    file_size: Optional[int] = None
+    width: Optional[int] = None
+    height: Optional[int] = None
+    duration: Optional[int] = None
+    needs_quality_recheck: bool = False  # written by to_dict() only when True
+    is_preview: bool = False  # not part of to_dict()
+
+    @classmethod
+    def from_api(cls, data: Dict, base_url: str = '') -> 'Attachment':
+        """
+        Create Attachment from API response.
+
+        Args:
+            data: Attachment object with optional 'name' and 'path' keys;
+                missing or null values are treated as empty strings
+            base_url: When given together with a path, the download URL is
+                built as "{base_url}/data{path}"
+
+        Returns:
+            Attachment whose file type is detected from the name's extension.
+        """
+        # `or ''` also covers keys that are present but null in the payload
+        name = data.get('name') or ''
+        path = data.get('path') or ''
+
+        # Detect file type from extension
+        ext = name.rsplit('.', 1)[-1].lower() if '.' in name else ''
+
+        type_by_extension = (
+            ('image', {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff', 'heic'}),
+            ('video', {'mp4', 'mov', 'avi', 'mkv', 'webm', 'm4v', 'wmv', 'flv'}),
+            ('archive', {'zip', 'rar', '7z', 'tar', 'gz'}),
+            ('document', {'pdf', 'doc', 'docx', 'txt'}),
+        )
+        file_type = next(
+            (kind for kind, extensions in type_by_extension if ext in extensions),
+            'unknown',
+        )
+
+        return cls(
+            name=name,
+            server_path=path,
+            file_type=file_type,
+            extension=ext if ext else None,
+            download_url=f"{base_url}/data{path}" if base_url and path else None
+        )
+
+    def to_dict(self) -> Dict:
+        """Convert to dictionary for database storage"""
+        d = {
+            'name': self.name,
+            'server_path': self.server_path,
+            'file_type': self.file_type,
+            'extension': self.extension,
+            'download_url': self.download_url,
+            'file_size': self.file_size,
+            'width': self.width,
+            'height': self.height,
+            'duration': self.duration
+        }
+        # The flag is only emitted when set, as integer 1
+        if self.needs_quality_recheck:
+            d['needs_quality_recheck'] = 1
+        return d
+
+
+@dataclass
+class Post:
+    """Represents a post from a creator"""
+    post_id: str
+    service_id: str
+    platform: str
+    creator_id: str
+    title: Optional[str] = None
+    content: Optional[str] = None
+    published_at: Optional[str] = None
+    added_at: Optional[str] = None
+    edited_at: Optional[str] = None
+    attachments: List[Attachment] = field(default_factory=list)
+    embed_urls: List[str] = field(default_factory=list)
+    is_pinned: bool = False
+    pinned_at: Optional[str] = None
+    auto_tags: List[str] = field(default_factory=list)  # Tag names to auto-apply on sync
+    tagged_users: List[str] = field(default_factory=list)  # Instagram users tagged in the post
+
+    @classmethod
+    def from_api(cls, data: Dict, service_id: str, platform: str, creator_id: str, base_url: str = '') -> 'Post':
+        """
+        Create Post from API response.
+
+        Args:
+            data: Post object from the API. Keys read: 'id', 'title',
+                'content'/'substring', 'published', 'added', 'edited',
+                'attachments', 'file' and 'embed'
+            service_id: Stored on the post as-is
+            platform: Stored on the post as-is
+            creator_id: Stored on the post as-is
+            base_url: Passed to Attachment.from_api to build download URLs
+
+        Returns:
+            Post with attachments collected from 'attachments' and 'file',
+            HTML tags stripped from the content, and the title moved into
+            the content when the content is empty.
+        """
+        # Parse attachments; a null 'attachments' value is treated like a
+        # missing one, the same way 'embed' is handled below
+        attachments = [
+            Attachment.from_api(att_data, base_url)
+            for att_data in data.get('attachments') or []
+        ]
+
+        # Also check file field (some APIs use this instead of attachments)
+        file_data = data.get('file')
+        if file_data:
+            if isinstance(file_data, dict):
+                attachments.append(Attachment.from_api(file_data, base_url))
+            elif isinstance(file_data, str):
+                # A bare path string: use its last segment as the name
+                attachments.append(Attachment(
+                    name=file_data.rsplit('/', 1)[-1],
+                    server_path=file_data
+                ))
+
+        # Parse dates
+        published = data.get('published')
+        added = data.get('added')
+        edited = data.get('edited')
+
+        # Content: use 'content' if available, fallback to 'substring' (list endpoint returns truncated)
+        content = data.get('content') or data.get('substring') or ''
+
+        # Single post endpoint returns HTML content (e.g. <p>text</p>), strip tags
+        if content and '<' in content:
+            import re
+            content = re.sub(r'<br\s*/?>', '\n', content)
+            content = re.sub(r'</p>\s*<p>', '\n\n', content)
+            content = re.sub(r'<[^>]+>', '', content)
+            content = content.strip()
+
+        title = data.get('title')
+
+        # OnlyFans posts on Coomer have the post text in 'title' and empty 'content'.
+        # Copy title to content and clear title (OF posts don't have real titles).
+        if not content and title:
+            content = title
+            title = None
+
+        return cls(
+            post_id=str(data.get('id', '')),
+            service_id=service_id,
+            platform=platform,
+            creator_id=creator_id,
+            title=title,
+            content=content,
+            published_at=published,
+            added_at=added,
+            edited_at=edited,
+            attachments=attachments,
+            embed_urls=data.get('embed', []) or []
+        )
+
+    def to_dict(self) -> Dict:
+        """
+        Convert to dictionary for database storage.
+
+        Attachments and embeds are reduced to counts/flags; service, platform
+        and creator identifiers, auto_tags and tagged_users are not included.
+        """
+        return {
+            'post_id': self.post_id,
+            'title': self.title,
+            'content': self.content,
+            'published_at': self.published_at,
+            'added_at': self.added_at,
+            'edited_at': self.edited_at,
+            'has_attachments': 1 if self.attachments else 0,
+            'attachment_count': len(self.attachments),
+            'embed_count': len(self.embed_urls),
+            'is_pinned': 1 if self.is_pinned else 0,
+            'pinned_at': self.pinned_at
+        }
+
+
+@dataclass
+class Message:
+    """A single chat message exchanged with a creator"""
+    message_id: str
+    platform: str
+    service_id: str
+    creator_id: str  # Platform-specific creator ID
+    text: Optional[str] = None
+    sent_at: Optional[str] = None
+    is_from_creator: bool = True
+    is_tip: bool = False
+    tip_amount: Optional[float] = None
+    price: Optional[float] = None
+    is_free: bool = True
+    is_purchased: bool = False
+    reply_to_message_id: Optional[str] = None
+    attachments: List[Attachment] = field(default_factory=list)
+
+    def to_dict(self) -> Dict:
+        """
+        Build the row dictionary used for database storage.
+
+        Boolean fields become 0/1 integers and attachments are reduced to a
+        flag plus a count; platform, service and creator IDs are not included.
+        """
+        attachment_count = len(self.attachments)
+        row = {
+            'message_id': self.message_id,
+            'text': self.text,
+            'sent_at': self.sent_at,
+            'is_from_creator': int(bool(self.is_from_creator)),
+            'is_tip': int(bool(self.is_tip)),
+            'tip_amount': self.tip_amount,
+            'price': self.price,
+            'is_free': int(bool(self.is_free)),
+            'is_purchased': int(bool(self.is_purchased)),
+            'has_attachments': int(bool(self.attachments)),
+            'attachment_count': attachment_count,
+            'reply_to_message_id': self.reply_to_message_id,
+        }
+        return row
+
+
+@dataclass
+class Creator:
+    """A creator profile as returned by Coomer/Kemono"""
+    creator_id: str
+    service_id: str
+    platform: str
+    username: str
+    display_name: Optional[str] = None
+    profile_image_url: Optional[str] = None
+    banner_image_url: Optional[str] = None
+    bio: Optional[str] = None
+    post_count: int = 0
+
+    @classmethod
+    def from_api(cls, data: Dict, service_id: str, platform: str, base_url: str = None) -> 'Creator':
+        """
+        Build a Creator from an API response object.
+
+        Args:
+            data: Creator object; keys read are 'id', 'name', 'post_count',
+                'profile_image' and 'banner_image'
+            service_id: Stored on the creator as-is
+            platform: Stored on the creator and used in generated image URLs
+            base_url: Site base URL; when given (and the creator has an ID),
+                missing image URLs are generated on its "img." host
+
+        Returns:
+            Creator whose username and display name both come from 'name'.
+        """
+        creator_id = str(data.get('id', ''))
+
+        # Image host: "img." + the site's host, with .party mapped to .st
+        # (coomer.party/kemono.party images are served from the .st domain)
+        img_domain = None
+        if base_url and creator_id:
+            from urllib.parse import urlparse
+            netloc = urlparse(base_url).netloc.replace('.party', '.st')
+            img_domain = f"img.{netloc}"
+
+        # Explicit URLs from the API win; otherwise fall back to the
+        # icons/banners endpoints when an image host is known
+        avatar = data.get('profile_image')
+        if img_domain and not avatar:
+            avatar = f"https://{img_domain}/icons/{platform}/{creator_id}"
+
+        banner = data.get('banner_image')
+        if img_domain and not banner:
+            banner = f"https://{img_domain}/banners/{platform}/{creator_id}"
+
+        return cls(
+            creator_id=creator_id,
+            service_id=service_id,
+            platform=platform,
+            username=data.get('name', ''),
+            display_name=data.get('name'),
+            profile_image_url=avatar,
+            banner_image_url=banner,
+            post_count=data.get('post_count', 0)
+        )
+
+    def to_dict(self) -> Dict:
+        """Return all fields as a dictionary for database storage"""
+        row = {}
+        row['service_id'] = self.service_id
+        row['platform'] = self.platform
+        row['creator_id'] = self.creator_id
+        row['username'] = self.username
+        row['display_name'] = self.display_name
+        row['profile_image_url'] = self.profile_image_url
+        row['banner_image_url'] = self.banner_image_url
+        row['bio'] = self.bio
+        row['post_count'] = self.post_count
+        return row
+
+
+@dataclass
+class SyncResult:
+    """Outcome of syncing a single creator"""
+    success: bool
+    new_posts: int = 0
+    new_attachments: int = 0
+    downloaded_files: int = 0
+    failed_files: int = 0
+    skipped_files: int = 0
+    error: Optional[str] = None
+    downloaded_file_info: Optional[List[Dict]] = None  # List of {file_path, filename, source, content_type}
+
+    def to_dict(self) -> Dict:
+        """Return the counters, success flag and error; downloaded_file_info is left out"""
+        exported = (
+            'success',
+            'new_posts',
+            'new_attachments',
+            'downloaded_files',
+            'failed_files',
+            'skipped_files',
+            'error',
+        )
+        return {key: getattr(self, key) for key in exported}
+
+
+@dataclass
+class DownloadResult:
+    """Outcome of downloading a single file"""
+    success: bool
+    file_path: Optional[str] = None
+    file_hash: Optional[str] = None
+    file_size: Optional[int] = None
+    error: Optional[str] = None
+    is_duplicate: bool = False
+
+    def to_dict(self) -> Dict:
+        """Return every field as a plain dictionary"""
+        exported = (
+            'success',
+            'file_path',
+            'file_hash',
+            'file_size',
+            'error',
+            'is_duplicate',
+        )
+        return {key: getattr(self, key) for key in exported}
--- a/modules/paid_content/onlyfans_client.py
+++ b/modules/paid_content/onlyfans_client.py
@@ -0,0 +1,729 @@
+"""
+OnlyFans Direct API Client
+
+Downloads content directly from the OnlyFans API using browser-extracted
+credentials and dynamic request signing.
+"""
+
+import asyncio
+import aiohttp
+import re
+from datetime import datetime
+from typing import List, Optional, Dict, Any, Callable
+from urllib.parse import urlparse, urlencode
+
+from modules.base_module import LoggingMixin, RateLimitMixin
+from .models import Post, Attachment, Message
+from .onlyfans_signing import OnlyFansSigner
+
+
+class OnlyFansClient(LoggingMixin, RateLimitMixin):
+    """
+    API client for downloading content directly from OnlyFans.
+
+    API Endpoints:
+    - Base URL: https://onlyfans.com/api2/v2
+    - Auth: Requires browser-extracted credentials (sess, auth_id, x-bc, User-Agent)
+    - Signing: Every request needs dynamic sign/time/app-token headers
+    - GET /users/me - Verify auth
+    - GET /users/{username} - Get user profile
+    - GET /users/{user_id}/posts?limit=50&offset={offset} - Get posts (paginated)
+    """
+
+    BASE_URL = "https://onlyfans.com/api2/v2"
+    SERVICE_ID = "onlyfans_direct"
+    PLATFORM = "onlyfans"
+
+    def __init__(
+        self,
+        auth_config: Dict[str, str],
+        signing_url: Optional[str] = None,
+        log_callback: Optional[Callable] = None,
+    ):
+        """
+        Set up logging, rate limiting and request signing for the client.
+
+        Args:
+            auth_config: Browser-extracted credentials with keys
+                sess, auth_id, auth_uid (optional), x_bc, user_agent
+            signing_url: Optional override for the signing-rules URL
+            log_callback: Optional logging callback
+        """
+        self._init_logger('PaidContent', log_callback, default_module='OnlyFansDirect')
+
+        # OnlyFans is stricter than Fansly, so throttle more conservatively
+        throttle = {
+            'min_delay': 1.5,
+            'max_delay': 3.0,
+            'batch_delay_min': 3,
+            'batch_delay_max': 6,
+        }
+        self._init_rate_limiter(**throttle)
+
+        # The HTTP session is created lazily on first use
+        self._signer = OnlyFansSigner(rules_url=signing_url)
+        self._session: Optional[aiohttp.ClientSession] = None
+        self.auth_config = auth_config
+
+    async def _get_session(self) -> aiohttp.ClientSession:
+        """Return the open aiohttp session, building a new one with OnlyFans headers if needed"""
+        current = self._session
+        if current is not None and not current.closed:
+            return current
+
+        creds = self.auth_config
+
+        # Cookie header: sess + auth_id are mandatory, auth_uid_<id> only when supplied
+        cookie_parts = [f"sess={creds['sess']}", f"auth_id={creds['auth_id']}"]
+        auth_uid = creds.get('auth_uid')
+        if auth_uid:
+            cookie_parts.append(f"auth_uid_{creds['auth_id']}={auth_uid}")
+
+        default_headers = {
+            'Accept': 'application/json, text/plain, */*',
+            'User-Agent': creds.get('user_agent', ''),
+            'x-bc': creds.get('x_bc', ''),
+            'Cookie': "; ".join(cookie_parts),
+            'Origin': 'https://onlyfans.com',
+            'Referer': 'https://onlyfans.com/',
+        }
+        self._session = aiohttp.ClientSession(
+            headers=default_headers,
+            timeout=aiohttp.ClientTimeout(total=60),
+        )
+        return self._session
+
+    async def _sign_request(self, endpoint: str) -> Dict[str, str]:
+        """
+        Build the signing headers for one API call.
+
+        Args:
+            endpoint: API path (e.g. "/users/me"); "/api2/v2" is prepended before signing
+
+        Returns:
+            The signer's headers with an extra 'user-id' entry
+        """
+        # Falls back to '0' when no auth_id is configured
+        auth_id = self.auth_config.get('auth_id', '0')
+
+        # The signature covers the full URL path (matching OF-Scraper)
+        headers = await self._signer.sign(f"/api2/v2{endpoint}", auth_id)
+        headers['user-id'] = auth_id
+        return headers
+
+    async def _api_request(self, endpoint: str, params: Optional[Dict] = None) -> Optional[Dict]:
+        """
+        Make a signed GET request to the OnlyFans API.
+
+        Handles 401 (auth failure), 404, 429 (rate limit), timeouts and general
+        errors. A 429 is retried after the server's ``Retry-After`` delay
+        (seconds or HTTP-date form) scaled by the attempt number and capped at
+        120 seconds. No wait is performed after the final attempt.
+
+        Args:
+            endpoint: API path (e.g. "/users/me")
+            params: Optional query parameters
+
+        Returns:
+            Parsed JSON response or None on failure
+        """
+        session = await self._get_session()
+        # Include query params in the signing path (OF-Scraper does this)
+        sign_endpoint = f"{endpoint}?{urlencode(params)}" if params else endpoint
+        sign_headers = await self._sign_request(sign_endpoint)
+
+        url = f"{self.BASE_URL}{endpoint}"
+        max_retries = 3
+
+        for attempt in range(max_retries):
+            is_last_attempt = attempt == max_retries - 1
+            try:
+                async with session.get(url, params=params, headers=sign_headers) as resp:
+                    if resp.status == 200:
+                        return await resp.json()
+                    if resp.status == 401:
+                        self.log("OnlyFans auth failed (401) - credentials may be expired", 'error')
+                        return None
+                    if resp.status == 404:
+                        self.log(f"Not found (404): {endpoint}", 'debug')
+                        return None
+                    if resp.status != 429:
+                        text = await resp.text()
+                        self.log(f"API error: HTTP {resp.status} for {endpoint}: {text[:200]}", 'warning')
+                        return None
+
+                    # 429: work out the wait here, sleep after the response is released
+                    retry_after = self._retry_after_seconds(resp.headers.get('Retry-After'))
+                    wait = min(retry_after * (attempt + 1), 120)
+
+                if is_last_attempt:
+                    # Sleeping now would only delay the inevitable None
+                    self.log(f"Rate limited (429), giving up after {max_retries} attempts", 'warning')
+                    return None
+
+                self.log(f"Rate limited (429), waiting {wait}s (attempt {attempt + 1}/{max_retries})", 'warning')
+                await asyncio.sleep(wait)
+                # Refresh signing headers for retry (timestamp changes)
+                sign_headers = await self._sign_request(sign_endpoint)
+            except asyncio.TimeoutError:
+                self.log(f"Request timeout for {endpoint} (attempt {attempt + 1})", 'warning')
+                if is_last_attempt:
+                    return None
+                await asyncio.sleep(5 * (attempt + 1))
+                sign_headers = await self._sign_request(sign_endpoint)
+            except Exception as e:
+                self.log(f"Request error for {endpoint}: {e}", 'error')
+                return None
+
+        return None
+
+    @staticmethod
+    def _retry_after_seconds(header_value, default: int = 30) -> int:
+        """
+        Convert a Retry-After header value into a non-negative number of seconds.
+
+        Accepts the delay-seconds form ("120") and the HTTP-date form
+        ("Wed, 21 Oct 2015 07:28:00 GMT"). Returns ``default`` when the header
+        is missing or cannot be parsed.
+        """
+        if header_value is None:
+            return default
+
+        try:
+            return max(0, int(float(header_value)))
+        except (TypeError, ValueError, OverflowError):
+            pass
+
+        from datetime import timezone
+        from email.utils import parsedate_to_datetime
+
+        try:
+            retry_at = parsedate_to_datetime(str(header_value))
+        except (TypeError, ValueError):
+            return default
+        if retry_at is None:
+            return default
+        if retry_at.tzinfo is None:
+            # A date without zone information is treated as UTC
+            retry_at = retry_at.replace(tzinfo=timezone.utc)
+
+        remaining = (retry_at - datetime.now(timezone.utc)).total_seconds()
+        return max(0, int(remaining))
+
+    @staticmethod
+    def _strip_html(text: str) -> str:
+        """
+        Convert an HTML fragment to plain text.
+
+        ``<br>`` tags (any case) become newlines, all other tags are removed,
+        and character references are decoded in a single pass, so escaped text
+        such as ``&amp;lt;`` decodes to ``&lt;`` rather than ``<``.
+
+        Args:
+            text: HTML fragment; None or empty yields ''
+
+        Returns:
+            Plain text with leading/trailing whitespace stripped
+        """
+        if not text:
+            return ''
+
+        import html  # local import: the file's import block may not provide it
+
+        text = re.sub(r'<br\s*/?>', '\n', text, flags=re.IGNORECASE)
+        text = re.sub(r'<[^>]+>', '', text)
+        # Decode after tag removal so an escaped "&lt;b&gt;" survives as literal text
+        return html.unescape(text).strip()
+
+    async def close(self):
+        """Shut down the aiohttp session if one is open, then forget it"""
+        active = self._session
+        if not active or active.closed:
+            # Nothing to do: never created, or already closed
+            return
+        await active.close()
+        self._session = None
+
+    async def __aenter__(self):
+        """Enter ``async with``: the client itself is the context value"""
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Leave ``async with``: close the session; exceptions are not suppressed"""
+        await self.close()
+
+    async def check_auth(self) -> Dict[str, Any]:
+        """
+        Check whether the configured credentials work by requesting /users/me.
+
+        Returns:
+            {'valid': True, 'user_id', 'username', 'name'} on success,
+            otherwise {'valid': False, 'error': <message>}
+        """
+        self._delay_between_items()
+        try:
+            me = await self._api_request("/users/me")
+            if not (me and me.get('id')):
+                return {'valid': False, 'error': 'Invalid credentials or unexpected response'}
+
+            result = {'valid': True}
+            result['user_id'] = str(me['id'])
+            result['username'] = me.get('username', '')
+            result['name'] = me.get('name', '')
+            return result
+        except Exception as e:
+            self.log(f"Error checking auth: {e}", 'error')
+            return {'valid': False, 'error': str(e)}
+
+    async def get_user_info(self, username: str) -> Optional[Dict[str, Any]]:
+        """
+        Look up a creator's profile and normalize it.
+
+        Args:
+            username: The OnlyFans username
+
+        Returns:
+            Dict with user_id, username, display_name, avatar_url, banner_url,
+            bio, join_date and posts_count, or None if the lookup fails
+        """
+        self._delay_between_items()
+        try:
+            profile = await self._api_request(f"/users/{username}")
+            if not (profile and profile.get('id')):
+                self.log(f"User not found: {username}", 'warning')
+                return None
+
+            about_html = profile.get('rawAbout') or profile.get('about') or ''
+            # Keep only the YYYY-MM-DD part of the join timestamp
+            joined_day = (profile.get('joinDate') or '')[:10]
+
+            return {
+                'user_id': str(profile['id']),
+                'username': profile.get('username', username),
+                'display_name': profile.get('name', ''),
+                'avatar_url': profile.get('avatar'),
+                'banner_url': profile.get('header'),
+                'bio': self._strip_html(about_html),
+                'join_date': joined_day or None,
+                'posts_count': profile.get('postsCount', 0),
+            }
+        except Exception as e:
+            self.log(f"Error getting user info for {username}: {e}", 'error')
+            return None
+
+    async def get_single_post(self, post_id: str) -> Optional[Post]:
+        """
+        Fetch a single post by its OnlyFans post ID.
+
+        The creator ID is taken from ``author.id`` when the response carries an
+        author object, otherwise from ``authorId``. A null or missing author
+        is tolerated.
+
+        Args:
+            post_id: The OnlyFans post ID
+
+        Returns:
+            Post object or None
+        """
+        self._delay_between_items()
+        data = await self._api_request(f"/posts/{post_id}")
+        if not data:
+            self.log(f"Post {post_id} not found", 'warning')
+            return None
+
+        # 'author' may be present but null, so never call .get() on it blindly
+        author = data.get('author')
+        author_id = author.get('id') if isinstance(author, dict) else None
+        if author_id is None:
+            author_id = data.get('authorId')
+        user_id = str(author_id) if author_id is not None else ''
+
+        return self._parse_post(data, user_id)
+
+    async def get_posts(
+        self,
+        user_id: str,
+        username: str,
+        since_date: Optional[str] = None,
+        until_date: Optional[str] = None,
+        days_back: Optional[int] = None,
+        max_posts: Optional[int] = None,
+        progress_callback: Optional[Callable[[int, int], None]] = None,
+    ) -> List[Post]:
+        """
+        Fetch posts from a creator's timeline using offset-based pagination.
+
+        Date filters are compared in UTC: timestamps that carry a UTC offset are
+        converted to UTC before comparison, and ``days_back`` is measured from
+        the current UTC time. Timestamps without an offset are used as given.
+        Posts whose date cannot be parsed are kept.
+
+        Fetching stops (and the result is returned immediately, without the
+        pinned-post lookup) at the first post older than the since-date or once
+        ``max_posts`` posts were collected. Pinned posts appended at the end
+        are not date-filtered.
+
+        Args:
+            user_id: The OnlyFans numeric user ID
+            username: The username (for logging/reference)
+            since_date: Only fetch posts after this date (ISO format)
+            until_date: Only fetch posts before this date (ISO format)
+            days_back: Fetch posts from the last N days (overrides since_date)
+            max_posts: Maximum number of posts to fetch
+            progress_callback: Called with (page, total_posts) during fetching
+
+        Returns:
+            List of Post objects
+        """
+        self.log(f"Fetching posts for {username} (user_id: {user_id})", 'info')
+
+        if days_back:
+            from datetime import timedelta, timezone
+            since_date = (datetime.now(timezone.utc) - timedelta(days=days_back)).isoformat()
+
+        since_dt = self._to_naive_utc(since_date)
+        until_dt = self._to_naive_utc(until_date)
+
+        if since_dt:
+            self.log(f"Date filter: since_date={since_dt.isoformat()}", 'debug')
+
+        all_posts: List[Post] = []
+        offset = 0
+        page_size = 50
+        page = 0
+
+        while True:
+            self._delay_between_items()
+
+            params = {
+                'limit': str(page_size),
+                'offset': str(offset),
+                'order': 'publish_date_desc',
+            }
+
+            data = await self._api_request(f"/users/{user_id}/posts", params=params)
+            if not data:
+                break
+
+            # OF returns a list of posts directly
+            posts_list = data if isinstance(data, list) else data.get('list', [])
+            if not posts_list:
+                break
+
+            for post_data in posts_list:
+                post = self._parse_post(post_data, user_id)
+                if not post:
+                    continue
+
+                # Parse the post date once; both filters share it
+                post_dt = None
+                if post.published_at and (since_dt or until_dt):
+                    post_dt = self._to_naive_utc(post.published_at)
+                    if post_dt is None:
+                        self.log(
+                            f"Date comparison error: unparseable date (post_date={post.published_at})",
+                            'warning',
+                        )
+
+                if post_dt is not None:
+                    if since_dt and post_dt < since_dt:
+                        # Results are newest-first, so everything after this is older too
+                        self.log(f"Reached posts older than since_date ({post.published_at}), stopping", 'debug')
+                        return all_posts
+                    if until_dt and post_dt > until_dt:
+                        continue
+
+                all_posts.append(post)
+
+                if max_posts and len(all_posts) >= max_posts:
+                    self.log(f"Reached max_posts limit: {max_posts}", 'debug')
+                    return all_posts
+
+            page += 1
+            if progress_callback:
+                progress_callback(page, len(all_posts))
+
+            # If we got fewer results than page_size, we've reached the end
+            if len(posts_list) < page_size:
+                break
+
+            offset += page_size
+            self._delay_between_batches()
+
+        # Also fetch pinned posts (they may not appear in the timeline)
+        self._delay_between_items()
+        pinned_data = await self._api_request(
+            f"/users/{user_id}/posts",
+            params={'limit': '50', 'offset': '0', 'order': 'publish_date_desc', 'pinned': '1'},
+        )
+        if pinned_data:
+            pinned_list = pinned_data if isinstance(pinned_data, list) else pinned_data.get('list', [])
+            existing_ids = {p.post_id for p in all_posts}
+            for post_data in pinned_list:
+                post = self._parse_post(post_data, user_id)
+                if post and post.post_id not in existing_ids:
+                    all_posts.append(post)
+
+        self.log(f"Fetched {len(all_posts)} posts for {username}", 'info')
+        return all_posts
+
+    @staticmethod
+    def _to_naive_utc(value: Optional[str]) -> Optional[datetime]:
+        """
+        Parse an ISO-8601 string into a naive datetime expressed in UTC.
+
+        Offset-aware values are converted to UTC before the tzinfo is dropped.
+        Values without an offset are returned unchanged. Returns None for
+        empty or unparseable input.
+        """
+        if not value:
+            return None
+
+        from datetime import timezone
+
+        try:
+            parsed = datetime.fromisoformat(value.replace('Z', '+00:00'))
+        except (ValueError, TypeError, AttributeError):
+            return None
+
+        if parsed.tzinfo is not None:
+            parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
+        return parsed
+
+    def _parse_post(self, post_data: Dict, user_id: str) -> Optional[Post]:
+        """
+        Build a Post model from one raw OnlyFans post dict.
+
+        Args:
+            post_data: Raw post data from API
+            user_id: Creator's user ID
+
+        Returns:
+            Post object, or None when the post has no id or parsing raises
+        """
+        try:
+            post_id = str(post_data.get('id', ''))
+            if not post_id:
+                return None
+
+            # Publication time: strings are kept verbatim, numeric values are
+            # treated as epoch seconds and rendered in local time
+            timestamp = post_data.get('postedAt') or post_data.get('createdAt')
+            published_at = None
+            if timestamp:
+                try:
+                    if isinstance(timestamp, str):
+                        published_at = timestamp
+                    elif isinstance(timestamp, (int, float)):
+                        published_at = datetime.fromtimestamp(timestamp).isoformat()
+                except (ValueError, TypeError, OSError):
+                    pass
+
+            # Plain-text body
+            body_text = self._strip_html(post_data.get('rawText') or post_data.get('text') or '')
+
+            # Media items that fail to parse are dropped
+            parsed_media = (self._parse_attachment(item) for item in post_data.get('media', []) or [])
+            attachments = [att for att in parsed_media if att]
+
+            # Video links mentioned in the body text
+            embed_pattern = r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/|vimeo\.com/|dailymotion\.com/video/)\S+'
+            embed_urls = re.findall(embed_pattern, body_text) if body_text else []
+
+            return Post(
+                post_id=post_id,
+                service_id=self.SERVICE_ID,
+                platform=self.PLATFORM,
+                creator_id=user_id,
+                title=None,
+                content=body_text,
+                published_at=published_at,
+                added_at=datetime.now().isoformat(),
+                attachments=attachments,
+                embed_urls=embed_urls,
+                is_pinned=bool(post_data.get('isPinned')),
+                pinned_at=post_data.get('pinnedAt'),
+            )
+
+        except Exception as e:
+            self.log(f"Error parsing post: {e}", 'error')
+            return None
+
+    def _parse_attachment(self, media_item: Dict) -> Optional[Attachment]:
+        """
+        Parse an OnlyFans media item into an Attachment.
+
+        OF media structure:
+        {
+            id, type, source: {source: url, width, height, duration},
+            full: {source: url, ...}, preview: {source: url, ...}
+        }
+        The variants may also be nested under a 'files' key, and each variant
+        may expose its URL as either 'url' or 'source'.
+
+        The download URL is chosen from the first variant that provides one,
+        in this order: 'full', 'source', 'preview', then the item's top-level
+        'src'. The attachment is flagged as a preview when the item is not
+        viewable (canView is false) or when only the 'preview' variant
+        supplied a URL.
+
+        Args:
+            media_item: Raw media dict from API
+
+        Returns:
+            Attachment object, or None if parsing raises
+        """
+        try:
+            media_id = str(media_item.get('id', ''))
+            media_type = media_item.get('type', '').lower()
+
+            # Map OF media types to our file types
+            type_map = {
+                'photo': 'image',
+                'video': 'video',
+                'audio': 'audio',
+                'gif': 'image',
+            }
+            file_type = type_map.get(media_type, 'unknown')
+
+            # Get download URL - prefer 'full' quality, fallback to 'source'
+            download_url = None
+            width = None
+            height = None
+            duration = None
+
+            # Current OF API nests media under 'files' key; older responses
+            # keep the variants directly on the media item
+            files = media_item.get('files') or media_item
+
+            # Try 'full' first (higher quality)
+            full_data = files.get('full')
+            if full_data and isinstance(full_data, dict):
+                download_url = full_data.get('url') or full_data.get('source')
+                width = full_data.get('width')
+                height = full_data.get('height')
+                duration = full_data.get('duration')
+
+            # Fallback to 'source'; only fill metadata that 'full' did not provide
+            if not download_url:
+                source_data = files.get('source')
+                if source_data and isinstance(source_data, dict):
+                    download_url = source_data.get('url') or source_data.get('source')
+                    if not width:
+                        width = source_data.get('width')
+                    if not height:
+                        height = source_data.get('height')
+                    if not duration:
+                        duration = source_data.get('duration')
+
+            # For videos without a direct URL, get metadata from media item
+            # NOTE(review): this can_view value is not read before it is
+            # reassigned further down - looks redundant, confirm before removing
+            can_view = media_item.get('canView', True)
+            if not download_url and media_type == 'video':
+                # OF DRM videos use FairPlay SAMPLE-AES encryption — cannot be downloaded.
+                # Get dimensions/duration for metadata, then fall through to preview frame.
+                if not duration:
+                    duration = media_item.get('duration')
+                if not width:
+                    width = (full_data or {}).get('width')
+                if not height:
+                    height = (full_data or {}).get('height')
+
+            # Fallback to 'preview' for any content type
+            # For DRM videos (canView=true), downloads the preview frame image (shown with lock overlay)
+            # For PPV videos (canView=false), there's no preview — marked unavailable
+            if not download_url:
+                preview_data = files.get('preview')
+                if preview_data and isinstance(preview_data, dict):
+                    download_url = preview_data.get('url') or preview_data.get('source')
+                    if not width:
+                        width = preview_data.get('width')
+                    if not height:
+                        height = preview_data.get('height')
+
+            # Some OF responses have src directly
+            if not download_url:
+                download_url = media_item.get('src')
+
+            # Determine extension from URL; without a URL fall back to a
+            # default based on the media type
+            ext = ''
+            if download_url:
+                parsed = urlparse(download_url)
+                path = parsed.path
+                # NOTE(review): this splits on the last '.' of the whole path,
+                # so a dot in a directory name with an extension-less file name
+                # would yield an "extension" containing '/' - confirm whether
+                # such URLs can occur
+                if '.' in path:
+                    ext = path.rsplit('.', 1)[-1].lower()
+                    # Clean up common issues
+                    if ext in ('jpeg',):
+                        ext = 'jpg'
+            elif media_type == 'photo':
+                ext = 'jpg'
+            elif media_type == 'video':
+                ext = 'mp4'
+
+            filename = f"{media_id}.{ext}" if ext else str(media_id)
+
+            # Override file_type based on actual extension (OF sometimes misreports type)
+            video_exts = {'mp4', 'mov', 'webm', 'avi', 'mkv', 'flv', 'm4v', 'wmv', 'mpg', 'mpeg'}
+            if ext in video_exts and file_type != 'video':
+                file_type = 'video'
+
+            # Duration may be in seconds (float or int); store whole seconds
+            if duration is not None:
+                try:
+                    duration = int(float(duration))
+                except (ValueError, TypeError):
+                    duration = None
+
+            # Check if content is actually locked (canView=false) vs just missing URL
+            can_view = media_item.get('canView', True)
+            is_preview = not can_view
+            if not download_url and not can_view:
+                self.log(f"PPV/locked content: {filename}", 'debug')
+
+            # Detect preview-only: no full/source URL but got a preview URL
+            if not is_preview and download_url:
+                has_full = False
+                if full_data and isinstance(full_data, dict):
+                    has_full = bool(full_data.get('url') or full_data.get('source'))
+                if not has_full:
+                    source_data = files.get('source')
+                    if source_data and isinstance(source_data, dict):
+                        has_full = bool(source_data.get('url') or source_data.get('source'))
+                    elif not source_data:
+                        # No-op: has_full is already False on this path
+                        has_full = False
+                if not has_full and not media_item.get('src'):
+                    # Only got URL from preview fallback
+                    is_preview = True
+
+            return Attachment(
+                name=filename,
+                server_path=f"/onlyfans/{media_id}",
+                file_type=file_type,
+                extension=ext if ext else None,
+                download_url=download_url,
+                file_size=None,
+                width=width,
+                height=height,
+                duration=duration,
+                is_preview=is_preview,
+            )
+
+        except Exception as e:
+            # Any failure (including a non-dict media_item) is logged and the item skipped
+            self.log(f"Error parsing attachment: {e}", 'error')
+            return None
+
+    # ==================== MESSAGES ====================
+
+    async def get_messages(self, user_id: str, max_messages: int = 500) -> List[Message]:
+        """
+        Fetch messages from a conversation with a creator.
+
+        Uses GET /chats/{user_id}/messages with cursor-based pagination.
+        The 'id' param is used as cursor for older messages. Each page request
+        is preceded by the same rate-limit pause the other fetchers use.
+
+        Args:
+            user_id: OnlyFans numeric user ID of the creator
+            max_messages: Maximum number of messages to fetch; the returned
+                list never exceeds this length
+
+        Returns:
+            List of Message objects
+        """
+        page_size = 50
+        messages = []
+        cursor_id = None
+        page = 0
+
+        while len(messages) < max_messages:
+            page += 1
+            self._delay_between_items()
+
+            params = {'limit': page_size, 'order': 'desc'}
+            if cursor_id:
+                params['id'] = cursor_id
+
+            data = await self._api_request(f"/chats/{user_id}/messages", params=params)
+            if not data:
+                break
+
+            # Response is a dict with 'list' key containing messages
+            msg_list = data.get('list', []) if isinstance(data, dict) else data
+            if not msg_list:
+                break
+
+            for msg_data in msg_list:
+                msg = self._parse_message(msg_data, user_id)
+                if msg:
+                    messages.append(msg)
+
+            self.log(f"Fetched page {page}: {len(msg_list)} messages (total: {len(messages)})", 'debug')
+
+            if len(msg_list) < page_size:
+                break  # Last page
+
+            # Use the last message's id as cursor for next page; stop if the
+            # cursor did not move, otherwise the same page would repeat forever
+            last_id = msg_list[-1].get('id')
+            if last_id and str(last_id) != str(cursor_id):
+                cursor_id = last_id
+            else:
+                break
+
+        # Whole pages are appended above, so trim any overshoot of the limit
+        if len(messages) > max_messages:
+            messages = messages[:max(max_messages, 0)]
+
+        self.log(f"Fetched {len(messages)} messages for user {user_id}", 'info')
+        return messages
+
+    def _parse_message(self, msg_data: Dict, creator_user_id: str) -> Optional[Message]:
+        """
+        Build a Message model from one raw OnlyFans chat message dict.
+
+        Args:
+            msg_data: Raw message dict from API
+            creator_user_id: Numeric user ID of the creator (to determine direction)
+
+        Returns:
+            Message object, or None when the message has no id or parsing raises
+        """
+        try:
+            message_id = str(msg_data.get('id', ''))
+            if not message_id:
+                return None
+
+            # Direction: the message is the creator's when the sender id matches
+            creator_id = str(creator_user_id)
+            sender = msg_data.get('fromUser', {})
+            sender_id = str(sender.get('id', ''))
+
+            body = self._strip_html(msg_data.get('text') or '')
+
+            # Normalize the timestamp to ISO format; keep the raw value if it won't parse
+            raw_created = msg_data.get('createdAt')
+            sent_at = None
+            if raw_created:
+                try:
+                    sent_at = datetime.fromisoformat(raw_created.replace('Z', '+00:00')).isoformat()
+                except (ValueError, TypeError):
+                    sent_at = raw_created
+
+            # PPV / tip details
+            price = msg_data.get('price')
+            tip_amount = msg_data.get('tipAmount')
+            purchased = msg_data.get('isOpened', False) or msg_data.get('canPurchase') is False
+
+            # Media uses the same structure as posts; unparseable items are dropped
+            parsed_media = (self._parse_attachment(item) for item in msg_data.get('media', []) or [])
+            attachments = [att for att in parsed_media if att]
+
+            return Message(
+                message_id=message_id,
+                platform=self.PLATFORM,
+                service_id=self.SERVICE_ID,
+                creator_id=creator_id,
+                text=body or None,
+                sent_at=sent_at,
+                is_from_creator=(sender_id == creator_id),
+                is_tip=bool(msg_data.get('isTip', False)),
+                tip_amount=float(tip_amount) if tip_amount else None,
+                price=float(price) if price else None,
+                is_free=bool(msg_data.get('isFree', True)),
+                is_purchased=bool(purchased),
+                attachments=attachments,
+            )
+
+        except Exception as e:
+            self.log(f"Error parsing message: {e}", 'error')
+            return None
--- a/modules/paid_content/onlyfans_signing.py
+++ b/modules/paid_content/onlyfans_signing.py
@@ -0,0 +1,109 @@
+"""
+OnlyFans Request Signing Module
+
+Handles the dynamic request signing required by the OnlyFans API.
+Fetches signing rules from the DATAHOARDERS/dynamic-rules GitHub repo
+and computes SHA-1 based signatures for each API request.
+
+Isolated module so it's easy to update when OF changes their signing scheme.
+"""
+
+import hashlib
+import time
+from typing import Dict, Optional
+
+import aiohttp
+
+RULES_URL = "https://raw.githubusercontent.com/DATAHOARDERS/dynamic-rules/main/onlyfans.json"
+
+
+class OnlyFansSigner:
+    """
+    Computes request signatures for the OnlyFans API.
+
+    Uses dynamic rules fetched from a public GitHub repo (same source as OF-Scraper).
+    Rules are cached in memory on the instance and refreshed every 6 hours.
+    If a refresh fails for any reason, previously fetched rules keep being used.
+    """
+
+    RULES_TTL = 6 * 3600  # 6 hours
+
+    def __init__(self, rules_url: Optional[str] = None):
+        """
+        Args:
+            rules_url: URL of the signing-rules JSON; defaults to RULES_URL
+        """
+        self.rules_url = rules_url or RULES_URL
+        self._rules: Optional[Dict] = None
+        self._rules_fetched_at: float = 0
+
+    @property
+    def rules_stale(self) -> bool:
+        """True when no rules are cached or the cache is older than RULES_TTL"""
+        if self._rules is None:
+            return True
+        return (time.time() - self._rules_fetched_at) > self.RULES_TTL
+
+    async def get_rules(self) -> Dict:
+        """
+        Fetch signing rules, using cache if fresh.
+
+        When a refresh fails (non-200 status, network error, timeout or
+        invalid JSON) and older rules are cached, the cached rules are
+        returned instead of failing.
+
+        Returns:
+            Dict with the keys read by sign(): static_param, format,
+            checksum_indexes, checksum_constant (optional), app_token
+
+        Raises:
+            RuntimeError: on a non-200 response when nothing is cached
+            Exception: the original network/parse error when nothing is cached
+        """
+        if not self.rules_stale:
+            return self._rules
+
+        import asyncio  # local import: only needed to name the timeout exception
+
+        timeout = aiohttp.ClientTimeout(total=15)
+        try:
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                async with session.get(self.rules_url) as resp:
+                    if resp.status != 200:
+                        if self._rules is not None:
+                            # Use stale cache rather than failing
+                            return self._rules
+                        raise RuntimeError(
+                            f"Failed to fetch OF signing rules: HTTP {resp.status}"
+                        )
+                    # content_type=None: skip aiohttp's content-type check
+                    rules = await resp.json(content_type=None)
+        except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
+            # Connection failure, timeout or malformed JSON: same fallback
+            if self._rules is not None:
+                return self._rules
+            raise
+
+        self._rules = rules
+        self._rules_fetched_at = time.time()
+        return self._rules
+
+    async def sign(self, endpoint_path: str, user_id: str = "0") -> Dict[str, str]:
+        """
+        Compute signing headers for an OnlyFans API request.
+
+        Args:
+            endpoint_path: The full URL path (e.g. "/api2/v2/users/me")
+            user_id: The authenticated user's ID (from auth_id cookie)
+
+        Returns:
+            Dict with 'sign', 'time', 'app-token' headers
+        """
+        rules = await self.get_rules()
+        # Timestamp in milliseconds (matching OF-Scraper's implementation)
+        timestamp = str(round(time.time() * 1000))
+
+        # 1. Build the message to hash
+        msg = "\n".join([
+            rules["static_param"],
+            timestamp,
+            endpoint_path,
+            str(user_id),
+        ])
+
+        # 2. SHA-1 hash, kept as its ASCII hex digits for the checksum step
+        sha1_hash = hashlib.sha1(msg.encode("utf-8")).hexdigest()
+        sha1_bytes = sha1_hash.encode("ascii")
+
+        # 3. Checksum from indexed byte positions + single constant
+        # (matching OF-Scraper's implementation)
+        checksum_indexes = rules["checksum_indexes"]
+        checksum_constant = rules.get("checksum_constant", 0)
+        checksum = sum(sha1_bytes[i] for i in checksum_indexes) + checksum_constant
+
+        # 4. Build the sign header using the format template
+        # Typical format: "53760:{}:{:x}:69723085"
+        sign_value = rules["format"].format(sha1_hash, abs(checksum))
+
+        return {
+            "sign": sign_value,
+            "time": timestamp,
+            "app-token": rules["app_token"],
+        }
--- a/modules/paid_content/pornhub_client.py
+++ b/modules/paid_content/pornhub_client.py
@@ -0,0 +1,755 @@
+"""
+Pornhub Client - Fetches creator info and videos using yt-dlp
+"""
+
+import asyncio
+import html as html_module
+import json
+import os
+import re
+import subprocess
+import tempfile
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+from modules.base_module import LoggingMixin
+from .models import Creator, Post, Attachment
+
+
+class PornhubClient(LoggingMixin):
+    """
+    Client for fetching Pornhub creator information and videos using yt-dlp
+
+    Supports:
+    - Pornstar pages (pornhub.com/pornstar/name)
+    - Channel pages (pornhub.com/channels/name)
+    - User pages (pornhub.com/users/name)
+    - Model pages (pornhub.com/model/name)
+    """
+
+    SERVICE_ID = 'pornhub'
+    PLATFORM = 'pornhub'
+
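+    # Quality presets for yt-dlp
+    # Pornhub serves single combined streams with IDs like '1080p', '720p', etc.,
+    # NOT separate video+audio streams like YouTube. The leading
+    # 'bestvideo+bestaudio' alternative therefore normally matches nothing and
+    # yt-dlp falls through to the combined-stream 'best...' alternatives that
+    # follow each '/'.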
+    QUALITY_PRESETS = {
+        'best': 'bestvideo+bestaudio/best',
+        '1080p': 'bestvideo[height<=1080]+bestaudio/best[height<=1080]/best',
+        '720p': 'bestvideo[height<=720]+bestaudio/best[height<=720]/best',
+        '480p': 'bestvideo[height<=480]+bestaudio/best[height<=480]/best',
+    }
+
+    def __init__(self, ytdlp_path: str = None, unified_db=None, log_callback=None):
+        self._init_logger('PaidContent', log_callback, default_module='Pornhub')
+
+        # Find yt-dlp executable
+        self.ytdlp_path = ytdlp_path or self._find_ytdlp()
+        if not self.ytdlp_path:
+            self.log("yt-dlp not found, Pornhub support will be disabled", 'warning')
+
+        # Store database reference for cookie access
+        self.unified_db = unified_db
+        self._cookies_file = None
+
+        # Cache for profile page HTML (avoid re-fetching for avatar/banner/bio)
+        self._profile_page_cache: Dict[str, Optional[str]] = {}
+
+    def _find_ytdlp(self) -> Optional[str]:
+        """Find yt-dlp executable"""
+        common_paths = [
+            '/opt/media-downloader/venv/bin/yt-dlp',
+            '/usr/local/bin/yt-dlp',
+            '/usr/bin/yt-dlp',
+            '/opt/homebrew/bin/yt-dlp',
+            os.path.expanduser('~/.local/bin/yt-dlp'),
+        ]
+
+        for path in common_paths:
+            if os.path.isfile(path) and os.access(path, os.X_OK):
+                return path
+
+        try:
+            result = subprocess.run(['which', 'yt-dlp'], capture_output=True, text=True)
+            if result.returncode == 0:
+                return result.stdout.strip()
+        except Exception:
+            pass
+
+        return None
+
+    def is_available(self) -> bool:
+        """Check if yt-dlp is available"""
+        return self.ytdlp_path is not None
+
+    def _get_cookies_file(self) -> Optional[str]:
+        """Get path to cookies file, creating it from database if needed"""
+        if self._cookies_file and os.path.exists(self._cookies_file):
+            return self._cookies_file
+
+        if not self.unified_db:
+            return None
+
+        try:
+            with self.unified_db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute("SELECT cookies_json FROM scrapers WHERE id = ?", ('pornhub',))
+                row = cursor.fetchone()
+                if row and row[0]:
+                    data = json.loads(row[0])
+                    # Support both {"cookies": [...]} and [...] formats
+                    if isinstance(data, dict) and 'cookies' in data:
+                        cookies_list = data['cookies']
+                    elif isinstance(data, list):
+                        cookies_list = data
+                    else:
+                        cookies_list = []
+
+                    if cookies_list:
+                        # Write cookies to temp file in Netscape format
+                        fd, self._cookies_file = tempfile.mkstemp(suffix='.txt', prefix='pornhub_cookies_')
+                        with os.fdopen(fd, 'w') as f:
+                            f.write("# Netscape HTTP Cookie File\n")
+                            for cookie in cookies_list:
+                                domain = cookie.get('domain', '')
+                                include_subdomains = 'TRUE' if domain.startswith('.') else 'FALSE'
+                                path = cookie.get('path', '/')
+                                secure = 'TRUE' if cookie.get('secure', False) else 'FALSE'
+                                expiry = str(int(cookie.get('expirationDate', 0)))
+                                name = cookie.get('name', '')
+                                value = cookie.get('value', '')
+                                f.write(f"{domain}\t{include_subdomains}\t{path}\t{secure}\t{expiry}\t{name}\t{value}\n")
+                        self.log(f"Loaded {len(cookies_list)} cookies from pornhub scraper", 'debug')
+                        return self._cookies_file
+        except Exception as e:
+            self.log(f"Could not load cookies: {e}", 'debug')
+
+        return None
+
+    def _get_cookies_list(self) -> Optional[list]:
+        """Get cookies as a list of dicts for aiohttp requests"""
+        if not self.unified_db:
+            return None
+
+        try:
+            with self.unified_db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute("SELECT cookies_json FROM scrapers WHERE id = ?", ('pornhub',))
+                row = cursor.fetchone()
+                if row and row[0]:
+                    data = json.loads(row[0])
+                    if isinstance(data, dict) and 'cookies' in data:
+                        return data['cookies']
+                    elif isinstance(data, list):
+                        return data
+        except Exception as e:
+            self.log(f"Could not load cookies list: {e}", 'debug')
+
+        return None
+
+    def _get_base_cmd(self) -> List[str]:
+        """Get base yt-dlp command with cookies if available"""
+        cmd = [self.ytdlp_path]
+        cookies_file = self._get_cookies_file()
+        if cookies_file:
+            cmd.extend(['--cookies', cookies_file])
+        return cmd
+
+    def cleanup(self):
+        """Clean up temporary files"""
+        if self._cookies_file and os.path.exists(self._cookies_file):
+            try:
+                os.unlink(self._cookies_file)
+            except Exception:
+                pass
+            self._cookies_file = None
+        self._profile_page_cache.clear()
+
+    @staticmethod
+    def extract_creator_id(url: str) -> Optional[Tuple[str, str]]:
+        """
+        Extract creator type and identifier from Pornhub URL
+
+        Returns:
+            Tuple of (type, id) where type is 'pornstar', 'channels', 'users', or 'model'
+            or None if not a valid Pornhub creator URL
+        """
+        patterns = [
+            (r'pornhub\.com/pornstar/([a-zA-Z0-9_-]+)', 'pornstar'),
+            (r'pornhub\.com/channels/([a-zA-Z0-9_-]+)', 'channels'),
+            (r'pornhub\.com/users/([a-zA-Z0-9_-]+)', 'users'),
+            (r'pornhub\.com/model/([a-zA-Z0-9_-]+)', 'model'),
+        ]
+
+        for pattern, creator_type in patterns:
+            match = re.search(pattern, url)
+            if match:
+                return (creator_type, match.group(1))
+
+        return None
+
+    @staticmethod
+    def normalize_creator_url(creator_id: str, creator_type: str = 'pornstar') -> str:
+        """Convert creator ID to a consistent URL format
+
+        Args:
+            creator_id: Creator name/identifier (may be 'type/name' format)
+            creator_type: Default type if not embedded in creator_id
+        """
+        # Already a full URL
+        if creator_id.startswith('http://') or creator_id.startswith('https://'):
+            return creator_id
+
+        # Handle 'type/name' format from URL parser
+        if '/' in creator_id:
+            parts = creator_id.split('/', 1)
+            creator_type = parts[0]
+            creator_id = parts[1]
+
+        return f"https://www.pornhub.com/{creator_type}/{creator_id}"
+
+    def _get_listing_url(self, url: str) -> str:
+        """Get the URL to use for listing videos from a creator page.
+
+        For pornstars and models, append /videos to get the video listing.
+        For channels and users, the base URL already lists videos.
+        """
+        # Parse out the type
+        parsed = self.extract_creator_id(url)
+        if parsed:
+            creator_type, _ = parsed
+            if creator_type in ('pornstar', 'model'):
+                # Strip any trailing slash and append /videos
+                url = url.rstrip('/')
+                if not url.endswith('/videos'):
+                    url = f"{url}/videos"
+        return url
+
+    async def get_creator_info(self, url: str) -> Optional[Dict]:
+        """
+        Get creator information using yt-dlp + profile page scraping
+
+        Returns dict with creator metadata or None if not found
+        """
+        if not self.is_available():
+            return None
+
+        creator_type_id = self.extract_creator_id(url)
+        creator_type = creator_type_id[0] if creator_type_id else 'pornstar'
+
+        # Try to scrape the display name from the profile page first
+        creator_name = None
+        try:
+            page_html = await self.get_profile_page(url)
+            if page_html:
+                # Look for <h1 itemprop="name">Name</h1> inside nameSubscribe div
+                name_match = re.search(r'<div class="nameSubscribe">.*?<h1[^>]*>\s*(.+?)\s*</h1>', page_html, re.DOTALL)
+                if name_match:
+                    creator_name = html_module.unescape(name_match.group(1).strip())
+                    self.log(f"Found creator name from profile page: {creator_name}", 'debug')
+        except Exception as e:
+            self.log(f"Could not scrape creator name: {e}", 'debug')
+
+        # If page scraping didn't find a name, try yt-dlp
+        if not creator_name:
+            try:
+                listing_url = self._get_listing_url(url)
+
+                cmd = self._get_base_cmd() + [
+                    '--no-warnings',
+                    '--flat-playlist',
+                    '-j',
+                    '--playlist-items', '1',
+                    listing_url
+                ]
+
+                result = await asyncio.create_subprocess_exec(
+                    *cmd,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.PIPE
+                )
+
+                stdout, stderr = await result.communicate()
+
+                if result.returncode == 0:
+                    for line in stdout.decode('utf-8', errors='replace').strip().split('\n'):
+                        if not line:
+                            continue
+                        try:
+                            data = json.loads(line)
+                            playlist_title = data.get('playlist_title') or ''
+                            creator_name = (data.get('channel') or data.get('uploader')
+                                            or playlist_title.replace(' - Videos', '') or None)
+                            if creator_name:
+                                creator_name = html_module.unescape(creator_name)
+                                break
+                        except json.JSONDecodeError:
+                            continue
+            except Exception as e:
+                self.log(f"yt-dlp creator info failed: {e}", 'debug')
+
+        # Fall back to deriving name from URL slug
+        if not creator_name and creator_type_id:
+            creator_name = creator_type_id[1].replace('-', ' ').title()
+
+        if creator_name:
+            return {
+                'creator_id': creator_type_id[1] if creator_type_id else None,
+                'creator_name': creator_name,
+                'creator_url': url,
+                'creator_type': creator_type,
+            }
+
+        return None
+
+    async def get_creator_videos(self, url: str, since_date: str = None,
+                                  max_videos: int = None,
+                                  progress_callback=None) -> List[Dict]:
+        """
+        Get all videos from a creator page using --flat-playlist for speed.
+
+        Args:
+            url: Pornhub creator URL
+            since_date: Only fetch videos published after this date (ISO format)
+            max_videos: Maximum number of videos to fetch
+            progress_callback: Callback function(count) for progress updates
+
+        Returns:
+            List of video metadata dicts
+        """
+        if not self.is_available():
+            return []
+
+        try:
+            listing_url = self._get_listing_url(url)
+
+            # Use --flat-playlist for fast listing (avoids per-video HTTP requests)
+            cmd = self._get_base_cmd() + [
+                '--no-warnings',
+                '--flat-playlist',
+                '-j',
+                '--socket-timeout', '30',
+                '--retries', '3',
+                listing_url
+            ]
+
+            if max_videos:
+                cmd.extend(['--playlist-items', f'1:{max_videos}'])
+
+            self.log(f"Fetching videos from: {url}", 'info')
+
+            result = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+
+            stdout, stderr = await result.communicate()
+
+            if result.returncode != 0:
+                error = stderr.decode('utf-8', errors='replace')
+                self.log(f"Failed to get creator videos: {error}", 'warning')
+                return []
+
+            videos = []
+            for line in stdout.decode('utf-8', errors='replace').strip().split('\n'):
+                if not line:
+                    continue
+                try:
+                    data = json.loads(line)
+
+                    # Skip non-video entries
+                    if data.get('_type') == 'playlist':
+                        continue
+
+                    video_id = data.get('id')
+                    if not video_id:
+                        continue
+
+                    # Flat-playlist doesn't provide upload_date for Pornhub, but check anyway
+                    upload_date = data.get('upload_date')
+                    if upload_date:
+                        try:
+                            upload_date = datetime.strptime(upload_date, '%Y%m%d').isoformat()
+                        except ValueError:
+                            pass
+
+                    # Decode HTML entities in title (flat-playlist returns them encoded)
+                    title = html_module.unescape(data.get('title', f'Video {video_id}'))
+
+                    # Build video URL
+                    video_url = (data.get('webpage_url') or data.get('url')
+                                 or f"https://www.pornhub.com/view_video.php?viewkey={video_id}")
+
+                    videos.append({
+                        'video_id': video_id,
+                        'title': title,
+                        'description': data.get('description', ''),
+                        'upload_date': upload_date,
+                        'duration': data.get('duration'),
+                        'view_count': data.get('view_count'),
+                        'thumbnail': data.get('thumbnail'),
+                        'url': video_url,
+                    })
+
+                    if progress_callback:
+                        progress_callback(len(videos))
+
+                    if max_videos and len(videos) >= max_videos:
+                        break
+
+                except json.JSONDecodeError:
+                    continue
+
+            self.log(f"Found {len(videos)} videos", 'info')
+            return videos
+
+        except Exception as e:
+            self.log(f"Error getting creator videos: {e}", 'error')
+            return []
+
+    async def download_video(self, video_url: str, output_dir: Path, quality: str = 'best',
+                            progress_callback=None) -> Dict:
+        """
+        Download a video
+
+        Args:
+            video_url: Pornhub video URL
+            output_dir: Directory to save the video
+            quality: Quality preset
+            progress_callback: Callback for download progress
+
+        Returns:
+            Dict with success status and file info
+        """
+        if not self.is_available():
+            return {'success': False, 'error': 'yt-dlp not available'}
+
+        try:
+            output_dir = Path(output_dir)
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            output_template = str(output_dir / '%(title).100s_%(id)s.%(ext)s')
+
+            format_str = self.QUALITY_PRESETS.get(quality, self.QUALITY_PRESETS['best'])
+
+            cmd = self._get_base_cmd() + [
+                '--no-warnings',
+                '-f', format_str,
+                '-o', output_template,
+                '--print-json',
+                '--no-playlist',
+                '--user-agent', 'Mozilla/5.0',
+                '--referer', 'https://www.pornhub.com/',
+                '--merge-output-format', 'mp4',
+                '--concurrent-fragments', '4',
+                '--no-part',
+                '--retries', '20',
+                video_url
+            ]
+
+            self.log(f"Downloading video: {video_url}", 'debug')
+
+            result = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+
+            stdout, stderr = await result.communicate()
+
+            if result.returncode != 0:
+                error_msg = stderr.decode('utf-8', errors='replace').strip()
+                if 'Video unavailable' in error_msg or 'not available' in error_msg:
+                    error_msg = 'Video unavailable or private'
+                elif 'premium' in error_msg.lower():
+                    error_msg = 'Video requires premium access'
+                elif len(error_msg) > 200:
+                    error_msg = error_msg[:200] + '...'
+
+                return {'success': False, 'error': error_msg}
+
+            # Parse output JSON
+            video_info = None
+            for line in stdout.decode('utf-8', errors='replace').strip().split('\n'):
+                try:
+                    video_info = json.loads(line)
+                    break
+                except json.JSONDecodeError:
+                    continue
+
+            if not video_info:
+                # Try to find downloaded file
+                files = list(output_dir.glob('*.mp4'))
+                if files:
+                    file_path = max(files, key=lambda f: f.stat().st_mtime)
+                    return {
+                        'success': True,
+                        'file_path': str(file_path),
+                        'filename': file_path.name,
+                        'file_size': file_path.stat().st_size
+                    }
+                return {'success': False, 'error': 'Could not find downloaded file'}
+
+            file_path = video_info.get('_filename') or video_info.get('filename')
+            if file_path:
+                file_path = Path(file_path)
+
+            return {
+                'success': True,
+                'file_path': str(file_path) if file_path else None,
+                'filename': file_path.name if file_path else None,
+                'file_size': file_path.stat().st_size if file_path and file_path.exists() else video_info.get('filesize'),
+                'title': video_info.get('title'),
+                'duration': video_info.get('duration'),
+                'video_id': video_info.get('id'),
+                'upload_date': video_info.get('upload_date'),
+                'timestamp': video_info.get('timestamp'),
+                'thumbnail': video_info.get('thumbnail'),
+            }
+
+        except Exception as e:
+            self.log(f"Error downloading video: {e}", 'error')
+            return {'success': False, 'error': str(e)}
+
+    async def get_profile_page(self, url: str) -> Optional[str]:
+        """Fetch profile page HTML via aiohttp (with cookies if available).
+        Results are cached to avoid re-fetching for avatar/banner/bio."""
+        # Strip /videos suffix for profile page
+        base_url = re.sub(r'/videos/?$', '', url)
+
+        if base_url in self._profile_page_cache:
+            return self._profile_page_cache[base_url]
+
+        try:
+            import aiohttp
+
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+                'Accept-Language': 'en-US,en;q=0.5',
+            }
+
+            # Build simple cookies dict for the session
+            cookies_dict = {}
+            cookies_list = self._get_cookies_list()
+            if cookies_list:
+                for cookie in cookies_list:
+                    name = cookie.get('name', '')
+                    value = cookie.get('value', '')
+                    if name:
+                        cookies_dict[name] = value
+
+            async with aiohttp.ClientSession(cookies=cookies_dict) as session:
+                async with session.get(
+                    base_url,
+                    headers=headers,
+                    timeout=aiohttp.ClientTimeout(total=15)
+                ) as resp:
+                    if resp.status == 200:
+                        text = await resp.text()
+                        self._profile_page_cache[base_url] = text
+                        return text
+
+        except Exception as e:
+            self.log(f"Could not fetch profile page: {e}", 'debug')
+
+        self._profile_page_cache[base_url] = None
+        return None
+
+    async def get_profile_image(self, url: str) -> Optional[str]:
+        """Scrape profile page for avatar/photo URL"""
+        try:
+            page_html = await self.get_profile_page(url)
+            if not page_html:
+                return None
+
+            # Look for avatar image: <img id="getAvatar" src="...">
+            avatar_match = re.search(r'<img[^>]*id=["\']getAvatar["\'][^>]*src=["\']([^"\']+)["\']', page_html)
+            if avatar_match:
+                self.log("Found Pornhub profile avatar", 'debug')
+                return avatar_match.group(1)
+
+            # Try og:image meta tag
+            og_match = re.search(r'<meta\s+property="og:image"\s+content="([^"]+)"', page_html)
+            if not og_match:
+                og_match = re.search(r'<meta\s+content="([^"]+)"\s+property="og:image"', page_html)
+            if og_match:
+                return og_match.group(1)
+
+        except Exception as e:
+            self.log(f"Could not fetch profile image: {e}", 'debug')
+
+        return None
+
+    async def get_profile_bio(self, url: str) -> Optional[str]:
+        """Scrape bio/about section from profile page"""
+        try:
+            page_html = await self.get_profile_page(url)
+            if not page_html:
+                return None
+
+            # Look for aboutMeSection -> div with the actual text
+            # Structure: <section class="aboutMeSection ..."><div class="title">About Name</div><div>Bio text</div></section>
+            about_match = re.search(
+                r'<section\s+class="aboutMeSection[^"]*"[^>]*>.*?<div class="title">[^<]*</div>\s*<div>\s*(.*?)\s*</div>',
+                page_html, re.DOTALL
+            )
+            if about_match:
+                bio_text = re.sub(r'<[^>]+>', '', about_match.group(1)).strip()
+                if bio_text:
+                    self.log("Found Pornhub profile bio", 'debug')
+                    return html_module.unescape(bio_text)
+
+            # Fallback: look for biographyAbout section
+            bio_match = re.search(
+                r'class="biographyAbout[^"]*"[^>]*>.*?<div class="content[^"]*">(.*?)</div>',
+                page_html, re.DOTALL
+            )
+            if bio_match:
+                bio_text = re.sub(r'<[^>]+>', '', bio_match.group(1)).strip()
+                if bio_text:
+                    self.log("Found Pornhub profile bio (fallback)", 'debug')
+                    return html_module.unescape(bio_text)
+
+        except Exception as e:
+            self.log(f"Could not fetch profile bio: {e}", 'debug')
+
+        return None
+
+    async def get_profile_banner(self, url: str) -> Optional[str]:
+        """Scrape banner/cover image if available"""
+        try:
+            page_html = await self.get_profile_page(url)
+            if not page_html:
+                return None
+
+            # Look for cover image: <img id="coverPictureDefault" src="...">
+            cover_match = re.search(
+                r'<img[^>]*id=["\']coverPictureDefault["\'][^>]*src=["\']([^"\']+)["\']',
+                page_html
+            )
+            if cover_match:
+                self.log("Found Pornhub profile banner", 'debug')
+                return cover_match.group(1)
+
+            # Fallback: any img inside coverImage div
+            cover_match = re.search(
+                r'<div class="coverImage">\s*<img[^>]*src=["\']([^"\']+)["\']',
+                page_html, re.DOTALL
+            )
+            if cover_match:
+                self.log("Found Pornhub profile banner (div)", 'debug')
+                return cover_match.group(1)
+
+        except Exception as e:
+            self.log(f"Could not fetch profile banner: {e}", 'debug')
+
+        return None
+
+    async def get_profile_info(self, url: str) -> Optional[Dict]:
+        """Scrape all profile info from the page in one pass"""
+        page_html = await self.get_profile_page(url)
+        if not page_html:
+            return None
+
+        info = {}
+
+        # Extract infoPiece data (Gender, Birth Place, Height, etc.)
+        info_pieces = re.findall(
+            r'<div class="infoPiece">\s*<span>\s*(.*?)\s*</span>\s*(.*?)\s*</div>',
+            page_html, re.DOTALL
+        )
+        for label, value in info_pieces:
+            label = re.sub(r'<[^>]+>', '', label).strip().rstrip(':')
+            value = re.sub(r'<[^>]+>', '', value).strip()
+            if label and value:
+                info[label.lower().replace(' ', '_')] = value
+
+        return info if info else None
+
+    async def get_joined_date(self, url: str) -> Optional[str]:
+        """Extract a joined/career start date from profile info"""
+        try:
+            profile_info = await self.get_profile_info(url)
+            if not profile_info:
+                return None
+
+            # Pornstar pages have "Career Start and End: 2011 to Present"
+            career = profile_info.get('career_start_and_end')
+            if career:
+                # Extract start year: "2011 to Present" -> "2011"
+                match = re.match(r'(\d{4})', career)
+                if match:
+                    return match.group(1)
+
+            # User/model pages might not have career info but could have other dates
+            return None
+        except Exception as e:
+            self.log(f"Could not get joined date: {e}", 'debug')
+            return None
+
+    async def get_creator(self, url: str) -> Optional[Creator]:
+        """
+        Get Creator object from creator URL
+        """
+        info = await self.get_creator_info(url)
+        if not info:
+            return None
+
+        # Build creator_id as 'type/name' format
+        creator_type_id = self.extract_creator_id(url)
+        if creator_type_id:
+            creator_id = f"{creator_type_id[0]}/{creator_type_id[1]}"
+        else:
+            creator_id = info.get('creator_id', '')
+
+        # Profile image is already fetched during get_creator_info (page was cached)
+        profile_image = await self.get_profile_image(url)
+
+        return Creator(
+            creator_id=creator_id,
+            service_id='pornhub',
+            platform='pornhub',
+            username=info.get('creator_name', 'Unknown'),
+            display_name=info.get('creator_name'),
+            profile_image_url=profile_image,
+        )
+
+    async def get_posts(self, url: str, since_date: str = None,
+                        max_videos: int = None, progress_callback=None) -> List[Post]:
+        """
+        Get videos as Post objects
+        """
+        videos = await self.get_creator_videos(url, since_date, max_videos, progress_callback)
+
+        # Get creator_id from URL
+        creator_type_id = self.extract_creator_id(url)
+        creator_id = f"{creator_type_id[0]}/{creator_type_id[1]}" if creator_type_id else ''
+
+        posts = []
+        for video in videos:
+            # Create attachment for the video
+            attachment = Attachment(
+                name=f"{video['title']}.mp4",
+                file_type='video',
+                extension='.mp4',
+                server_path=video['url'],
+                download_url=video['url'],
+                duration=video.get('duration'),
+            )
+
+            post = Post(
+                post_id=video['video_id'],
+                service_id='pornhub',
+                platform='pornhub',
+                creator_id=creator_id,
+                title=video['title'],
+                content=video.get('description') or video['title'],
+                published_at=video.get('upload_date'),
+                attachments=[attachment],
+            )
+            posts.append(post)
+
+        return posts
--- a/modules/paid_content/reddit_client.py
+++ b/modules/paid_content/reddit_client.py
@@ -0,0 +1,678 @@
+"""
+Reddit Client for Paid Content - Uses gallery-dl to fetch subreddit posts and download media.
+
+Adapts the gallery-dl + metadata parsing pattern from reddit_community_monitor.py
+to produce Post/Attachment objects for the paid content system.
+"""
+
+import asyncio
+import json
+import os
+import shutil
+import subprocess
+import tempfile
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Dict, List, Optional
+
+from modules.base_module import LoggingMixin
+from .models import Post, Attachment
+
+
+class RedditClient(LoggingMixin):
+    """
+    Client for fetching Reddit subreddit content via gallery-dl.
+
+    gallery-dl downloads files during fetch, so attachments come with local_path
+    already set. The sync handler moves files to their final location.
+    """
+
+    SERVICE_ID = 'reddit'
+    PLATFORM = 'reddit'
+
+    def __init__(self, unified_db=None, log_callback=None):
+        self._init_logger('PaidContent', log_callback, default_module='Reddit')
+        self.unified_db = unified_db
+        self.gallery_dl_path = shutil.which('gallery-dl') or '/opt/media-downloader/venv/bin/gallery-dl'
+
+    def get_subreddit_info(self, subreddit: str) -> Optional[Dict]:
+        """Get basic subreddit info by checking the Reddit JSON API.
+
+        Returns dict with creator_id and creator_name.
+        """
+        import urllib.request
+        import urllib.error
+
+        try:
+            # Quick check via Reddit's public JSON endpoint
+            url = f'https://www.reddit.com/r/{subreddit}/about.json'
+            req = urllib.request.Request(url, headers={
+                'User-Agent': 'Mozilla/5.0 (compatible; media-downloader/1.0)'
+            })
+            with urllib.request.urlopen(req, timeout=15) as resp:
+                data = json.loads(resp.read().decode())
+
+            sub_data = data.get('data', {})
+            display_name = sub_data.get('display_name', subreddit)
+            title = sub_data.get('title', '')
+
+            # Extract icon — community_icon is higher res, icon_img is fallback
+            icon_url = (sub_data.get('community_icon') or sub_data.get('icon_img') or '').split('?')[0]
+            # HTML entities in URLs
+            icon_url = icon_url.replace('&amp;', '&') if icon_url else None
+
+            # Extract banner — banner_background_image is the main one
+            banner_url = sub_data.get('banner_background_image') or sub_data.get('mobile_banner_image') or ''
+            banner_url = banner_url.split('?')[0] if banner_url else None
+            if banner_url:
+                banner_url = banner_url.replace('&amp;', '&')
+
+            # Build bio from title + public description
+            public_desc = sub_data.get('public_description', '')
+            bio_parts = []
+            if title:
+                bio_parts.append(title)
+            if public_desc and public_desc != title:
+                bio_parts.append(public_desc)
+            subscribers = sub_data.get('subscribers')
+            if subscribers:
+                bio_parts.append(f"{subscribers:,} subscribers")
+            bio = ' — '.join(bio_parts) if bio_parts else None
+
+            # Subreddit creation date
+            created_utc = sub_data.get('created_utc')
+            joined_date = None
+            if created_utc:
+                try:
+                    joined_date = datetime.fromtimestamp(created_utc, tz=timezone.utc).strftime('%Y-%m-%d')
+                except (ValueError, OSError):
+                    pass
+
+            # Use the subreddit title as display name (e.g. "Reddit Pics")
+            # Fall back to r/name format if no title
+            friendly_name = title if title else f'r/{display_name}'
+
+            return {
+                'creator_id': display_name.lower(),
+                'creator_name': f'r/{display_name}',
+                'display_name': friendly_name,
+                'bio': bio,
+                'joined_date': joined_date,
+                'profile_image_url': icon_url or None,
+                'banner_image_url': banner_url or None,
+            }
+
+        except urllib.error.HTTPError as e:
+            if e.code == 404:
+                self.log(f"Subreddit r/{subreddit} not found (404)", 'warning')
+                return None
+            elif e.code == 403:
+                # Private/quarantined — still exists, return basic info
+                self.log(f"Subreddit r/{subreddit} is private/quarantined", 'warning')
+                return {
+                    'creator_id': subreddit.lower(),
+                    'creator_name': f'r/{subreddit}',
+                }
+            else:
+                self.log(f"HTTP {e.code} checking r/{subreddit}", 'warning')
+                # Return basic info and let sync verify
+                return {
+                    'creator_id': subreddit.lower(),
+                    'creator_name': f'r/{subreddit}',
+                }
+        except Exception as e:
+            self.log(f"Error getting subreddit info for r/{subreddit}: {e}", 'error')
+            return None
+
+    def get_posts(self, subreddit: str, since_date: str = None, max_posts: int = 0,
+                  progress_callback=None) -> tuple:
+        """Fetch posts and download media from a subreddit using gallery-dl.
+
+        Args:
+            subreddit: Subreddit name (without r/)
+            since_date: ISO date string; skip posts older than this
+            max_posts: Maximum posts to fetch (0 = unlimited)
+            progress_callback: Optional callable(downloaded_count, skipped_count, latest_file)
+                for live progress updates
+
+        Returns:
+            Tuple of (List[Post], temp_dir_path) — caller must clean up temp_dir
+            when done moving files. Returns ([], None) on failure.
+        """
+        temp_dir = tempfile.mkdtemp(prefix=f'reddit_paid_{subreddit}_')
+
+        try:
+            downloaded = self.run_gallery_dl(subreddit, temp_dir, since_date, max_posts,
+                                             progress_callback=progress_callback)
+
+            if not downloaded:
+                shutil.rmtree(temp_dir, ignore_errors=True)
+                return [], None
+
+            # Group files by post using metadata sidecars
+            grouped = self._group_files_by_post(downloaded, temp_dir, subreddit)
+
+            if not grouped:
+                shutil.rmtree(temp_dir, ignore_errors=True)
+                return [], None
+
+            posts = []
+            for post_id, post_data in grouped.items():
+                attachments = []
+                for file_path in post_data['files']:
+                    ext = file_path.suffix.lower()
+                    file_type = self._detect_file_type(ext)
+
+                    attachments.append(Attachment(
+                        name=file_path.name,
+                        file_type=file_type,
+                        extension=ext,
+                        server_path=str(file_path),  # temp path, will be moved
+                        download_url=None,  # Already downloaded
+                        file_size=file_path.stat().st_size if file_path.exists() else None,
+                    ))
+
+                if not attachments:
+                    continue
+
+                post = Post(
+                    post_id=post_id,
+                    service_id=self.SERVICE_ID,
+                    platform=self.PLATFORM,
+                    creator_id=subreddit.lower(),
+                    title=post_data.get('title'),
+                    content=post_data.get('title'),
+                    published_at=post_data.get('date'),
+                    attachments=attachments,
+                )
+                posts.append(post)
+
+            self.log(f"Parsed {len(posts)} posts with {sum(len(p.attachments) for p in posts)} attachments from r/{subreddit}", 'info')
+            return posts, temp_dir
+
+        except Exception as e:
+            self.log(f"Error fetching posts from r/{subreddit}: {e}", 'error')
+            shutil.rmtree(temp_dir, ignore_errors=True)
+            return [], None
+
+    def run_gallery_dl(self, subreddit: str, temp_dir: str,
+                       since_date: str = None, max_posts: int = 0,
+                       progress_callback=None, batch_callback=None,
+                       batch_size: int = 50) -> dict:
+        """Run gallery-dl to download media from a subreddit.
+
+        Streams stdout line-by-line. Calls progress_callback for status updates
+        and batch_callback with lists of new file paths for incremental processing.
+
+        Args:
+            progress_callback: Called with (dl_count, skip_count, total_seen)
+            batch_callback: Called with (new_files: List[Path]) every batch_size files
+            batch_size: How many files to accumulate before calling batch_callback
+
+        Returns:
+            Dict with dl_count, skip_count, total.
+        """
+        import time
+
+        # Use a separate download archive for paid content reddit
+        archive_dir = '/opt/media-downloader/data/cache'
+        os.makedirs(archive_dir, exist_ok=True)
+        archive_path = os.path.join(archive_dir, 'reddit_paid_gallery_dl_archive.db')
+
+        cmd = [
+            self.gallery_dl_path,
+            '--write-metadata',
+            '--download-archive', archive_path,
+            '-d', temp_dir,
+        ]
+
+        # REST API mode to avoid shared OAuth rate limits
+        cmd.extend(['-o', 'extractor.reddit.api=rest'])
+
+        # Limit posts (0 = unlimited)
+        if max_posts > 0:
+            cmd.extend(['--range', f'1-{max_posts}'])
+
+        # Date filtering
+        if since_date:
+            try:
+                cutoff = since_date[:10]  # YYYY-MM-DD
+                cmd.extend(['--filter', f"date >= datetime.strptime('{cutoff}', '%Y-%m-%d')"])
+            except (ValueError, IndexError):
+                pass
+
+        cmd.append(f'https://www.reddit.com/r/{subreddit}/new/')
+
+        # Check for Reddit cookies file
+        cookies_file = self._get_cookies_file()
+        if cookies_file:
+            temp_cookie_file = os.path.join(temp_dir, '.cookies.txt')
+            if self._write_netscape_cookie_file(cookies_file, temp_cookie_file):
+                cmd.extend(['--cookies', temp_cookie_file])
+
+        self.log(f"Running gallery-dl for r/{subreddit}", 'info')
+        self.log(f"Command: {' '.join(cmd)}", 'debug')
+
+        dl_count = 0
+        skip_count = 0
+        pending_files = []
+
+        try:
+            proc = subprocess.Popen(
+                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+            )
+
+            start_time = time.time()
+            timeout_secs = 7200  # 2 hours
+
+            while True:
+                if time.time() - start_time > timeout_secs:
+                    proc.kill()
+                    self.log(f"gallery-dl timed out for r/{subreddit}", 'error')
+                    break
+
+                line = proc.stdout.readline()
+                if not line and proc.poll() is not None:
+                    break
+                if not line:
+                    continue
+
+                line = line.strip()
+                if not line:
+                    continue
+
+                if line.startswith('# '):
+                    # Skipped file (already in archive)
+                    skip_count += 1
+                else:
+                    # Downloaded file — gallery-dl prints the full path
+                    dl_count += 1
+                    file_path = Path(line)
+                    if file_path.exists() and not file_path.name.endswith('.json'):
+                        pending_files.append(file_path)
+
+                total = dl_count + skip_count
+                if progress_callback and total % 5 == 0:
+                    progress_callback(dl_count, skip_count, total)
+
+                # Flush batch for processing
+                if batch_callback and len(pending_files) >= batch_size:
+                    batch_callback(list(pending_files))
+                    pending_files.clear()
+
+            proc.wait()
+
+            # Final batch
+            if batch_callback and pending_files:
+                batch_callback(list(pending_files))
+                pending_files.clear()
+
+            if progress_callback:
+                progress_callback(dl_count, skip_count, dl_count + skip_count)
+
+            returncode = proc.returncode
+            if returncode not in (None, 0, 1, 4, 5):
+                stderr = proc.stderr.read()
+                self.log(f"gallery-dl returned code {returncode} for r/{subreddit}", 'warning')
+                if stderr:
+                    self.log(f"gallery-dl stderr: {stderr[:500]}", 'debug')
+
+        except Exception as e:
+            self.log(f"gallery-dl failed for r/{subreddit}: {e}", 'error')
+
+        self.log(f"gallery-dl done for r/{subreddit}: {dl_count} downloaded, {skip_count} skipped", 'info')
+        return {'dl_count': dl_count, 'skip_count': skip_count, 'total': dl_count + skip_count}
+
+    def _group_files_by_post(self, files: List[Path], temp_dir: str,
+                             subreddit: str) -> Dict[str, Dict]:
+        """Group downloaded files by Reddit post ID using metadata JSON sidecars.
+
+        Adapted from reddit_community_monitor.py:_group_files_by_post
+
+        Returns:
+            Dict mapping reddit_post_id -> {
+                'files': [Path],
+                'title': str,
+                'date': str,
+                'source_url': str
+            }
+        """
+        posts: Dict[str, Dict] = {}
+
+        for file_path in files:
+            # Look for matching metadata JSON sidecar
+            json_path = file_path.with_suffix(file_path.suffix + '.json')
+            if not json_path.exists():
+                json_path = file_path.with_suffix('.json')
+
+            metadata = {}
+            if json_path.exists():
+                try:
+                    with open(json_path, 'r', encoding='utf-8') as f:
+                        metadata = json.load(f)
+                except (json.JSONDecodeError, Exception) as e:
+                    self.log(f"Failed to parse metadata for {file_path.name}: {e}", 'debug')
+
+            # Extract Reddit post ID
+            reddit_post_id = None
+            for key in ('id', 'reddit_id', 'parent_id'):
+                if key in metadata:
+                    reddit_post_id = str(metadata[key])
+                    break
+
+            if not reddit_post_id:
+                # Filename-based fallback: subreddit_postid_num.ext
+                parts = file_path.stem.split('_')
+                if len(parts) >= 2:
+                    reddit_post_id = parts[-2] if len(parts) >= 3 else parts[-1]
+                else:
+                    reddit_post_id = file_path.stem
+
+            # Extract post date
+            post_date = None
+            if 'date' in metadata:
+                date_val = metadata['date']
+                if isinstance(date_val, str):
+                    for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d'):
+                        try:
+                            utc_dt = datetime.strptime(date_val, fmt).replace(tzinfo=timezone.utc)
+                            post_date = utc_dt.astimezone().strftime('%Y-%m-%dT%H:%M:%S')
+                            break
+                        except ValueError:
+                            continue
+                    if not post_date:
+                        post_date = date_val
+                elif isinstance(date_val, (int, float)):
+                    try:
+                        post_date = datetime.fromtimestamp(date_val, tz=timezone.utc).isoformat()
+                    except (ValueError, OSError):
+                        pass
+
+            if not post_date and 'created_utc' in metadata:
+                try:
+                    post_date = datetime.fromtimestamp(metadata['created_utc'], tz=timezone.utc).isoformat()
+                except (ValueError, OSError):
+                    pass
+
+            if not post_date:
+                post_date = datetime.now().isoformat()
+
+            title = metadata.get('title', metadata.get('description', ''))
+            sub = metadata.get('subreddit', subreddit)
+            source_url = f"https://www.reddit.com/r/{sub}/comments/{reddit_post_id}" if sub else ''
+
+            if reddit_post_id not in posts:
+                posts[reddit_post_id] = {
+                    'files': [],
+                    'title': title,
+                    'date': post_date,
+                    'source_url': source_url,
+                }
+
+            posts[reddit_post_id]['files'].append(file_path)
+
+        return posts
+
+    def _get_cookies_file(self) -> Optional[str]:
+        """Get Reddit cookies JSON from the scrapers table if configured."""
+        if not self.unified_db:
+            return None
+
+        try:
+            with self.unified_db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute(
+                    "SELECT cookies FROM scrapers WHERE name = 'reddit' AND cookies IS NOT NULL"
+                )
+                row = cursor.fetchone()
+                if row and row[0]:
+                    return row[0]
+        except Exception as e:
+            self.log(f"Could not load Reddit cookies: {e}", 'debug')
+
+        return None
+
+    def _write_netscape_cookie_file(self, cookies_json: str, output_path: str) -> bool:
+        """Convert JSON cookies array to Netscape cookie file format."""
+        try:
+            cookies = json.loads(cookies_json)
+            if not isinstance(cookies, list):
+                return False
+
+            with open(output_path, 'w') as f:
+                f.write("# Netscape HTTP Cookie File\n")
+                f.write("# https://curl.haxx.se/docs/http-cookies.html\n\n")
+                for cookie in cookies:
+                    domain = cookie.get('domain', '')
+                    include_subdomains = 'TRUE' if domain.startswith('.') else 'FALSE'
+                    path = cookie.get('path', '/')
+                    secure = 'TRUE' if cookie.get('secure', False) else 'FALSE'
+                    expires = cookie.get('expirationDate', cookie.get('expiry', cookie.get('expires', 0)))
+                    if expires is None:
+                        expires = 0
+                    expires = str(int(float(expires)))
+                    name = cookie.get('name', '')
+                    value = cookie.get('value', '')
+                    f.write(f"{domain}\t{include_subdomains}\t{path}\t{secure}\t{expires}\t{name}\t{value}\n")
+
+            return True
+        except Exception as e:
+            self.log(f"Failed to write Netscape cookie file: {e}", 'error')
+            return False
+
+    def get_pullpush_post_ids(self, subreddit: str, after_ts: int = 0,
+                              before_ts: int = None,
+                              progress_callback=None) -> List[Dict]:
+        """Fetch all historical post IDs for a subreddit from the Pullpush (Pushshift) API.
+
+        Paginates through the full archive using created_utc ascending order.
+        Rate-limited to ~1 request per 2 seconds.
+
+        Args:
+            subreddit: Subreddit name (without r/)
+            after_ts: Unix timestamp to start from (0 = beginning of time)
+            before_ts: Unix timestamp to stop at (None = no upper limit)
+            progress_callback: Optional callable(fetched_count, message)
+
+        Returns:
+            List of dicts: [{id, title, created_utc, url, is_gallery}, ...]
+        """
+        import time
+        import urllib.request
+        import urllib.error
+
+        base_url = 'https://api.pullpush.io/reddit/search/submission/'
+        all_posts = []
+        current_after = after_ts
+        page = 0
+
+        while True:
+            params = (
+                f'subreddit={subreddit}'
+                f'&size=100'
+                f'&sort=asc'
+                f'&sort_type=created_utc'
+                f'&after={current_after}'
+            )
+            if before_ts is not None:
+                params += f'&before={before_ts}'
+
+            url = f'{base_url}?{params}'
+            page += 1
+
+            try:
+                req = urllib.request.Request(url, headers={
+                    'User-Agent': 'Mozilla/5.0 (compatible; media-downloader/1.0)'
+                })
+                with urllib.request.urlopen(req, timeout=30) as resp:
+                    data = json.loads(resp.read().decode())
+            except urllib.error.HTTPError as e:
+                if e.code == 429:
+                    self.log(f"Pullpush rate limited, waiting 5s...", 'warning')
+                    time.sleep(5)
+                    continue
+                self.log(f"Pullpush HTTP {e.code} for r/{subreddit}: {e}", 'error')
+                break
+            except Exception as e:
+                self.log(f"Pullpush request failed for r/{subreddit}: {e}", 'error')
+                break
+
+            posts = data.get('data', [])
+            if not posts:
+                break
+
+            for post in posts:
+                all_posts.append({
+                    'id': post.get('id', ''),
+                    'title': post.get('title', ''),
+                    'created_utc': post.get('created_utc', 0),
+                    'url': post.get('url', ''),
+                    'is_gallery': post.get('is_gallery', False),
+                    'selftext': post.get('selftext', ''),
+                })
+
+            last_ts = posts[-1].get('created_utc', 0)
+
+            if progress_callback:
+                progress_callback(len(all_posts),
+                    f"Fetched {len(all_posts)} post IDs (page {page})")
+
+            # Handle stuck pagination — same timestamp repeating
+            if last_ts <= current_after:
+                current_after = last_ts + 1
+            else:
+                current_after = last_ts
+
+            # If we got fewer than 100, we've reached the end
+            if len(posts) < 100:
+                break
+
+            # Rate limit: 2s between requests
+            time.sleep(2)
+
+        self.log(f"Pullpush: fetched {len(all_posts)} total post IDs for r/{subreddit}", 'info')
+        return all_posts
+
+    def run_gallery_dl_urls(self, urls_file: str, temp_dir: str,
+                            progress_callback=None, batch_callback=None,
+                            batch_size: int = 50) -> dict:
+        """Run gallery-dl with --input-file to download specific Reddit post URLs.
+
+        Same streaming/batch pattern as run_gallery_dl() but reads URLs from a file
+        instead of scraping a subreddit listing.
+
+        Args:
+            urls_file: Path to file containing one URL per line
+            temp_dir: Directory for gallery-dl to download into
+            progress_callback: Called with (dl_count, skip_count, total_seen)
+            batch_callback: Called with (new_files: List[Path]) every batch_size files
+            batch_size: How many files to accumulate before calling batch_callback
+
+        Returns:
+            Dict with dl_count, skip_count, total.
+        """
+        import time
+
+        # Same archive as normal Reddit paid content sync
+        archive_dir = '/opt/media-downloader/data/cache'
+        os.makedirs(archive_dir, exist_ok=True)
+        archive_path = os.path.join(archive_dir, 'reddit_paid_gallery_dl_archive.db')
+
+        cmd = [
+            self.gallery_dl_path,
+            '--write-metadata',
+            '--download-archive', archive_path,
+            '-d', temp_dir,
+            '-o', 'extractor.reddit.api=rest',
+            '--input-file', urls_file,
+        ]
+
+        # Check for Reddit cookies file
+        cookies_file = self._get_cookies_file()
+        if cookies_file:
+            temp_cookie_file = os.path.join(temp_dir, '.cookies.txt')
+            if self._write_netscape_cookie_file(cookies_file, temp_cookie_file):
+                cmd.extend(['--cookies', temp_cookie_file])
+
+        self.log(f"Running gallery-dl with input file ({urls_file})", 'info')
+        self.log(f"Command: {' '.join(cmd)}", 'debug')
+
+        dl_count = 0
+        skip_count = 0
+        pending_files = []
+
+        try:
+            proc = subprocess.Popen(
+                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+            )
+
+            start_time = time.time()
+            timeout_secs = 14400  # 4 hours for backfill (can be large)
+
+            while True:
+                if time.time() - start_time > timeout_secs:
+                    proc.kill()
+                    self.log("gallery-dl backfill timed out", 'error')
+                    break
+
+                line = proc.stdout.readline()
+                if not line and proc.poll() is not None:
+                    break
+                if not line:
+                    continue
+
+                line = line.strip()
+                if not line:
+                    continue
+
+                if line.startswith('# '):
+                    skip_count += 1
+                else:
+                    dl_count += 1
+                    file_path = Path(line)
+                    if file_path.exists() and not file_path.name.endswith('.json'):
+                        pending_files.append(file_path)
+
+                total = dl_count + skip_count
+                if progress_callback:
+                    progress_callback(dl_count, skip_count, total)
+
+                if batch_callback and len(pending_files) >= batch_size:
+                    batch_callback(list(pending_files))
+                    pending_files.clear()
+
+            proc.wait()
+
+            # Final batch
+            if batch_callback and pending_files:
+                batch_callback(list(pending_files))
+                pending_files.clear()
+
+            if progress_callback:
+                progress_callback(dl_count, skip_count, dl_count + skip_count)
+
+            returncode = proc.returncode
+            if returncode not in (None, 0, 1, 4, 5):
+                stderr = proc.stderr.read()
+                self.log(f"gallery-dl backfill returned code {returncode}", 'warning')
+                if stderr:
+                    self.log(f"gallery-dl stderr: {stderr[:500]}", 'debug')
+
+        except Exception as e:
+            self.log(f"gallery-dl backfill failed: {e}", 'error')
+
+        self.log(f"gallery-dl backfill done: {dl_count} downloaded, {skip_count} skipped", 'info')
+        return {'dl_count': dl_count, 'skip_count': skip_count, 'total': dl_count + skip_count}
+
+    @staticmethod
+    def _detect_file_type(ext: str) -> str:
+        """Detect file type from extension."""
+        ext = ext.lower().lstrip('.')
+        image_exts = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff', 'heic', 'heif', 'avif'}
+        video_exts = {'mp4', 'mov', 'avi', 'mkv', 'webm', 'm4v', 'wmv', 'flv', 'mpeg', 'mpg'}
+
+        if ext in image_exts:
+            return 'image'
+        elif ext in video_exts:
+            return 'video'
+        return 'unknown'
--- a/modules/paid_content/scraper.py
+++ b/modules/paid_content/scraper.py
--- a/modules/paid_content/snapchat_client.py
+++ b/modules/paid_content/snapchat_client.py
@@ -0,0 +1,259 @@
+"""
+Snapchat Client for Paid Content - Wraps SnapchatClientDownloader for paid content system.
+
+Maps stories, spotlights and highlights to the Post/Attachment model used by the paid content scraper.
+"""
+
+from datetime import datetime
+from typing import Dict, List, Optional
+
+from modules.base_module import LoggingMixin
+from .models import Creator, Post, Attachment
+
+
+class SnapchatPaidContentClient(LoggingMixin):
+    """
+    Client for fetching Snapchat creator content via the existing SnapchatClientDownloader.
+
+    Each story/highlight/spotlight collection maps to one Post with snaps as Attachments.
+    """
+
+    SERVICE_ID = 'snapchat'
+    PLATFORM = 'snapchat'
+
+    def __init__(self, unified_db=None, log_callback=None):
+        """Set up logging and remember the database handle.
+
+        Args:
+            unified_db: Database object handed on to the underlying downloader
+            log_callback: Optional callback passed to the logger setup
+        """
+        self._init_logger('PaidContent', log_callback, default_module='Snapchat')
+        # Populated on first use by _get_downloader()
+        self._downloader = None
+        self.unified_db = unified_db
+
+    def _get_downloader(self):
+        """Return the shared SnapchatClientDownloader, creating it on first call."""
+        if self._downloader is not None:
+            return self._downloader
+
+        # Imported inside the method so the import only happens when a
+        # downloader is actually requested.
+        from modules.snapchat_client_module import SnapchatClientDownloader
+
+        downloader_options = {
+            'show_progress': False,
+            'use_database': False,
+            'log_callback': self.log_callback,
+            'unified_db': self.unified_db,
+        }
+        self._downloader = SnapchatClientDownloader(**downloader_options)
+        return self._downloader
+
+    def get_creator_info(self, username: str) -> Optional[Dict]:
+        """Get creator information from the profile page's __NEXT_DATA__ payload.
+
+        Args:
+            username: Snapchat username (without @)
+
+        Returns:
+            Dict with creator_id, creator_name and profile_image_url. When the
+            profile page cannot be fetched, only creator_id and creator_name
+            are returned (both set to the username). creator_name falls back
+            to the username and profile_image_url to None when the payload
+            does not provide them.
+        """
+        def as_dict(value):
+            # JSON null (or any non-object value) must not break chained
+            # .get() lookups, so treat it as an empty object.
+            return value if isinstance(value, dict) else {}
+
+        downloader = self._get_downloader()
+
+        profile_url = f"https://story.snapchat.com/@{username}"
+        html = downloader._fetch_page(profile_url)
+        if not html:
+            return {'creator_id': username, 'creator_name': username}
+
+        data = downloader._extract_next_data(html)
+        display_name = username
+        avatar_url = None
+
+        if data:
+            props = as_dict(as_dict(as_dict(data).get('props')).get('pageProps'))
+
+            # userProfile uses a $case/userInfo wrapper
+            user_info = as_dict(as_dict(props.get('userProfile')).get('userInfo'))
+            if user_info:
+                name = user_info.get('displayName')
+                if isinstance(name, str) and name.strip():
+                    display_name = name.strip()
+
+                # Bitmoji 3D avatar URL (best quality)
+                bitmoji = as_dict(user_info.get('bitmoji3d'))
+                avatar_url = bitmoji.get('avatarUrl') or bitmoji.get('url')
+
+            # linkPreview OG images as avatar (preview/square.jpeg — good quality)
+            if not avatar_url:
+                link_preview = as_dict(props.get('linkPreview'))
+                for img_key in ('facebookImage', 'twitterImage'):
+                    img = as_dict(link_preview.get(img_key))
+                    if img.get('url'):
+                        avatar_url = img['url']
+                        break
+
+            # pageMetadata.pageTitle sometimes has the display name
+            if display_name == username:
+                page_title = as_dict(props.get('pageMetadata')).get('pageTitle')
+                # Format: "DisplayName (@username) | Snapchat..."
+                if isinstance(page_title, str) and '(@' in page_title:
+                    name_part = page_title.split('(@')[0].strip()
+                    if name_part:
+                        display_name = name_part
+
+        return {
+            'creator_id': username,
+            'creator_name': display_name,
+            'profile_image_url': avatar_url,
+        }
+
+    def get_creator(self, username: str) -> Optional[Creator]:
+        """Build a Creator model for a Snapchat user from the scraped profile info.
+
+        Returns None when get_creator_info() yields nothing.
+        """
+        profile = self.get_creator_info(username)
+        if not profile:
+            return None
+
+        # NOTE(review): Creator.username receives the resolved display name and
+        # only falls back to the handle when 'creator_name' is absent — confirm
+        # this is the intended mapping.
+        creator_fields = {
+            'creator_id': username,
+            'service_id': self.SERVICE_ID,
+            'platform': self.PLATFORM,
+            'username': profile.get('creator_name', username),
+            'display_name': profile.get('creator_name'),
+            'profile_image_url': profile.get('profile_image_url'),
+        }
+        return Creator(**creator_fields)
+
+    def get_posts(self, username: str, since_date: str = None) -> List[Post]:
+        """Fetch stories, highlights and spotlights as Post objects.
+
+        Args:
+            username: Snapchat username (without @)
+            since_date: ISO date or datetime string; collections whose newest
+                snap is older than this are skipped. A value that cannot be
+                parsed disables the cutoff (a warning is logged).
+
+        Returns:
+            List of Post objects, one per story/highlight/spotlight collection
+            that produced at least one attachment
+        """
+        downloader = self._get_downloader()
+
+        # Parse cutoff date
+        cutoff_dt = None
+        if since_date:
+            try:
+                if 'T' in since_date:
+                    parsed = datetime.fromisoformat(since_date.replace('Z', '+00:00'))
+                    if parsed.tzinfo is not None:
+                        # Reduce any UTC offset to a naive UTC value. 'Z' and
+                        # '+00:00' inputs were already handled as naive; other
+                        # offsets used to stay timezone-aware, and comparing an
+                        # aware cutoff with naive timestamps raises TypeError.
+                        parsed = (parsed - parsed.utcoffset()).replace(tzinfo=None)
+                    cutoff_dt = parsed
+                else:
+                    cutoff_dt = datetime.strptime(since_date[:10], '%Y-%m-%d')
+            except (ValueError, IndexError) as e:
+                self.log(f"Ignoring unparseable since_date {since_date!r}: {e}", 'warning')
+
+        # Discover content from profile (spotlights, highlights, stories)
+        profile_content = downloader.get_profile_content(username)
+        self.log(f"Found {len(profile_content.get('spotlights', []))} spotlights, "
+                 f"{len(profile_content.get('highlight_collections', []))} highlights, "
+                 f"{'stories' if profile_content.get('story_collection') else 'no stories'} "
+                 f"for @{username}", 'info')
+
+        posts = []
+
+        # Process story snaps (inline from profile page — no extra HTTP requests)
+        story_collection = profile_content.get('story_collection')
+        if story_collection and story_collection.snaps:
+            post = self._collection_to_post(story_collection, username, cutoff_dt)
+            if post and post.attachments:
+                posts.append(post)
+
+        # Process highlights (inline from profile page — no extra HTTP requests)
+        for collection in profile_content.get('highlight_collections', []):
+            post = self._collection_to_post(collection, username, cutoff_dt)
+            if post and post.attachments:
+                posts.append(post)
+
+        # Process spotlights (still requires per-URL fetch for full metadata)
+        for url in profile_content.get('spotlights', []):
+            collection = downloader.get_spotlight_metadata(url)
+            if not collection:
+                continue
+            post = self._collection_to_post(collection, username, cutoff_dt)
+            if post and post.attachments:
+                posts.append(post)
+
+        self.log(f"Mapped {len(posts)} posts with attachments for @{username}", 'info')
+        return posts
+
+    def _collection_to_post(self, collection, username: str, cutoff_dt=None) -> Optional[Post]:
+        """Turn one snap collection into a Post whose attachments are its snaps.
+
+        Args:
+            collection: Collection object exposing snaps, title,
+                collection_type and collection_id
+            username: Creator id stored on the resulting Post
+            cutoff_dt: Optional datetime; the whole collection is dropped when
+                its newest timestamped snap is older than this
+
+        Returns:
+            The Post, or None when the collection is empty, entirely older
+            than the cutoff, or has no snap with a media URL.
+        """
+        snaps = collection.snaps
+        if not snaps:
+            return None
+
+        # The post date is the earliest timestamp found among the snaps
+        stamps = [snap.timestamp for snap in snaps if snap.timestamp]
+        published_at = min(stamps).strftime('%Y-%m-%d') if stamps else None
+
+        # Drop the collection only when even its newest snap predates the cutoff
+        if cutoff_dt and stamps and max(stamps) < cutoff_dt:
+            return None
+
+        def build_attachment(snap):
+            # Extension follows the media type; anything that is not video is saved as .jpg
+            suffix = '.mp4' if snap.media_type == 'video' else '.jpg'
+            if snap.media_id:
+                file_name = f"{snap.media_id}{suffix}"
+            else:
+                file_name = f"snap_{snap.index}{suffix}"
+            return Attachment(
+                name=file_name,
+                file_type=snap.media_type,
+                extension=suffix,
+                server_path=snap.media_url,
+                download_url=snap.media_url,
+                width=snap.width or None,
+                height=snap.height or None,
+                duration=snap.duration_ms // 1000 if snap.duration_ms else None,
+            )
+
+        # Snaps without a media URL cannot be downloaded and are left out
+        attachments = [build_attachment(snap) for snap in snaps if snap.media_url]
+        if not attachments:
+            return None
+
+        # Title and content both carry the collection title (None when empty)
+        heading = collection.title or None
+
+        # Tag with the title-cased collection type
+        type_tag = collection.collection_type.title()
+
+        return Post(
+            post_id=collection.collection_id,
+            service_id=self.SERVICE_ID,
+            platform=self.PLATFORM,
+            creator_id=username,
+            title=heading,
+            content=heading,
+            published_at=published_at,
+            attachments=attachments,
+            auto_tags=[type_tag],
+        )
+
+    def download_snap(self, media_url: str, output_path: str) -> bool:
+        """Download a single snap file using the downloader's HTTP session.
+
+        Args:
+            media_url: Direct URL to the media file
+            output_path: Local path to save the file
+
+        Returns:
+            True if the download succeeded; False on a non-200 response, an
+            empty body, or any error while fetching or writing
+        """
+        import os
+        downloader = self._get_downloader()
+        session = downloader._get_session()
+
+        try:
+            # URLs may carry HTML-escaped ampersands
+            url = media_url.replace('&amp;', '&')
+            resp = session.get(url, timeout=60)
+            if resp.status_code != 200 or len(resp.content) == 0:
+                self.log(f"Download failed: HTTP {resp.status_code}, size={len(resp.content)}", 'warning')
+                return False
+
+            # A bare filename has no directory part; os.makedirs('') would
+            # raise and turn a good download into a failure.
+            parent_dir = os.path.dirname(output_path)
+            if parent_dir:
+                os.makedirs(parent_dir, exist_ok=True)
+            with open(output_path, 'wb') as f:
+                f.write(resp.content)
+            return True
+        except Exception as e:
+            self.log(f"Download error: {e}", 'error')
+            return False
--- a/modules/paid_content/soundgasm_client.py
+++ b/modules/paid_content/soundgasm_client.py
@@ -0,0 +1,508 @@
+"""
+Soundgasm + Liltsome Archive Client for Paid Content
+
+Handles:
+- Soundgasm profile scraping (no auth/Cloudflare needed)
+- Liltsome archive (liltsome.yerf.org) as supplementary source
+- Bracket tag parsing from audio titles: [F4M] [Whisper] etc.
+- Direct HTTP audio downloads (.m4a)
+"""
+
+import asyncio
+import json
+import os
+import re
+from pathlib import Path
+from typing import Dict, List, Optional, Set, Tuple
+from urllib.parse import quote
+
+import aiohttp
+import aiofiles
+
+from modules.base_module import LoggingMixin
+from .models import Creator, Post, Attachment
+
+
+# ---------------------------------------------------------------------------
+# Bracket tag helpers
+# ---------------------------------------------------------------------------
+
+def parse_bracket_tags(title: str) -> Tuple[str, List[str]]:
+    """Extract [bracket] tags from a title and return (clean_title, tags).
+
+    Tags are stripped, lowercased and de-duplicated, keeping first-seen
+    order. The clean title is the input with every bracket tag removed; a run
+    of adjacent tags (with or without whitespace between them) is replaced by
+    a single space so no double spaces are left behind.
+    """
+    tags = re.findall(r'\[([^\]]+)\]', title)
+    # Consume a whole run of tags at once; replacing them one by one left two
+    # spaces for inputs such as "Hello [a] [b] World".
+    clean_title = re.sub(r'\s*(?:\[[^\]]+\]\s*)+', ' ', title).strip()
+    normalized: List[str] = []
+    seen: Set[str] = set()
+    for tag in tags:
+        tag_lower = tag.strip().lower()
+        if tag_lower and tag_lower not in seen:
+            seen.add(tag_lower)
+            normalized.append(tag_lower)
+    return clean_title, normalized
+
+
+def format_tag_display(tag_lower: str) -> str:
+    """Return the display form of a normalized lowercase tag.
+
+    Tags shaped like letters-digit-letters (f4m, m4f, f4a …) are shown in
+    uppercase; every other tag is title-cased.
+    """
+    looks_like_gender_tag = re.match(r'^[a-z]+\d[a-z]+$', tag_lower) is not None
+    return tag_lower.upper() if looks_like_gender_tag else tag_lower.title()
+
+
+# ---------------------------------------------------------------------------
+# SoundgasmClient
+# ---------------------------------------------------------------------------
+
+class SoundgasmClient(LoggingMixin):
+    """Client for fetching audio from Soundgasm and the Liltsome archive."""
+
+    SERVICE_ID = 'soundgasm'
+    PLATFORM = 'soundgasm'
+
+    SOUNDGASM_BASE = 'https://soundgasm.net'
+    LILTSOME_BASE = 'https://liltsome.yerf.org'
+    LILTSOME_LIBRARY_URL = f'{LILTSOME_BASE}/data/library.json'
+    LILTSOME_CACHE_PATH = Path('/opt/media-downloader/data/liltsome_library.json')
+    LILTSOME_ETAG_PATH = Path('/opt/media-downloader/data/liltsome_library.json.etag')
+
+    HEADERS = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
+                       '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.9',
+    }
+
+    def __init__(self, log_callback=None):
+        """Set up logging and start with no Liltsome library data in memory.
+
+        Args:
+            log_callback: Optional callback passed to the logger setup
+        """
+        self._init_logger('PaidContent', log_callback, default_module='Soundgasm')
+        # Parsed library.json; filled by _load_liltsome_data() and cleared
+        # again whenever a fresh copy is downloaded
+        self._liltsome_data: Optional[Dict] = None
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    async def get_profile_info(self, username: str) -> Optional[Dict]:
+        """Summarise a profile as username, post count and source.
+
+        Soundgasm is tried first and the Liltsome archive second; a failure of
+        either lookup is logged at debug level and otherwise ignored. The
+        reported post count is the larger of the two counts.
+
+        Returns:
+            Dict with 'username', 'post_count' and 'source' ('soundgasm' or
+            'liltsome'), or None when neither source knows the user.
+        """
+        count = 0
+        origin = None
+
+        # Soundgasm profile page first
+        try:
+            soundgasm_entries = await self._fetch_soundgasm_profile(username)
+            if soundgasm_entries is not None:
+                count, origin = len(soundgasm_entries), 'soundgasm'
+        except Exception as e:
+            self.log(f"Soundgasm profile fetch failed for {username}: {e}", 'debug')
+
+        # The archive may hold more posts than the live profile
+        try:
+            archive_entries = await self._get_liltsome_entries(username)
+            if archive_entries:
+                count = max(count, len(archive_entries))
+                origin = 'liltsome' if origin is None else origin
+        except Exception as e:
+            self.log(f"Liltsome lookup failed for {username}: {e}", 'debug')
+
+        if count == 0 and origin is None:
+            return None
+
+        return {'username': username, 'post_count': count, 'source': origin}
+
+    async def get_posts(self, username: str, known_post_ids: Optional[Set[str]] = None,
+                        progress_callback=None) -> List[Post]:
+        """Collect new posts from Soundgasm and then Liltsome, de-duplicated by post_id.
+
+        Args:
+            username: Profile name to look up on both sources
+            known_post_ids: Post ids that are already stored and must be skipped
+            progress_callback: Optional callable invoked with the running post
+                count after each source has been processed
+
+        Returns:
+            Posts not in known_post_ids. A failing source is logged as a
+            warning and contributes nothing.
+        """
+        seen_ids: Set[str] = set(known_post_ids or ())
+        collected: List[Post] = []
+
+        def absorb(batch):
+            # Keep only posts whose id has not been seen yet
+            for candidate in batch:
+                if candidate.post_id in seen_ids:
+                    continue
+                seen_ids.add(candidate.post_id)
+                collected.append(candidate)
+
+        def report_progress():
+            if progress_callback:
+                progress_callback(len(collected))
+
+        # 1. Soundgasm (may fail if account deleted — that's OK)
+        try:
+            sg_posts = await self._fetch_soundgasm_posts(username, seen_ids)
+            absorb(sg_posts)
+            self.log(f"Soundgasm: {len(sg_posts)} new posts for {username}", 'info')
+        except Exception as e:
+            self.log(f"Soundgasm fetch failed for {username} (account may be deleted): {e}", 'warning')
+        report_progress()
+
+        # 2. Liltsome archive (always)
+        try:
+            lt_posts = await self._fetch_liltsome_posts(username, seen_ids)
+            absorb(lt_posts)
+            self.log(f"Liltsome: {len(lt_posts)} new posts for {username}", 'info')
+        except Exception as e:
+            self.log(f"Liltsome fetch failed for {username}: {e}", 'warning')
+        report_progress()
+
+        return collected
+
+    async def download_audio(self, download_url: str, output_path: Path) -> Dict:
+        """Download an audio file via direct HTTP GET.
+
+        The body is streamed to a sibling '<name>.part' file and moved onto
+        output_path only after the transfer completed, so a failed or
+        interrupted download never leaves a truncated file at the final path.
+
+        Args:
+            download_url: URL of the audio file
+            output_path: Destination path (a str is accepted as well)
+
+        Returns:
+            {'success': True, 'file_path': ..., 'file_size': ...} on success,
+            otherwise {'success': False, 'error': ...}
+        """
+        partial_path = None
+        try:
+            output_path = Path(output_path)
+            partial_path = output_path.with_name(output_path.name + '.part')
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+
+            total = 0
+            timeout = aiohttp.ClientTimeout(total=300)
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                async with session.get(download_url, headers=self.HEADERS) as resp:
+                    if resp.status != 200:
+                        return {'success': False, 'error': f'HTTP {resp.status}'}
+
+                    async with aiofiles.open(str(partial_path), 'wb') as f:
+                        async for chunk in resp.content.iter_chunked(65536):
+                            await f.write(chunk)
+                            total += len(chunk)
+
+            # Publish the file only once it is complete
+            os.replace(partial_path, output_path)
+
+            return {
+                'success': True,
+                'file_path': str(output_path),
+                'file_size': total,
+            }
+
+        except Exception as e:
+            if partial_path is not None:
+                try:
+                    partial_path.unlink(missing_ok=True)
+                except OSError:
+                    # Best-effort cleanup; the download error below is what matters
+                    pass
+            self.log(f"Download failed for {download_url}: {e}", 'error')
+            return {'success': False, 'error': str(e)}
+
+    # ------------------------------------------------------------------
+    # Soundgasm scraping
+    # ------------------------------------------------------------------
+
+    async def _fetch_soundgasm_profile(self, username: str) -> Optional[List[Dict]]:
+        """Scrape the Soundgasm profile page for links to the user's audio pages.
+
+        Returns:
+            List of {'slug', 'title'} dicts in page order (possibly empty), or
+            None when the page does not answer with HTTP 200. Any non-200
+            status other than 404 is also logged as a warning.
+        """
+        profile_url = f'{self.SOUNDGASM_BASE}/u/{username}'
+        request_timeout = aiohttp.ClientTimeout(total=30)
+
+        async with aiohttp.ClientSession(timeout=request_timeout) as session:
+            async with session.get(profile_url, headers=self.HEADERS) as resp:
+                status = resp.status
+                if status != 200:
+                    if status != 404:
+                        self.log(f"Soundgasm profile returned {status}", 'warning')
+                    return None
+                html = await resp.text()
+
+        # Links look like <a href="https://soundgasm.net/u/{username}/{slug}">title</a>;
+        # the host part is optional so relative links match too.
+        link_pattern = re.compile(
+            r'<a\s+href="(?:https?://soundgasm\.net)?/u/' + re.escape(username) + r'/([^"]+)"[^>]*>\s*([^<]+)',
+            re.IGNORECASE
+        )
+        return [
+            {'slug': found.group(1).strip(), 'title': found.group(2).strip()}
+            for found in link_pattern.finditer(html)
+        ]
+
+    async def _fetch_soundgasm_posts(self, username: str, seen_ids: Set[str]) -> List[Post]:
+        """Build Post objects for every profile entry whose slug is not in seen_ids.
+
+        Each new slug costs one detail-page request. Entries whose detail page
+        is unavailable or has no audio URL are skipped, and an error on one
+        entry is logged at debug level without aborting the rest.
+        """
+        listing = await self._fetch_soundgasm_profile(username)
+        if not listing:
+            return []
+
+        collected: List[Post] = []
+        request_timeout = aiohttp.ClientTimeout(total=30)
+
+        async with aiohttp.ClientSession(timeout=request_timeout) as session:
+            for item in listing:
+                slug = item['slug']
+                if slug in seen_ids:
+                    continue
+
+                try:
+                    detail = await self._fetch_soundgasm_detail(session, username, slug)
+                    if detail is None:
+                        continue
+
+                    raw_title = detail.get('title', item.get('title', slug))
+                    clean_title, tags = parse_bracket_tags(raw_title)
+                    description = detail.get('description', '')
+                    audio_url = detail.get('audio_url')
+                    if not audio_url:
+                        continue
+
+                    # Take the extension from the last URL path segment
+                    # (query string ignored); default to .m4a
+                    last_segment = audio_url.split('?')[0].split('/')[-1]
+                    if '.' in last_segment:
+                        ext = '.' + last_segment.rsplit('.', 1)[1]
+                    else:
+                        ext = '.m4a'
+
+                    audio_file = Attachment(
+                        name=f"{slug}{ext}",
+                        file_type='audio',
+                        extension=ext.lstrip('.'),
+                        server_path=f'/u/{username}/{slug}',
+                        download_url=audio_url,
+                    )
+                    collected.append(Post(
+                        post_id=slug,
+                        service_id='soundgasm',
+                        platform='soundgasm',
+                        creator_id=username,
+                        title=clean_title or None,
+                        content=description or None,
+                        published_at=None,  # Soundgasm has no dates
+                        attachments=[audio_file],
+                        auto_tags=tags,
+                    ))
+
+                except Exception as e:
+                    self.log(f"Error fetching Soundgasm detail for {slug}: {e}", 'debug')
+
+        return collected
+
+    async def _fetch_soundgasm_detail(self, session: aiohttp.ClientSession,
+                                       username: str, slug: str) -> Optional[Dict]:
+        """Fetch a single Soundgasm audio detail page and extract metadata.
+
+        Title and description are scraped from raw HTML, so character
+        references such as &amp; or &#039; are decoded before returning.
+
+        Args:
+            session: Open aiohttp session used for the request
+            username: Profile the audio belongs to
+            slug: Audio identifier within that profile
+
+        Returns:
+            Dict with 'title' (falls back to the slug), 'description' (None
+            when the page has no description block) and 'audio_url', or None
+            when the page is not HTTP 200 or contains no audio URL.
+        """
+        # Local import: this module does not import html at top level
+        from html import unescape
+
+        url = f'{self.SOUNDGASM_BASE}/u/{username}/{slug}'
+
+        async with session.get(url, headers=self.HEADERS) as resp:
+            if resp.status != 200:
+                return None
+            page = await resp.text()
+
+        # Title: <div aria-label="title"...>Title Text</div>
+        # or from the page title tag
+        title = None
+        title_match = re.search(r'aria-label="title"[^>]*>([^<]+)', page)
+        if title_match:
+            title = unescape(title_match.group(1)).strip()
+        if not title:
+            title_match = re.search(r'<title>([^<]+)</title>', page, re.IGNORECASE)
+            if title_match:
+                title = unescape(title_match.group(1)).strip()
+                # Remove " - Soundgasm" suffix if present
+                title = re.sub(r'\s*[-–—]\s*Soundgasm.*$', '', title, flags=re.IGNORECASE).strip()
+
+        # Description: <div class="jp-description">...</div>
+        description = None
+        desc_match = re.search(r'class="jp-description"[^>]*>(.*?)</div>', page, re.DOTALL)
+        if desc_match:
+            desc_html = desc_match.group(1)
+            # Turn <br> into newlines and strip the remaining tags first, then
+            # decode entities so escaped text like &lt;3 is not mistaken for a tag
+            description = re.sub(r'<br\s*/?>', '\n', desc_html)
+            description = re.sub(r'<[^>]+>', '', description)
+            description = unescape(description).strip()
+
+        # Audio URL: m4a: "https://..."
+        audio_url = None
+        audio_match = re.search(r'm4a:\s*"([^"]+)"', page)
+        if audio_match:
+            audio_url = audio_match.group(1)
+
+        if not audio_url:
+            return None
+
+        return {
+            'title': title or slug,
+            'description': description,
+            'audio_url': audio_url,
+        }
+
+    # ------------------------------------------------------------------
+    # Liltsome archive
+    # ------------------------------------------------------------------
+
+    async def _ensure_liltsome_cache(self) -> bool:
+        """Download/refresh the Liltsome library.json using ETag-based invalidation.
+
+        A HEAD request fetches the remote ETag; when it equals the stored one
+        and a cache file exists, nothing is downloaded. Otherwise the library
+        is streamed to a sibling '.part' file and moved over the cache only
+        after the transfer completed, so a failed or interrupted download
+        cannot replace a good cache with a truncated one.
+
+        Returns True if cache is available (fresh or existing), False otherwise.
+        """
+        etag_file = self.LILTSOME_ETAG_PATH
+        cache_file = self.LILTSOME_CACHE_PATH
+        partial_file = cache_file.with_name(cache_file.name + '.part')
+
+        stored_etag = None
+        if etag_file.exists():
+            try:
+                stored_etag = etag_file.read_text().strip()
+            except Exception as e:
+                # An unreadable ETag only means the library is downloaded again
+                self.log(f"Could not read stored Liltsome ETag: {e}", 'debug')
+
+        timeout = aiohttp.ClientTimeout(total=600)  # 131MB can take a while
+        try:
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                # HEAD request to check ETag
+                async with session.head(self.LILTSOME_LIBRARY_URL, headers=self.HEADERS) as resp:
+                    if resp.status != 200:
+                        self.log(f"Liltsome HEAD returned {resp.status}", 'warning')
+                        return cache_file.exists()
+
+                    remote_etag = resp.headers.get('ETag', '').strip()
+
+                if stored_etag and remote_etag and stored_etag == remote_etag and cache_file.exists():
+                    self.log("Liltsome cache is fresh (ETag match)", 'debug')
+                    return True
+
+                # Download the full library
+                self.log("Downloading Liltsome library.json (this may take a while)...", 'info')
+                async with session.get(self.LILTSOME_LIBRARY_URL, headers=self.HEADERS) as resp:
+                    if resp.status != 200:
+                        self.log(f"Liltsome GET returned {resp.status}", 'warning')
+                        return cache_file.exists()
+
+                    cache_file.parent.mkdir(parents=True, exist_ok=True)
+                    async with aiofiles.open(str(partial_file), 'wb') as f:
+                        async for chunk in resp.content.iter_chunked(262144):
+                            await f.write(chunk)
+
+                    new_etag = resp.headers.get('ETag', remote_etag or '').strip()
+
+                # Swap the finished download into place, then drop the parsed
+                # copy so the next load re-reads the new file
+                os.replace(partial_file, cache_file)
+                self._liltsome_data = None  # force re-parse
+
+                if new_etag:
+                    etag_file.write_text(new_etag)
+
+                self.log("Liltsome library.json downloaded successfully", 'info')
+                return True
+
+        except Exception as e:
+            try:
+                partial_file.unlink(missing_ok=True)
+            except OSError:
+                # Best-effort cleanup of the incomplete download
+                pass
+            self.log(f"Failed to refresh Liltsome cache: {e}", 'warning')
+            return cache_file.exists()
+
+    async def _load_liltsome_data(self) -> Optional[Dict]:
+        """Return the parsed Liltsome library, parsing the cache file at most once.
+
+        The JSON file is read in a worker thread via asyncio.to_thread and the
+        result is kept on the instance. Returns None when the cache file is
+        missing or cannot be parsed (the parse error is logged).
+        """
+        already_parsed = self._liltsome_data
+        if already_parsed is not None:
+            return already_parsed
+
+        library_path = self.LILTSOME_CACHE_PATH
+        if not library_path.exists():
+            return None
+
+        try:
+            self._liltsome_data = await asyncio.to_thread(self._read_liltsome_json, library_path)
+        except Exception as e:
+            self.log(f"Failed to parse Liltsome library.json: {e}", 'error')
+            return None
+        return self._liltsome_data
+
+    @staticmethod
+    def _read_liltsome_json(path: Path) -> Dict:
+        """Parse the UTF-8 JSON file at path (blocking; meant to run in a worker thread)."""
+        with open(path, encoding='utf-8') as handle:
+            parsed = json.load(handle)
+        return parsed
+
+    async def _get_liltsome_entries(self, username: str) -> Optional[List[Dict]]:
+        """Look up an artist's audio entries in the Liltsome library.
+
+        The username is compared case-insensitively against each artist's
+        'id' and 'name'; the first match wins.
+
+        library.json structure: {"artists": [{"id": "name", "files": {"audio": [...]}}]}
+
+        Returns:
+            The matched artist's files.audio list ([] when 'files' is not a
+            dict), or None when the library is unavailable or no artist matches.
+        """
+        await self._ensure_liltsome_cache()
+        library = await self._load_liltsome_data()
+        if not library:
+            return None
+
+        wanted = username.lower()
+
+        # Normally {"artists": [...]}; anything that is not a dict is iterated directly
+        artists = library.get('artists', []) if isinstance(library, dict) else library
+
+        matched = next(
+            (
+                artist for artist in artists
+                if wanted in (str(artist.get('id', '')).lower(), str(artist.get('name', '')).lower())
+            ),
+            None,
+        )
+        if matched is None:
+            return None
+
+        # Audio entries are in files.audio
+        files = matched.get('files', {})
+        return files.get('audio', []) if isinstance(files, dict) else []
+
+    async def _fetch_liltsome_posts(self, username: str, seen_ids: Set[str]) -> List[Post]:
+        """Convert Liltsome archive entries to Post objects.
+
+        Args:
+            username: Artist to look up in the archive; stored as creator_id
+            seen_ids: Post ids to skip
+
+        Returns:
+            One Post (with a single audio Attachment) per archive entry whose
+            'liltsome-' prefixed id is not in seen_ids. JSON null values for
+            filename, path, title or tags are treated as missing.
+        """
+        entries = await self._get_liltsome_entries(username)
+        if not entries:
+            return []
+
+        posts: List[Post] = []
+        for entry in entries:
+            # "or" fallbacks also cover explicit JSON nulls, which .get()
+            # defaults do not
+            filename = entry.get('filename') or ''
+            path = entry.get('path') or ''
+            title_raw = entry.get('title')
+            if title_raw is None:
+                title_raw = filename
+            entry_tags = entry.get('tags') or []
+            duration = None
+            file_size = entry.get('size')
+
+            if isinstance(entry.get('metadata'), dict):
+                duration = entry['metadata'].get('duration')
+
+            # Build post_id: prefix with liltsome- to avoid collision
+            sanitized_name = re.sub(r'[^a-zA-Z0-9_.-]', '_', filename) if filename else path
+            post_id = f'liltsome-{sanitized_name}'
+
+            if post_id in seen_ids:
+                continue
+
+            # Parse bracket tags from title for clean_title
+            clean_title, title_tags = parse_bracket_tags(title_raw)
+
+            # Merge: use Liltsome's pre-parsed tags + any extra from title
+            all_tags_set: Set[str] = set()
+            all_tags: List[str] = []
+            for t in entry_tags:
+                if not isinstance(t, str):
+                    continue
+                t_lower = t.strip().lower()
+                if t_lower and t_lower not in all_tags_set:
+                    all_tags_set.add(t_lower)
+                    all_tags.append(t_lower)
+            for t in title_tags:
+                if t not in all_tags_set:
+                    all_tags_set.add(t)
+                    all_tags.append(t)
+
+            # Build download URL
+            download_url = f'{self.LILTSOME_BASE}/audio_files/{quote(path, safe="/")}' if path else None
+
+            # Determine extension
+            ext = 'm4a'
+            if filename and '.' in filename:
+                ext = filename.rsplit('.', 1)[1].lower()
+            elif path and '.' in path:
+                ext = path.rsplit('.', 1)[1].lower()
+
+            # Attachment name: keep the archive's own file name when it already
+            # carries the extension. The check is case-insensitive because ext
+            # is lowercased ("Track.M4A" must not become "Track.M4A.m4a"), and
+            # entries without a filename use the last path segment.
+            base_name = filename or path.rsplit('/', 1)[-1]
+            if base_name.lower().endswith(f'.{ext}'):
+                attachment_name = base_name
+            else:
+                attachment_name = f"{sanitized_name}.{ext}"
+
+            attachment = Attachment(
+                name=attachment_name,
+                file_type='audio',
+                extension=ext,
+                server_path=path or filename,
+                download_url=download_url,
+                file_size=file_size,
+                duration=duration,
+            )
+
+            post = Post(
+                post_id=post_id,
+                service_id='soundgasm',
+                platform='soundgasm',
+                creator_id=username,
+                title=clean_title or None,
+                content=None,
+                published_at=None,
+                attachments=[attachment],
+                auto_tags=all_tags,
+            )
+            posts.append(post)
+
+        return posts
--- a/modules/paid_content/tiktok_client.py
+++ b/modules/paid_content/tiktok_client.py
@@ -0,0 +1,827 @@
+"""
+TikTok Client for Paid Content - Uses yt-dlp for listing and gallery-dl for downloading
+
+Adapts the hybrid approach from modules/tiktok_module.py into the paid content client pattern.
+"""
+
+import asyncio
+import html as html_module
+import json
+import os
+import re
+import subprocess
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+import aiohttp
+
+from modules.base_module import LoggingMixin
+from .models import Creator, Post, Attachment
+
+
+class TikTokClient(LoggingMixin):
+    """
+    Client for fetching TikTok creator information and videos.
+
+    Uses yt-dlp for listing (fast flat-playlist) and gallery-dl for downloading
+    (handles carousels/slideshows properly).
+    """
+
+    SERVICE_ID = 'tiktok'
+    PLATFORM = 'tiktok'
+
+    def __init__(self, unified_db=None, log_callback=None):
+        self._init_logger('PaidContent', log_callback, default_module='TikTok')
+
+        self.ytdlp_path = self._find_executable('yt-dlp')
+        self.gallery_dl_path = self._find_executable('gallery-dl')
+        self.unified_db = unified_db
+        self._cookies_file = None
+        self._last_pinned_posts = {}
+
+        if not self.ytdlp_path:
+            self.log("yt-dlp not found, TikTok listing will be disabled", 'warning')
+        if not self.gallery_dl_path:
+            self.log("gallery-dl not found, TikTok downloading will be disabled", 'warning')
+
+    def _find_executable(self, name: str) -> Optional[str]:
+        """Find an executable by name"""
+        common_paths = [
+            f'/opt/media-downloader/venv/bin/{name}',
+            f'/usr/local/bin/{name}',
+            f'/usr/bin/{name}',
+            f'/opt/homebrew/bin/{name}',
+            os.path.expanduser(f'~/.local/bin/{name}'),
+        ]
+
+        for path in common_paths:
+            if os.path.isfile(path) and os.access(path, os.X_OK):
+                return path
+
+        try:
+            result = subprocess.run(['which', name], capture_output=True, text=True)
+            if result.returncode == 0:
+                return result.stdout.strip()
+        except Exception:
+            pass
+
+        return None
+
+    def is_available(self) -> bool:
+        """Check if both yt-dlp and gallery-dl are available"""
+        return self.ytdlp_path is not None and self.gallery_dl_path is not None
+
+    def cleanup(self):
+        """Clean up any temporary files"""
+        if self._cookies_file and os.path.exists(self._cookies_file):
+            try:
+                os.unlink(self._cookies_file)
+            except Exception:
+                pass
+
+    def _get_cookies_file(self) -> Optional[str]:
+        """Get path to cookies file, creating from database if needed."""
+        if self._cookies_file and os.path.exists(self._cookies_file):
+            return self._cookies_file
+
+        if not self.unified_db:
+            return None
+
+        try:
+            with self.unified_db.get_connection() as conn:
+                cursor = conn.cursor()
+                # Check for tiktok scraper cookies
+                for scraper_id in ('tiktok', 'tiktok_client'):
+                    cursor.execute("SELECT cookies_json FROM scrapers WHERE id = ?", (scraper_id,))
+                    row = cursor.fetchone()
+                    if row and row[0]:
+                        data = json.loads(row[0])
+                        if isinstance(data, dict) and 'cookies' in data:
+                            cookies_list = data['cookies']
+                        elif isinstance(data, list):
+                            cookies_list = data
+                        else:
+                            cookies_list = []
+
+                        if cookies_list:
+                            import tempfile
+                            fd, self._cookies_file = tempfile.mkstemp(suffix='.txt', prefix='tiktok_cookies_')
+                            with os.fdopen(fd, 'w') as f:
+                                f.write("# Netscape HTTP Cookie File\n")
+                                for cookie in cookies_list:
+                                    domain = cookie.get('domain', '')
+                                    include_subdomains = 'TRUE' if domain.startswith('.') else 'FALSE'
+                                    path = cookie.get('path', '/')
+                                    secure = 'TRUE' if cookie.get('secure', False) else 'FALSE'
+                                    expiry = str(int(cookie.get('expirationDate', 0)))
+                                    name = cookie.get('name', '')
+                                    value = cookie.get('value', '')
+                                    f.write(f"{domain}\t{include_subdomains}\t{path}\t{secure}\t{expiry}\t{name}\t{value}\n")
+                            self.log(f"Loaded {len(cookies_list)} TikTok cookies", 'debug')
+                            return self._cookies_file
+        except Exception as e:
+            self.log(f"Could not load TikTok cookies: {e}", 'debug')
+
+        return None
+
+    def _save_cookies_back(self):
+        """Read updated cookies from temp file and save back to database.
+        yt-dlp and gallery-dl update the cookies file with refreshed tokens
+        from TikTok (e.g. msToken), so we need to persist those changes."""
+        if not self._cookies_file or not os.path.exists(self._cookies_file):
+            return
+        if not self.unified_db:
+            return
+
+        try:
+            import http.cookiejar
+            jar = http.cookiejar.MozillaCookieJar(self._cookies_file)
+            jar.load(ignore_discard=True, ignore_expires=True)
+
+            updated_cookies = []
+            for cookie in jar:
+                updated_cookies.append({
+                    'name': cookie.name,
+                    'value': cookie.value,
+                    'domain': cookie.domain,
+                    'path': cookie.path,
+                    'secure': cookie.secure,
+                    'expirationDate': cookie.expires or 0,
+                })
+
+            if not updated_cookies:
+                return
+
+            # Merge updated cookies back to DB
+            with self.unified_db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute("SELECT cookies_json FROM scrapers WHERE id = ?", ('tiktok',))
+                row = cursor.fetchone()
+
+            if row and row[0]:
+                existing_data = json.loads(row[0])
+                existing_cookies = existing_data if isinstance(existing_data, list) else existing_data.get('cookies', [])
+                # Merge: updated cookies override existing by name+domain
+                cookie_map = {(c.get('name'), c.get('domain')): c for c in existing_cookies}
+                for c in updated_cookies:
+                    cookie_map[(c['name'], c['domain'])] = c
+                final_cookies = list(cookie_map.values())
+            else:
+                final_cookies = updated_cookies
+
+            self.unified_db.save_scraper_cookies('tiktok', final_cookies, merge=False)
+            self.log(f"Saved {len(final_cookies)} refreshed cookies back to DB", 'debug')
+
+            # Clear cached file so next use gets fresh cookies from DB
+            self._cookies_file = None
+        except Exception as e:
+            self.log(f"Failed to save cookies back: {e}", 'debug')
+
+    def _get_base_cmd(self) -> List[str]:
+        """Get base yt-dlp command with cookies if available."""
+        cmd = [self.ytdlp_path]
+        cookies_file = self._get_cookies_file()
+        if cookies_file:
+            cmd.extend(['--cookies', cookies_file])
+        return cmd
+
+    @staticmethod
+    def extract_username(url: str) -> Optional[str]:
+        """Extract username from TikTok URL"""
+        match = re.search(r'tiktok\.com/@([a-zA-Z0-9_.]+)', url)
+        if match:
+            return match.group(1)
+        return None
+
+    @staticmethod
+    def normalize_creator_url(username: str) -> str:
+        """Convert username to a consistent URL format"""
+        if username.startswith('http://') or username.startswith('https://'):
+            return username
+        username = username.lstrip('@')
+        return f"https://www.tiktok.com/@{username}"
+
+    async def _resolve_channel_id(self, username: str) -> Optional[str]:
+        """Resolve a TikTok username to a channel_id (secUid).
+
+        When yt-dlp can't extract the secondary user ID from the profile page,
+        we try to find a video URL from TikTok's embed/RSS and then extract
+        the channel_id (secUid) from that video's metadata via yt-dlp.
+        """
+        if not self.ytdlp_path:
+            return None
+
+        try:
+            # Step 1: Get a video URL from this user via the oembed embed HTML
+            video_url = None
+            async with aiohttp.ClientSession() as session:
+                # The oembed HTML often contains a video ID we can use
+                oembed_url = f"https://www.tiktok.com/oembed?url=https://www.tiktok.com/@{username}"
+                async with session.get(oembed_url, timeout=aiohttp.ClientTimeout(total=15)) as resp:
+                    if resp.status == 200:
+                        data = await resp.json()
+                        embed_html = data.get('html', '')
+                        # Extract video URL from embed iframe
+                        match = re.search(r'cite="(https://www\.tiktok\.com/@[^"]+/video/\d+)"', embed_html)
+                        if not match:
+                            match = re.search(r'data-video-id="(\d+)"', embed_html)
+                            if match:
+                                video_url = f"https://www.tiktok.com/@{username}/video/{match.group(1)}"
+                        else:
+                            video_url = match.group(1)
+
+                        if not video_url:
+                            # oembed thumbnail_url sometimes contains the video ID
+                            thumb = data.get('thumbnail_url', '')
+                            vid_match = re.search(r'/video/(\d+)', thumb)
+                            if vid_match:
+                                video_url = f"https://www.tiktok.com/@{username}/video/{vid_match.group(1)}"
+
+            if not video_url:
+                # Step 1b: Check if we have any existing video URLs in the database
+                if self.unified_db:
+                    try:
+                        with self.unified_db.get_connection() as conn:
+                            cursor = conn.cursor()
+                            cursor.execute("""
+                                SELECT a.download_url FROM paid_content_attachments a
+                                JOIN paid_content_posts p ON a.post_id = p.id
+                                JOIN paid_content_creators c ON p.creator_id = c.id
+                                WHERE c.username = ? AND a.download_url LIKE '%tiktok.com%'
+                                LIMIT 1
+                            """, (username,))
+                            row = cursor.fetchone()
+                            if row and row[0]:
+                                video_url = row[0]
+                    except Exception:
+                        pass
+
+            if not video_url:
+                self.log(f"No video URL found for @{username} to resolve channel_id", 'debug')
+                return None
+
+            # Step 2: Use yt-dlp to get the channel_id from the single video
+            self.log(f"Resolving channel_id from video: {video_url}", 'debug')
+            cmd = self._get_base_cmd() + [
+                '-j',
+                '--no-warnings',
+                '--no-download',
+                '--socket-timeout', '30',
+                video_url
+            ]
+
+            result = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+            stdout, stderr = await result.communicate()
+
+            if result.returncode == 0:
+                for line in stdout.decode('utf-8', errors='replace').strip().split('\n'):
+                    if not line.strip():
+                        continue
+                    try:
+                        video_data = json.loads(line)
+                        channel_id = video_data.get('channel_id') or video_data.get('playlist_id')
+                        if channel_id:
+                            self.log(f"Resolved @{username} channel_id: {channel_id[:30]}...", 'info')
+                            return channel_id
+                    except json.JSONDecodeError:
+                        continue
+
+        except Exception as e:
+            self.log(f"Failed to resolve channel_id for @{username}: {e}", 'debug')
+
+        return None
+
+    async def get_creator_info(self, url: str) -> Optional[Dict]:
+        """Get creator information using yt-dlp + profile page scraping"""
+        username = self.extract_username(url)
+        if not username:
+            return None
+
+        profile_url = self.normalize_creator_url(username)
+        creator_name = username
+
+        # Try yt-dlp for display name from video metadata
+        if self.ytdlp_path:
+            try:
+                cmd = self._get_base_cmd() + [
+                    '--no-warnings',
+                    '--flat-playlist',
+                    '-j',
+                    '--playlist-items', '1',
+                    '--socket-timeout', '30',
+                    profile_url
+                ]
+
+                result = await asyncio.create_subprocess_exec(
+                    *cmd,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.PIPE
+                )
+
+                stdout, stderr = await result.communicate()
+
+                if result.returncode == 0:
+                    for line in stdout.decode('utf-8', errors='replace').strip().split('\n'):
+                        if not line:
+                            continue
+                        try:
+                            data = json.loads(line)
+                            creator_name = (data.get('channel') or data.get('uploader')
+                                            or data.get('playlist_title') or username)
+                            break
+                        except json.JSONDecodeError:
+                            continue
+                else:
+                    # Fallback: try tiktokuser: scheme if secondary user ID extraction fails
+                    err_text = stderr.decode('utf-8', errors='replace')
+                    if 'secondary user ID' in err_text or 'Unable to extract' in err_text:
+                        channel_id = await self._resolve_channel_id(username)
+                        if channel_id:
+                            fb_cmd = self._get_base_cmd() + [
+                                '--no-warnings', '--flat-playlist',
+                                '-j', '--playlist-items', '1', '--socket-timeout', '30',
+                                f"tiktokuser:{channel_id}"
+                            ]
+                            fb_result = await asyncio.create_subprocess_exec(
+                                *fb_cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+                            )
+                            fb_stdout, _ = await fb_result.communicate()
+                            if fb_result.returncode == 0:
+                                for line in fb_stdout.decode('utf-8', errors='replace').strip().split('\n'):
+                                    if not line:
+                                        continue
+                                    try:
+                                        data = json.loads(line)
+                                        creator_name = (data.get('channel') or data.get('uploader')
+                                                        or data.get('playlist_title') or username)
+                                        break
+                                    except json.JSONDecodeError:
+                                        continue
+            except Exception as e:
+                self.log(f"Failed to get creator info via yt-dlp: {e}", 'debug')
+
+        # Scrape profile page for avatar and bio
+        profile_image = None
+        bio = None
+        try:
+            profile_image, bio, page_name = await self._scrape_profile_page(profile_url)
+            if page_name and creator_name == username:
+                creator_name = page_name
+        except Exception as e:
+            self.log(f"Failed to scrape profile page: {e}", 'debug')
+
+        return {
+            'creator_id': username,
+            'creator_name': creator_name,
+            'creator_url': profile_url,
+            'profile_image_url': profile_image,
+            'bio': bio,
+        }
+
+    async def _fetch_profile_with_cookies(self, url: str) -> Optional[str]:
+        """Fetch TikTok profile page using curl_cffi with cookies from database."""
+        cookies_file = self._get_cookies_file()
+        if not cookies_file:
+            return None
+
+        try:
+            from curl_cffi import requests as cf_requests
+            import http.cookiejar
+
+            # Load cookies from the Netscape file
+            jar = http.cookiejar.MozillaCookieJar(cookies_file)
+            jar.load(ignore_discard=True, ignore_expires=True)
+
+            # Try multiple browser versions for curl_cffi compatibility
+            for _browser in ("chrome136", "chrome131", "chrome"):
+                try:
+                    session = cf_requests.Session(impersonate=_browser)
+                    break
+                except Exception:
+                    continue
+            else:
+                session = cf_requests.Session()
+            for cookie in jar:
+                session.cookies.set(cookie.name, cookie.value, domain=cookie.domain)
+
+            resp = session.get(url, timeout=15)
+            if resp.status_code == 200 and 'avatarLarger' in resp.text:
+                self.log("Fetched TikTok profile with cookies (curl_cffi)", 'debug')
+                return resp.text
+            elif 'captcha' in resp.text.lower():
+                self.log("TikTok profile still returned captcha with cookies", 'debug')
+            session.close()
+        except Exception as e:
+            self.log(f"curl_cffi profile fetch failed: {e}", 'debug')
+
+        return None
+
+    async def _scrape_profile_page(self, url: str) -> tuple:
+        """
+        Scrape TikTok profile page for avatar and bio from embedded JSON data.
+        TikTok embeds user data in __UNIVERSAL_DATA_FOR_REHYDRATION__ script tag.
+        Returns (profile_image_url, bio, display_name).
+        """
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.9',
+        }
+
+        profile_image = None
+        bio = None
+        display_name = None
+
+        try:
+            page_html = None
+            async with aiohttp.ClientSession() as session:
+                async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(total=15)) as resp:
+                    if resp.status == 200:
+                        page_html = await resp.text()
+
+            # If we got a captcha page, try curl_cffi with cookies
+            if not page_html or ('captcha' in page_html.lower() and 'avatarLarger' not in page_html):
+                page_html = await self._fetch_profile_with_cookies(url)
+                if not page_html:
+                    return (None, None, None)
+
+            # Try structured JSON first (__UNIVERSAL_DATA_FOR_REHYDRATION__)
+            rehydration_match = re.search(
+                r'<script[^>]*id="__UNIVERSAL_DATA_FOR_REHYDRATION__"[^>]*>(.*?)</script>',
+                page_html, re.DOTALL
+            )
+            if rehydration_match:
+                try:
+                    rdata = json.loads(rehydration_match.group(1))
+                    user_detail = (rdata.get('__DEFAULT_SCOPE__', {})
+                            .get('webapp.user-detail', {}))
+                    user = user_detail.get('userInfo', {}).get('user', {})
+                    if user:
+                        avatar_val = user.get('avatarLarger') or user.get('avatarMedium')
+                        if avatar_val and not avatar_val.endswith('.mp4'):
+                            profile_image = avatar_val
+                            self.log("Found TikTok profile avatar (rehydration)", 'debug')
+                        sig_val = user.get('signature', '')
+                        if sig_val and sig_val.strip():
+                            bio = sig_val.strip()
+                            self.log("Found TikTok bio (rehydration)", 'debug')
+                        nick_val = user.get('nickname')
+                        if nick_val:
+                            display_name = nick_val
+                            self.log(f"Found TikTok display name (rehydration): {display_name}", 'debug')
+
+                    # Extract pinned post IDs
+                    pinned_list = user_detail.get('pinnedList', [])
+                    if pinned_list:
+                        self._last_pinned_posts = {}
+                        for item in pinned_list:
+                            vid = str(item.get('id', ''))
+                            if vid:
+                                self._last_pinned_posts[vid] = {'pinned_at': None}
+                        if self._last_pinned_posts:
+                            self.log(f"Found {len(self._last_pinned_posts)} pinned TikTok posts", 'debug')
+                except (json.JSONDecodeError, KeyError):
+                    pass
+
+            # Fallback: regex extraction from raw HTML
+            # Use json.loads to decode values (handles \uXXXX, surrogate pairs, and raw UTF-8)
+            if not profile_image:
+                avatar_match = re.search(r'"avatarLarger":"([^"]+)"', page_html)
+                if not avatar_match:
+                    avatar_match = re.search(r'"avatarMedium":"([^"]+)"', page_html)
+                if avatar_match:
+                    try:
+                        avatar_url = json.loads(f'"{avatar_match.group(1)}"')
+                    except (json.JSONDecodeError, ValueError):
+                        avatar_url = avatar_match.group(1)
+                    if avatar_url and not avatar_url.endswith('.mp4'):
+                        profile_image = avatar_url
+                        self.log("Found TikTok profile avatar", 'debug')
+
+            if not bio:
+                sig_match = re.search(r'"signature":"([^"]*)"', page_html)
+                if sig_match:
+                    try:
+                        raw_bio = json.loads(f'"{sig_match.group(1)}"')
+                    except (json.JSONDecodeError, ValueError):
+                        raw_bio = sig_match.group(1)
+                    if raw_bio and raw_bio.strip():
+                        bio = raw_bio.strip()
+                        self.log("Found TikTok bio", 'debug')
+
+            if not display_name:
+                nick_match = re.search(r'"nickname":"([^"]+)"', page_html)
+                if nick_match:
+                    try:
+                        display_name = json.loads(f'"{nick_match.group(1)}"')
+                    except (json.JSONDecodeError, ValueError):
+                        display_name = nick_match.group(1)
+                    self.log(f"Found TikTok display name: {display_name}", 'debug')
+
+            # Extract banner/cover from "coverLarger" field
+            # (stored separately, not returned here but could be used later)
+
+        except asyncio.TimeoutError:
+            self.log("TikTok profile page request timed out", 'debug')
+        except Exception as e:
+            self.log(f"Error scraping TikTok profile: {e}", 'debug')
+
+        return (profile_image, bio, display_name)
+
+    async def get_creator_videos(self, url: str, since_date: str = None,
+                                  max_videos: int = None,
+                                  progress_callback=None) -> List[Dict]:
+        """
+        Get all videos from a TikTok profile using yt-dlp --flat-playlist -j.
+
+        Uses JSON output to properly handle multi-line descriptions/titles.
+        Returns list of video metadata dicts with video_id and upload_date.
+        """
+        if not self.ytdlp_path:
+            return []
+
+        username = self.extract_username(url)
+        if not username:
+            return []
+
+        profile_url = self.normalize_creator_url(username)
+
+        try:
+            # Use yt-dlp flat-playlist with JSON output for full metadata
+            cmd = self._get_base_cmd() + [
+                '--flat-playlist',
+                '-j',
+                '--no-warnings',
+                '--socket-timeout', '30',
+                profile_url
+            ]
+
+            self.log(f"Fetching TikTok videos for @{username}", 'info')
+
+            result = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+
+            stdout, stderr = await result.communicate()
+
+            if result.returncode != 0:
+                error = stderr.decode('utf-8', errors='replace')
+
+                # Fallback: if yt-dlp can't extract secondary user ID, try tiktokuser: scheme
+                if 'secondary user ID' in error or 'Unable to extract' in error:
+                    self.log(f"yt-dlp can't extract user ID for @{username}, trying channel_id fallback", 'info')
+                    channel_id = await self._resolve_channel_id(username)
+                    if channel_id:
+                        fallback_cmd = self._get_base_cmd() + [
+                            '--flat-playlist',
+                            '-j',
+                            '--no-warnings',
+                            '--socket-timeout', '30',
+                            f"tiktokuser:{channel_id}"
+                        ]
+                        fb_result = await asyncio.create_subprocess_exec(
+                            *fallback_cmd,
+                            stdout=asyncio.subprocess.PIPE,
+                            stderr=asyncio.subprocess.PIPE
+                        )
+                        stdout, stderr = await fb_result.communicate()
+                        if fb_result.returncode == 0:
+                            self.log(f"Fallback tiktokuser: succeeded for @{username}", 'info')
+                        else:
+                            fb_error = stderr.decode('utf-8', errors='replace')
+                            self.log(f"Fallback also failed for @{username}: {fb_error}", 'warning')
+                            return []
+                    else:
+                        self.log(f"Could not resolve channel_id for @{username}", 'warning')
+                        return []
+                else:
+                    self.log(f"Failed to list TikTok videos: {error}", 'warning')
+                    return []
+
+            lines = stdout.decode('utf-8', errors='replace').strip().split('\n')
+
+            # Parse since_date for filtering
+            cutoff_str = None
+            if since_date:
+                try:
+                    if 'T' in since_date:
+                        cutoff_dt = datetime.fromisoformat(since_date.replace('Z', '+00:00').replace('+00:00', ''))
+                    else:
+                        cutoff_dt = datetime.strptime(since_date[:10], '%Y-%m-%d')
+                    cutoff_str = cutoff_dt.strftime('%Y%m%d')
+                except (ValueError, IndexError):
+                    pass
+
+            videos = []
+            for line in lines:
+                if not line.strip():
+                    continue
+
+                try:
+                    data = json.loads(line)
+                except json.JSONDecodeError:
+                    continue
+
+                video_id = str(data.get('id', ''))
+                if not video_id:
+                    continue
+
+                upload_date = data.get('upload_date', '')
+                title = data.get('title', '')
+                description = data.get('description', '')
+
+                # Skip posts where yt-dlp returned no metadata at all
+                # When cookies are expired, yt-dlp returns no date, no title,
+                # and no description. Real posts with empty captions still have
+                # upload_date, so we use that as the key signal.
+                if not upload_date and not title and not description:
+                    self.log(f"Skipping TikTok {video_id}: no metadata (cookies may be expired)", 'debug')
+                    continue
+
+                title = title or description or f"TikTok video #{video_id}"
+                description = description or title
+
+                # Filter by date if cutoff specified
+                if cutoff_str and upload_date and upload_date < cutoff_str:
+                    continue
+
+                # Format upload_date to ISO
+                formatted_date = None
+                if upload_date and len(upload_date) == 8 and upload_date.isdigit():
+                    formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"
+
+                video_url = data.get('url') or f"https://www.tiktok.com/@{username}/video/{video_id}"
+
+                videos.append({
+                    'video_id': video_id,
+                    'title': title,
+                    'description': description,
+                    'upload_date': formatted_date,
+                    'url': video_url,
+                    'username': username,
+                })
+
+                if progress_callback:
+                    progress_callback(len(videos))
+
+                if max_videos and len(videos) >= max_videos:
+                    break
+
+            self.log(f"Found {len(videos)} TikTok videos for @{username}", 'info')
+            self._save_cookies_back()
+            return videos
+
+        except Exception as e:
+            self.log(f"Error getting TikTok videos: {e}", 'error')
+            self._save_cookies_back()
+            return []
+
+    async def download_video(self, video_url: str, output_dir: Path, username: str = '') -> Dict:
+        """
+        Download a TikTok video/carousel using gallery-dl.
+
+        gallery-dl handles both regular videos and carousel/slideshow posts.
+        Returns dict with success status and list of downloaded files.
+        """
+        if not self.gallery_dl_path:
+            return {'success': False, 'error': 'gallery-dl not available'}
+
+        try:
+            output_dir = Path(output_dir)
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            cmd = [
+                self.gallery_dl_path,
+                '--write-metadata',
+                '-D', str(output_dir),
+                '-f', '{id}_{num}.{extension}',
+            ]
+
+            # Add cookies for age-restricted / login-required content
+            cookies_file = self._get_cookies_file()
+            if cookies_file:
+                cmd.extend(['--cookies', cookies_file])
+
+            cmd.append(video_url)
+
+            self.log(f"Downloading TikTok: {video_url}", 'debug')
+
+            # Snapshot existing files before download so we only pick up new ones
+            existing_files = set(f.name for f in output_dir.iterdir() if f.is_file())
+
+            result = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+
+            stdout, stderr = await result.communicate()
+
+            # Find newly downloaded files (exclude .json metadata and audio-only files)
+            downloaded_files = []
+            for f in output_dir.iterdir():
+                if f.is_file() and f.name not in existing_files and f.suffix.lower() not in ('.json',):
+                    # Skip audio-only files
+                    if f.suffix.lower() in ('.mp3', '.m4a', '.aac', '.wav', '.ogg'):
+                        continue
+                    downloaded_files.append(f)
+
+            if result.returncode != 0:
+                # gallery-dl exit code 4 = partial failure (e.g. slideshow images OK but audio failed)
+                # If we got media files, treat as success
+                if downloaded_files:
+                    self.log(f"gallery-dl partial failure (code {result.returncode}) but {len(downloaded_files)} files downloaded", 'debug')
+                else:
+                    error_msg = stderr.decode('utf-8', errors='replace').strip()
+                    if 'not available' in error_msg.lower() or '404' in error_msg:
+                        error_msg = 'Video not available (deleted or private)'
+                    elif len(error_msg) > 200:
+                        error_msg = error_msg[:200] + '...'
+                    return {'success': False, 'error': error_msg}
+
+            if not downloaded_files:
+                return {'success': False, 'error': 'No files downloaded'}
+
+            # Sort by name to maintain carousel order (e.g. id_1.jpg, id_2.jpg)
+            downloaded_files.sort(key=lambda f: f.name)
+            primary_file = downloaded_files[0]
+
+            # Determine if this is a photo carousel (multiple images)
+            image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.webp'}
+            is_carousel = len(downloaded_files) > 1 and all(
+                f.suffix.lower() in image_exts for f in downloaded_files
+            )
+
+            self._save_cookies_back()
+            return {
+                'success': True,
+                'file_path': str(primary_file),
+                'filename': primary_file.name,
+                'file_size': primary_file.stat().st_size,
+                'all_files': [str(f) for f in downloaded_files],
+                'file_count': len(downloaded_files),
+                'is_carousel': is_carousel,
+            }
+
+        except Exception as e:
+            self.log(f"Error downloading TikTok video: {e}", 'error')
+            self._save_cookies_back()
+            return {'success': False, 'error': str(e)}
+
+    async def get_creator(self, url: str) -> Optional[Creator]:
+        """Get Creator object from URL"""
+        info = await self.get_creator_info(url)
+        if not info:
+            return None
+
+        username = info.get('creator_id', '')
+
+        return Creator(
+            creator_id=username,
+            service_id='tiktok',
+            platform='tiktok',
+            username=info.get('creator_name', username),
+            display_name=info.get('creator_name'),
+            profile_image_url=info.get('profile_image_url'),
+            bio=info.get('bio'),
+        )
+
+    async def get_posts(self, url: str, since_date: str = None,
+                        max_videos: int = None, progress_callback=None) -> List[Post]:
+        """Get TikTok videos as Post objects"""
+        videos = await self.get_creator_videos(url, since_date, max_videos, progress_callback)
+
+        username = self.extract_username(url) or ''
+
+        posts = []
+        for video in videos:
+            # Each TikTok post could be video or carousel
+            # We create a single attachment for now; the actual download determines type
+            attachment = Attachment(
+                name=f"{video['video_id']}.mp4",
+                file_type='video',
+                extension='.mp4',
+                server_path=video['url'],
+                download_url=video['url'],
+            )
+
+            post = Post(
+                post_id=video['video_id'],
+                service_id='tiktok',
+                platform='tiktok',
+                creator_id=username,
+                title=None,
+                content=video.get('description') or video.get('title', ''),
+                published_at=video.get('upload_date'),
+                attachments=[attachment],
+            )
+            posts.append(post)
+
+        return posts
--- a/modules/paid_content/twitch_client.py
+++ b/modules/paid_content/twitch_client.py
@@ -0,0 +1,751 @@
+"""
+Twitch Clips Client - Fetches channel clips using yt-dlp
+"""
+
+import aiohttp
+import asyncio
+import hashlib
+import json
+import os
+import re
+import subprocess
+import tempfile
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Optional
+
+from modules.base_module import LoggingMixin
+from .models import Creator, Post, Attachment
+
+
+class TwitchThumbnailCache:
+    """Cache for Twitch clip thumbnails"""
+
+    def __init__(self, cache_dir: str = None):
+        self.cache_dir = Path(cache_dir or '/opt/media-downloader/data/cache/twitch_thumbnails')
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+    def _get_cache_path(self, thumbnail_url: str) -> Path:
+        """Get local cache path for a thumbnail URL"""
+        # Create a hash of the URL for the filename
+        url_hash = hashlib.md5(thumbnail_url.encode()).hexdigest()
+        # Extract extension from URL or default to jpg
+        ext = '.jpg'
+        if '.png' in thumbnail_url.lower():
+            ext = '.png'
+        elif '.webp' in thumbnail_url.lower():
+            ext = '.webp'
+        return self.cache_dir / f"{url_hash}{ext}"
+
+    def get_cached(self, thumbnail_url: str) -> Optional[str]:
+        """Get cached thumbnail path if it exists"""
+        cache_path = self._get_cache_path(thumbnail_url)
+        if cache_path.exists():
+            return str(cache_path)
+        return None
+
+    async def cache_thumbnail(self, thumbnail_url: str, session: aiohttp.ClientSession = None) -> Optional[str]:
+        """Download and cache a thumbnail, return local path"""
+        if not thumbnail_url:
+            return None
+
+        # Check if already cached
+        cache_path = self._get_cache_path(thumbnail_url)
+        if cache_path.exists():
+            return str(cache_path)
+
+        # Download thumbnail
+        try:
+            close_session = False
+            if session is None:
+                session = aiohttp.ClientSession()
+                close_session = True
+
+            try:
+                async with session.get(thumbnail_url, timeout=aiohttp.ClientTimeout(total=30)) as resp:
+                    if resp.status == 200:
+                        content = await resp.read()
+                        with open(cache_path, 'wb') as f:
+                            f.write(content)
+                        return str(cache_path)
+            finally:
+                if close_session:
+                    await session.close()
+        except Exception:
+            pass
+
+        return None
+
+    async def cache_thumbnails_batch(self, thumbnail_urls: List[str], max_concurrent: int = 5) -> Dict[str, str]:
+        """Cache multiple thumbnails in parallel, return url->local_path mapping"""
+        result = {}
+
+        # Filter out already cached
+        to_download = []
+        for url in thumbnail_urls:
+            if not url:
+                continue
+            cached = self.get_cached(url)
+            if cached:
+                result[url] = cached
+            else:
+                to_download.append(url)
+
+        if not to_download:
+            return result
+
+        # Download in batches
+        async with aiohttp.ClientSession() as session:
+            semaphore = asyncio.Semaphore(max_concurrent)
+
+            async def download_one(url: str):
+                async with semaphore:
+                    path = await self.cache_thumbnail(url, session)
+                    if path:
+                        result[url] = path
+
+            await asyncio.gather(*[download_one(url) for url in to_download])
+
+        return result
+
+
+class TwitchClient(LoggingMixin):
+    """
+    Client for fetching Twitch channel clips using yt-dlp
+
+    Supports:
+    - Channel clips URLs (twitch.tv/username/clips)
+    - Fetching channel metadata
+    - Listing all clips from a channel
+    - Downloading clips
+    """
+
+    # Quality presets for yt-dlp
+    QUALITY_PRESETS = {
+        'best': 'best',
+        '1080p': 'best[height<=1080]',
+        '720p': 'best[height<=720]',
+        '480p': 'best[height<=480]',
+    }
+
+    def __init__(self, ytdlp_path: str = None, unified_db=None, log_callback=None, cache_dir: str = None):
+        self._init_logger('PaidContent', log_callback, default_module='Twitch')
+
+        # Find yt-dlp executable
+        self.ytdlp_path = ytdlp_path or self._find_ytdlp()
+        if not self.ytdlp_path:
+            self.log("yt-dlp not found, Twitch support will be disabled", 'warning')
+
+        # Store database reference for cookie access
+        self.unified_db = unified_db
+        self._cookies_file = None
+
+        # Initialize thumbnail cache
+        self.thumbnail_cache = TwitchThumbnailCache(cache_dir)
+
+    def _find_ytdlp(self) -> Optional[str]:
+        """Find yt-dlp executable"""
+        common_paths = [
+            '/opt/media-downloader/venv/bin/yt-dlp',  # Prefer venv version (kept up to date)
+            '/usr/local/bin/yt-dlp',
+            '/usr/bin/yt-dlp',
+            '/opt/homebrew/bin/yt-dlp',
+            os.path.expanduser('~/.local/bin/yt-dlp'),
+        ]
+
+        for path in common_paths:
+            if os.path.isfile(path) and os.access(path, os.X_OK):
+                return path
+
+        try:
+            result = subprocess.run(['which', 'yt-dlp'], capture_output=True, text=True)
+            if result.returncode == 0:
+                return result.stdout.strip()
+        except Exception:
+            pass
+
+        return None
+
+    def is_available(self) -> bool:
+        """Check if yt-dlp is available"""
+        return self.ytdlp_path is not None
+
+    def _get_cookies_file(self) -> Optional[str]:
+        """Get path to cookies file, creating it from database if needed"""
+        if self._cookies_file and os.path.exists(self._cookies_file):
+            return self._cookies_file
+
+        if not self.unified_db:
+            return None
+
+        try:
+            with self.unified_db.get_connection() as conn:
+                cursor = conn.cursor()
+                # Try twitch-specific cookies first, then fall back to ytdlp
+                for scraper_id in ['twitch', 'ytdlp']:
+                    cursor.execute("SELECT cookies_json FROM scrapers WHERE id = ?", (scraper_id,))
+                    row = cursor.fetchone()
+                    if row and row[0]:
+                        data = json.loads(row[0])
+                        # Support both {"cookies": [...]} and [...] formats
+                        if isinstance(data, dict) and 'cookies' in data:
+                            cookies_list = data['cookies']
+                        elif isinstance(data, list):
+                            cookies_list = data
+                        else:
+                            cookies_list = []
+
+                        if cookies_list:
+                            # Write cookies to temp file in Netscape format
+                            fd, self._cookies_file = tempfile.mkstemp(suffix='.txt', prefix='twitch_cookies_')
+                            with os.fdopen(fd, 'w') as f:
+                                f.write("# Netscape HTTP Cookie File\n")
+                                for cookie in cookies_list:
+                                    domain = cookie.get('domain', '')
+                                    include_subdomains = 'TRUE' if domain.startswith('.') else 'FALSE'
+                                    path = cookie.get('path', '/')
+                                    secure = 'TRUE' if cookie.get('secure', False) else 'FALSE'
+                                    expiry = str(int(cookie.get('expirationDate', 0)))
+                                    name = cookie.get('name', '')
+                                    value = cookie.get('value', '')
+                                    f.write(f"{domain}\t{include_subdomains}\t{path}\t{secure}\t{expiry}\t{name}\t{value}\n")
+                            self.log(f"Loaded {len(cookies_list)} cookies from {scraper_id} scraper", 'debug')
+                            return self._cookies_file
+        except Exception as e:
+            self.log(f"Could not load cookies: {e}", 'debug')
+
+        return None
+
+    def _get_base_cmd(self) -> List[str]:
+        """Get base yt-dlp command with cookies if available"""
+        cmd = [self.ytdlp_path]
+        cookies_file = self._get_cookies_file()
+        if cookies_file:
+            cmd.extend(['--cookies', cookies_file])
+        return cmd
+
+    def cleanup(self):
+        """Clean up temporary files"""
+        if self._cookies_file and os.path.exists(self._cookies_file):
+            try:
+                os.unlink(self._cookies_file)
+            except Exception:
+                pass
+            self._cookies_file = None
+
+    @staticmethod
+    def extract_channel_name(url: str) -> Optional[str]:
+        """
+        Extract channel name from Twitch URL
+
+        Supports:
+        - twitch.tv/username
+        - twitch.tv/username/clips
+        - m.twitch.tv/username/clips
+        """
+        patterns = [
+            r'twitch\.tv/([a-zA-Z0-9_]+)(?:/clips)?',
+        ]
+
+        for pattern in patterns:
+            match = re.search(pattern, url)
+            if match:
+                return match.group(1).lower()
+
+        return None
+
+    @staticmethod
+    def normalize_clips_url(channel_name: str) -> str:
+        """Convert channel name to clips URL with all-time filter"""
+        return f"https://www.twitch.tv/{channel_name}/clips?filter=clips&range=all"
+
+    async def get_channel_info(self, channel_url: str, count_clips: bool = True) -> Optional[Dict]:
+        """
+        Get channel information and optionally count all clips
+        """
+        if not self.is_available():
+            return None
+
+        channel_name = self.extract_channel_name(channel_url)
+        if not channel_name:
+            return None
+
+        try:
+            clips_url = self.normalize_clips_url(channel_name)
+
+            # First get basic info from first clip
+            cmd = self._get_base_cmd() + [
+                '--no-warnings',
+                '--flat-playlist',
+                '-j',
+                '--playlist-items', '1',
+                clips_url
+            ]
+
+            result = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+
+            stdout, stderr = await result.communicate()
+
+            if result.returncode != 0:
+                self.log(f"Failed to get channel info: {stderr.decode()}", 'warning')
+                return None
+
+            first_clip_data = None
+            for line in stdout.decode('utf-8', errors='replace').strip().split('\n'):
+                if not line:
+                    continue
+                try:
+                    first_clip_data = json.loads(line)
+                    break
+                except json.JSONDecodeError:
+                    continue
+
+            if not first_clip_data:
+                return None
+
+            # Count all clips if requested (this can take a while for channels with many clips)
+            clip_count = 0
+            if count_clips:
+                self.log(f"Counting clips for {channel_name}...", 'debug')
+                count_cmd = self._get_base_cmd() + [
+                    '--no-warnings',
+                    '--flat-playlist',
+                    '--print', 'id',
+                    clips_url
+                ]
+
+                count_result = await asyncio.create_subprocess_exec(
+                    *count_cmd,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.PIPE
+                )
+
+                count_stdout, _ = await count_result.communicate()
+                if count_result.returncode == 0:
+                    clip_count = len([l for l in count_stdout.decode('utf-8', errors='replace').strip().split('\n') if l])
+                    self.log(f"Found {clip_count} clips for {channel_name}", 'info')
+
+            return {
+                'channel_id': channel_name,
+                'channel_name': channel_name,
+                'channel_url': f"https://www.twitch.tv/{channel_name}",
+                'clips_url': clips_url,
+                'thumbnail': first_clip_data.get('thumbnail'),
+                'clip_count': clip_count,
+            }
+
+        except Exception as e:
+            self.log(f"Error getting channel info: {e}", 'error')
+            return None
+
+    async def get_channel_clips(self, channel_url: str, since_date: str = None,
+                                 max_clips: int = None, progress_callback=None,
+                                 cache_thumbnails: bool = True) -> List[Dict]:
+        """
+        Get all clips from a channel
+
+        Args:
+            channel_url: Twitch channel URL
+            since_date: Only fetch clips created after this date (ISO format)
+            max_clips: Maximum number of clips to fetch
+            progress_callback: Callback function(count) for progress updates
+            cache_thumbnails: Whether to download and cache thumbnails locally
+
+        Returns:
+            List of clip metadata dicts with cached thumbnail paths
+        """
+        if not self.is_available():
+            return []
+
+        channel_name = self.extract_channel_name(channel_url)
+        if not channel_name:
+            self.log(f"Could not extract channel name from URL: {channel_url}", 'error')
+            return []
+
+        try:
+            clips_url = self.normalize_clips_url(channel_name)
+
+            # Use flat-playlist for faster extraction (full metadata available in flat mode for Twitch clips)
+            cmd = self._get_base_cmd() + [
+                '--no-warnings',
+                '--flat-playlist',
+                '-j',
+                clips_url
+            ]
+
+            # Add date filter at yt-dlp level for efficiency
+            if since_date:
+                try:
+                    from datetime import datetime
+                    # Convert ISO date to YYYYMMDD format for yt-dlp
+                    date_obj = datetime.fromisoformat(since_date.replace('Z', '+00:00'))
+                    dateafter = date_obj.strftime('%Y%m%d')
+                    cmd.extend(['--dateafter', dateafter])
+                    self.log(f"Filtering clips after {dateafter}", 'debug')
+                except (ValueError, AttributeError):
+                    pass
+
+            if max_clips:
+                cmd.extend(['--playlist-items', f'1:{max_clips}'])
+
+            self.log(f"Fetching clips from channel: {channel_name}", 'info')
+
+            result = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+
+            stdout, stderr = await result.communicate()
+
+            if result.returncode != 0:
+                error = stderr.decode('utf-8', errors='replace')
+                self.log(f"Failed to get channel clips: {error}", 'warning')
+                return []
+
+            clips = []
+            for line in stdout.decode('utf-8', errors='replace').strip().split('\n'):
+                if not line:
+                    continue
+                try:
+                    data = json.loads(line)
+
+                    clip_id = data.get('id')
+                    if not clip_id:
+                        continue
+
+                    # Parse timestamp to ISO format
+                    timestamp = data.get('timestamp')
+                    upload_date = data.get('upload_date')
+                    if timestamp:
+                        try:
+                            upload_date = datetime.fromtimestamp(timestamp).isoformat()
+                        except (ValueError, OSError):
+                            pass
+                    elif upload_date:
+                        # Convert YYYYMMDD to ISO format
+                        try:
+                            upload_date = datetime.strptime(upload_date, '%Y%m%d').isoformat()
+                        except ValueError:
+                            pass
+
+                    # Check if clip is newer than since_date
+                    if since_date and upload_date and upload_date <= since_date:
+                        self.log(f"Reached clip from {upload_date}, stopping", 'debug')
+                        break
+
+                    # Extract clip slug from URL
+                    clip_url = data.get('url') or data.get('webpage_url', '')
+                    clip_slug = clip_url.split('/')[-1] if clip_url else clip_id
+
+                    clips.append({
+                        'clip_id': clip_id,
+                        'clip_slug': clip_slug,
+                        'title': data.get('title', f'Clip {clip_id}'),
+                        'upload_date': upload_date,
+                        'timestamp': timestamp,
+                        'duration': data.get('duration'),
+                        'view_count': data.get('view_count'),
+                        'thumbnail': data.get('thumbnail'),
+                        'url': clip_url,
+                        'language': data.get('language'),
+                        'channel_name': channel_name,
+                    })
+
+                    if progress_callback:
+                        progress_callback(len(clips))
+
+                    if max_clips and len(clips) >= max_clips:
+                        break
+
+                except json.JSONDecodeError:
+                    continue
+
+            self.log(f"Found {len(clips)} clips", 'info')
+
+            # Cache thumbnails if requested
+            if cache_thumbnails and clips:
+                thumbnail_urls = [c.get('thumbnail') for c in clips if c.get('thumbnail')]
+                if thumbnail_urls:
+                    self.log(f"Caching {len(thumbnail_urls)} thumbnails...", 'debug')
+                    cached_paths = await self.thumbnail_cache.cache_thumbnails_batch(thumbnail_urls)
+
+                    # Update clips with cached thumbnail paths
+                    for clip in clips:
+                        thumb_url = clip.get('thumbnail')
+                        if thumb_url and thumb_url in cached_paths:
+                            clip['thumbnail_cached'] = cached_paths[thumb_url]
+
+                    self.log(f"Cached {len(cached_paths)} thumbnails", 'debug')
+
+            return clips
+
+        except Exception as e:
+            self.log(f"Error getting channel clips: {e}", 'error')
+            return []
+
+    async def download_clip(self, clip_url: str, output_dir: Path, quality: str = 'best',
+                            progress_callback=None) -> Dict:
+        """
+        Download a clip
+
+        Args:
+            clip_url: Twitch clip URL
+            output_dir: Directory to save the clip
+            quality: Quality preset
+            progress_callback: Callback for download progress
+
+        Returns:
+            Dict with success status and file info
+        """
+        if not self.is_available():
+            return {'success': False, 'error': 'yt-dlp not available'}
+
+        try:
+            output_dir = Path(output_dir)
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            # Output template preserves title and ID
+            output_template = str(output_dir / '%(title).100s_%(id)s.%(ext)s')
+
+            format_str = self.QUALITY_PRESETS.get(quality, self.QUALITY_PRESETS['best'])
+
+            cmd = self._get_base_cmd() + [
+                '--no-warnings',
+                '-f', format_str,
+                '-o', output_template,
+                '--print-json',
+                clip_url
+            ]
+
+            self.log(f"Downloading clip: {clip_url}", 'debug')
+
+            result = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+
+            stdout, stderr = await result.communicate()
+
+            if result.returncode != 0:
+                error_msg = stderr.decode('utf-8', errors='replace').strip()
+                if len(error_msg) > 200:
+                    error_msg = error_msg[:200] + '...'
+                return {'success': False, 'error': error_msg}
+
+            # Parse output JSON
+            clip_info = None
+            for line in stdout.decode('utf-8', errors='replace').strip().split('\n'):
+                try:
+                    clip_info = json.loads(line)
+                    break
+                except json.JSONDecodeError:
+                    continue
+
+            if not clip_info:
+                # Try to find downloaded file
+                files = list(output_dir.glob('*.mp4'))
+                if files:
+                    file_path = max(files, key=lambda f: f.stat().st_mtime)
+                    return {
+                        'success': True,
+                        'file_path': str(file_path),
+                        'filename': file_path.name,
+                        'file_size': file_path.stat().st_size
+                    }
+                return {'success': False, 'error': 'Could not find downloaded file'}
+
+            file_path = clip_info.get('_filename') or clip_info.get('filename')
+            if file_path:
+                file_path = Path(file_path)
+
+            return {
+                'success': True,
+                'file_path': str(file_path) if file_path else None,
+                'filename': file_path.name if file_path else None,
+                'file_size': file_path.stat().st_size if file_path and file_path.exists() else clip_info.get('filesize'),
+                'title': clip_info.get('title'),
+                'duration': clip_info.get('duration'),
+                'clip_id': clip_info.get('id'),
+                'upload_date': clip_info.get('upload_date'),
+                'thumbnail': clip_info.get('thumbnail'),
+            }
+
+        except Exception as e:
+            self.log(f"Error downloading clip: {e}", 'error')
+            return {'success': False, 'error': str(e)}
+
+    async def get_channel_avatar(self, channel_name: str) -> Optional[str]:
+        """
+        Try to fetch channel avatar from Twitch
+
+        Note: This requires either Twitch API credentials or scraping.
+        Returns None if avatar cannot be fetched.
+        """
+        profile = await self.get_channel_profile(channel_name)
+        return profile.get('avatar') if profile else None
+
+    async def get_channel_profile(self, channel_name: str) -> Optional[Dict]:
+        """
+        Fetch channel profile info using Twitch's GQL API.
+
+        Returns dict with avatar, banner, display_name, bio, joined_date, external_links
+        """
+        try:
+            import aiohttp
+
+            async with aiohttp.ClientSession() as session:
+                headers = {
+                    'Client-Id': 'kimne78kx3ncx6brgo4mv6wki5h1ko',  # Public Twitch web client ID
+                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+                }
+
+                # GQL query for comprehensive user info
+                query = '''
+                query {
+                    user(login: "%s") {
+                        id
+                        login
+                        displayName
+                        description
+                        createdAt
+                        profileImageURL(width: 300)
+                        bannerImageURL
+                        offlineImageURL
+                        channel {
+                            socialMedias {
+                                name
+                                url
+                            }
+                        }
+                    }
+                }
+                ''' % channel_name
+
+                async with session.post(
+                    'https://gql.twitch.tv/gql',
+                    headers=headers,
+                    json={'query': query},
+                    timeout=aiohttp.ClientTimeout(total=15)
+                ) as resp:
+                    if resp.status == 200:
+                        data = await resp.json()
+                        user = data.get('data', {}).get('user')
+
+                        if not user:
+                            self.log(f"Twitch user not found: {channel_name}", 'warning')
+                            return None
+
+                        result = {}
+
+                        # Avatar
+                        if user.get('profileImageURL'):
+                            result['avatar'] = user['profileImageURL']
+
+                        # Banner - prefer offlineImageURL (larger), fall back to bannerImageURL
+                        if user.get('offlineImageURL'):
+                            result['banner'] = user['offlineImageURL']
+                        elif user.get('bannerImageURL'):
+                            result['banner'] = user['bannerImageURL']
+
+                        # Display name
+                        if user.get('displayName'):
+                            result['display_name'] = user['displayName']
+
+                        # Bio/description
+                        if user.get('description'):
+                            result['bio'] = user['description']
+
+                        # Joined date (format: "Jun 10, 2016")
+                        if user.get('createdAt'):
+                            try:
+                                created_dt = datetime.fromisoformat(user['createdAt'].replace('Z', '+00:00'))
+                                result['joined_date'] = created_dt.strftime('%b %d, %Y')
+                                self.log(f"Found Twitch joined date: {result['joined_date']}", 'debug')
+                            except (ValueError, TypeError):
+                                pass
+
+                        # Social links
+                        social_medias = user.get('channel', {}).get('socialMedias', [])
+                        if social_medias:
+                            links = []
+                            for social in social_medias:
+                                name = social.get('name', 'Link')
+                                url = social.get('url', '')
+                                if url:
+                                    # Capitalize first letter of name
+                                    title = name.capitalize() if name else 'Link'
+                                    links.append({'title': title, 'url': url})
+                            if links:
+                                result['external_links'] = json.dumps(links)
+                                self.log(f"Found {len(links)} Twitch external links", 'debug')
+
+                        if result:
+                            self.log(f"Fetched Twitch profile via GQL for {channel_name}: {list(result.keys())}", 'debug')
+                            return result
+
+        except Exception as e:
+            self.log(f"Could not fetch Twitch profile: {e}", 'debug')
+
+        return None
+
+    async def get_creator(self, channel_url: str) -> Optional[Creator]:
+        """
+        Get Creator object from channel URL
+        """
+        info = await self.get_channel_info(channel_url)
+        if not info:
+            return None
+
+        channel_name = info.get('channel_name') or self.extract_channel_name(channel_url)
+
+        # Try to get the actual channel avatar (not clip thumbnail)
+        avatar_url = await self.get_channel_avatar(channel_name)
+
+        return Creator(
+            creator_id=info.get('channel_id') or channel_name,
+            service_id='twitch',
+            platform='twitch',
+            username=channel_name or 'Unknown',
+            display_name=channel_name,
+            profile_image_url=avatar_url,  # Use actual avatar, not clip thumbnail
+            post_count=info.get('clip_count', 0)
+        )
+
+    async def get_posts(self, channel_url: str, since_date: str = None,
+                        max_clips: int = None, progress_callback=None) -> List[Post]:
+        """
+        Get clips as Post objects
+        """
+        clips = await self.get_channel_clips(channel_url, since_date, max_clips, progress_callback)
+
+        posts = []
+        for clip in clips:
+            # Create attachment for the clip
+            attachment = Attachment(
+                name=f"{clip['title']}.mp4",
+                file_type='video',
+                extension='.mp4',
+                server_path=clip['url'],  # Use URL as server_path
+                download_url=clip['url'],
+                duration=clip.get('duration'),
+            )
+
+            post = Post(
+                post_id=clip['clip_id'],
+                service_id='twitch',
+                platform='twitch',
+                creator_id=clip.get('channel_name', ''),
+                title=clip['title'],
+                content='',  # Clips don't have descriptions
+                published_at=clip.get('upload_date'),
+                attachments=[attachment],
+            )
+            posts.append(post)
+
+        return posts
--- a/modules/paid_content/utils.py
+++ b/modules/paid_content/utils.py
@@ -0,0 +1,484 @@
+"""
+Utility functions for Paid Content feature
+"""
+
+import re
+from typing import Optional, Tuple
+from urllib.parse import urlparse
+
+
+def _extract_xenforo_search_query(parsed) -> Optional[str]:
+    """Extract the 'q' search parameter from a XenForo search URL."""
+    from urllib.parse import parse_qs, unquote_plus
+    qs = parse_qs(parsed.query)
+    query = qs.get('q', [''])[0]
+    if not query:
+        m = re.search(r'[&?]q=([^&]+)', parsed.query)
+        if m:
+            query = unquote_plus(m.group(1))
+    return query or None
+
+
+def parse_creator_url(url: str) -> Optional[Tuple[str, str, str]]:
+    """
+    Parse a Coomer/Kemono/YouTube/Twitch/Fansly creator URL
+
+    Args:
+        url: URL like https://coomer.party/onlyfans/user/creatorid
+             or https://www.youtube.com/@channelhandle
+             or https://www.youtube.com/channel/UCxxxxx
+             or https://www.twitch.tv/username/clips
+             or https://fansly.com/username
+
+    Returns:
+        Tuple of (service_id, platform, creator_id) or None if invalid
+    """
+    try:
+        parsed = urlparse(url)
+        host = parsed.netloc.lower()
+
+        # Handle YouTube URLs
+        if 'youtube.com' in host or 'youtu.be' in host:
+            channel_id = _extract_youtube_channel_id(url)
+            if channel_id:
+                return ('youtube', 'youtube', channel_id)
+            return None
+
+        # Handle Twitch URLs
+        if 'twitch.tv' in host:
+            channel_name = _extract_twitch_channel_name(url)
+            if channel_name:
+                return ('twitch', 'twitch', channel_name)
+            return None
+
+        # Handle Fansly URLs (direct API)
+        if 'fansly.com' in host:
+            username = _extract_fansly_username(url)
+            if username:
+                return ('fansly_direct', 'fansly', username)
+            return None
+
+        # Handle OnlyFans URLs (direct API)
+        if 'onlyfans.com' in host:
+            path_parts = [p for p in parsed.path.strip('/').split('/') if p]
+            if path_parts:
+                username = path_parts[0]
+                if username.lower() not in ('my', 'api2', 'settings', 'search', 'notifications', 'chats', 'vault', 'lists', 'bookmarks', 'statements', 'help', 'terms', 'privacy', 'dmca', 'contact'):
+                    return ('onlyfans_direct', 'onlyfans', username)
+            return None
+
+        # Handle Pornhub URLs
+        if 'pornhub.com' in host:
+            creator_id = _extract_pornhub_creator_id(url)
+            if creator_id:
+                return ('pornhub', 'pornhub', creator_id)
+            return None
+
+        # Handle XHamster URLs
+        if 'xhamster' in host:
+            creator_id = _extract_xhamster_creator_id(url)
+            if creator_id:
+                return ('xhamster', 'xhamster', creator_id)
+            return None
+
+        # Handle TikTok URLs
+        if 'tiktok.com' in host:
+            username = _extract_tiktok_username(url)
+            if username:
+                return ('tiktok', 'tiktok', username)
+            return None
+
+        # Handle Instagram URLs
+        if 'instagram.com' in host:
+            username = _extract_instagram_username(url)
+            if username:
+                return ('instagram', 'instagram', username)
+            return None
+
+        # Handle BestEyeCandy URLs
+        if 'besteyecandy.com' in host:
+            cid_match = re.search(r'cid-(\d+)', parsed.path)
+            slug_match = re.search(r'/([^/]+)\.html$', parsed.path)
+            if cid_match and slug_match:
+                slug = slug_match.group(1)
+                return ('besteyecandy', 'besteyecandy', f"{cid_match.group(1)}/{slug}")
+            elif cid_match:
+                return ('besteyecandy', 'besteyecandy', cid_match.group(1))
+            return None
+
+        # Handle Coppermine gallery URLs
+        # Match: domain.com/gallery/, domain.com/cpg/, domain.com/coppermine/
+        # Also match direct index.php/thumbnails.php/displayimage.php pages
+        if any(p in parsed.path.lower() for p in ['/gallery/', '/cpg/', '/coppermine/']) or \
+           re.search(r'(?:index|thumbnails|displayimage)\.php', parsed.path):
+            # Normalize to gallery root
+            base_path = re.sub(
+                r'(?:index|thumbnails|displayimage)\.php.*$', '', parsed.path
+            )
+            base_path = base_path.rstrip('/')
+            if base_path:
+                # Use domain + path as creator_id (e.g. kylie-jenner.org/gallery)
+                creator_id = host.replace('www.', '') + base_path
+                return ('coppermine', 'coppermine', creator_id)
+
+        # Handle Bellazon URLs (forum threads as creators)
+        if 'bellazon' in host:
+            match = re.search(r'/topic/(\d+)-([^/]+)', parsed.path)
+            if match:
+                topic_id = match.group(1)
+                return ('bellazon', 'bellazon', topic_id)
+            return None
+
+        # Handle Reddit URLs
+        if 'reddit.com' in host:
+            # Handle reddit.com/r/subreddit, old.reddit.com/r/subreddit, etc.
+            path_parts = [p for p in parsed.path.strip('/').split('/') if p]
+            if len(path_parts) >= 2 and path_parts[0] == 'r':
+                subreddit = path_parts[1].lower()
+                return ('reddit', 'reddit', subreddit)
+            return None
+
+        # Handle Snapchat URLs
+        if 'snapchat.com' in host:
+            # Handle snapchat.com/@username and story.snapchat.com/@username
+            path_parts = [p for p in parsed.path.strip('/').split('/') if p]
+            if path_parts:
+                username = path_parts[0].lstrip('@')
+                if username:
+                    return ('snapchat', 'snapchat', username)
+            return None
+
+        # Handle HQCelebCorner URLs
+        if 'hqcelebcorner' in host:
+            query = _extract_xenforo_search_query(parsed)
+            if query:
+                return ('hqcelebcorner', 'hqcelebcorner', query)
+            return None
+
+        # Handle PicturePub URLs
+        if 'picturepub' in host:
+            query = _extract_xenforo_search_query(parsed)
+            if query:
+                return ('picturepub', 'picturepub', query)
+            return None
+
+        # Handle Soundgasm URLs
+        if 'soundgasm.net' in host:
+            path_parts = [p for p in parsed.path.strip('/').split('/') if p]
+            if len(path_parts) >= 2 and path_parts[0] in ('u', 'user'):
+                return ('soundgasm', 'soundgasm', path_parts[1])
+            return None
+
+        # Handle Liltsome URLs (archive, maps to soundgasm platform)
+        if 'liltsome.yerf.org' in host:
+            # Hash-based routing: /#/artist/{name}
+            fragment = parsed.fragment  # e.g. "/artist/kinkyshibby"
+            if fragment:
+                parts = [p for p in fragment.strip('/').split('/') if p]
+                if len(parts) >= 2 and parts[0] == 'artist':
+                    return ('soundgasm', 'soundgasm', parts[1])
+            return None
+
+        # Determine service (Coomer/Kemono)
+        if 'coomer' in host:
+            service_id = 'coomer'
+        elif 'kemono' in host:
+            service_id = 'kemono'
+        else:
+            return None
+
+        # Parse path: /platform/user/creatorid
+        path_parts = [p for p in parsed.path.strip('/').split('/') if p]
+
+        if len(path_parts) >= 3 and path_parts[1] == 'user':
+            platform = path_parts[0]
+            creator_id = path_parts[2]
+            return (service_id, platform, creator_id)
+
+        return None
+
+    except Exception:
+        return None
+
+
+def _extract_youtube_channel_id(url: str) -> Optional[str]:
+    """
+    Extract channel identifier from various YouTube URL formats
+
+    Supports:
+    - youtube.com/channel/UC...
+    - youtube.com/@handle
+    - youtube.com/c/channelname
+    - youtube.com/user/username
+    """
+    patterns = [
+        r'youtube\.com/channel/([a-zA-Z0-9_-]+)',
+        r'youtube\.com/@([a-zA-Z0-9_.-]+)',
+        r'youtube\.com/c/([a-zA-Z0-9_-]+)',
+        r'youtube\.com/user/([a-zA-Z0-9_-]+)',
+    ]
+
+    for pattern in patterns:
+        match = re.search(pattern, url)
+        if match:
+            return match.group(1)
+
+    return None
+
+
+def _extract_twitch_channel_name(url: str) -> Optional[str]:
+    """
+    Extract channel name from Twitch URL
+
+    Supports:
+    - twitch.tv/username
+    - twitch.tv/username/clips
+    - m.twitch.tv/username/clips
+    """
+    patterns = [
+        r'twitch\.tv/([a-zA-Z0-9_]+)(?:/clips)?',
+    ]
+
+    for pattern in patterns:
+        match = re.search(pattern, url)
+        if match:
+            return match.group(1).lower()
+
+    return None
+
+
+def _extract_fansly_username(url: str) -> Optional[str]:
+    """
+    Extract username from Fansly URL
+
+    Supports:
+    - fansly.com/username
+    - fansly.com/username/posts
+    - fansly.com/username/media
+    """
+    patterns = [
+        r'fansly\.com/([a-zA-Z0-9_.-]+)(?:/(?:posts|media))?',
+    ]
+
+    for pattern in patterns:
+        match = re.search(pattern, url)
+        if match:
+            username = match.group(1)
+            # Filter out known non-username paths
+            if username.lower() not in ('explore', 'search', 'settings', 'notifications', 'messages', 'live'):
+                return username
+
+    return None
+
+
+def _extract_pornhub_creator_id(url: str) -> Optional[str]:
+    """Extract creator identifier from Pornhub URL, returns 'type/name' format"""
+    patterns = [
+        r'pornhub\.com/pornstar/([a-zA-Z0-9_-]+)',
+        r'pornhub\.com/channels/([a-zA-Z0-9_-]+)',
+        r'pornhub\.com/users/([a-zA-Z0-9_-]+)',
+        r'pornhub\.com/model/([a-zA-Z0-9_-]+)',
+    ]
+    for pattern in patterns:
+        match = re.search(pattern, url)
+        if match:
+            # Store as "type/name" to preserve the URL type
+            type_match = re.search(r'pornhub\.com/(pornstar|channels|users|model)/', url)
+            return f"{type_match.group(1)}/{match.group(1)}" if type_match else match.group(1)
+    return None
+
+
+def _extract_xhamster_creator_id(url: str) -> Optional[str]:
+    """Extract creator identifier from XHamster URL, returns 'type/name' format"""
+    patterns = [
+        r'xhamster\d*\.com/creators/([a-zA-Z0-9_-]+)',
+        r'xhamster\d*\.com/channels/([a-zA-Z0-9_-]+)',
+    ]
+    for pattern in patterns:
+        match = re.search(pattern, url)
+        if match:
+            type_match = re.search(r'xhamster\d*\.com/(creators|channels)/', url)
+            return f"{type_match.group(1)}/{match.group(1)}" if type_match else match.group(1)
+    return None
+
+
+def _extract_tiktok_username(url: str) -> Optional[str]:
+    """Extract username from TikTok URL"""
+    match = re.search(r'tiktok\.com/@([a-zA-Z0-9_.]+)', url)
+    if match:
+        return match.group(1)
+    return None
+
+
+def _extract_instagram_username(url: str) -> Optional[str]:
+    """Extract username from Instagram URL"""
+    match = re.search(r'instagram\.com/([a-zA-Z0-9_.]+)/?', url)
+    if match:
+        username = match.group(1).lower()
+        non_usernames = {
+            'explore', 'reels', 'stories', 'p', 'tv', 'accounts',
+            'direct', 'about', 'legal', 'developer', 'privacy',
+            'terms', 'help', 'api', 'reel', 'tags'
+        }
+        if username not in non_usernames:
+            return username
+    return None
+
+
+def parse_post_url(url: str) -> Optional[Tuple[str, str, str, str]]:
+    """
+    Parse a Coomer/Kemono post URL
+
+    Args:
+        url: URL like https://coomer.party/onlyfans/user/creatorid/post/postid
+
+    Returns:
+        Tuple of (service_id, platform, creator_id, post_id) or None if invalid
+    """
+    try:
+        parsed = urlparse(url)
+        host = parsed.netloc.lower()
+
+        # Determine service
+        if 'coomer' in host:
+            service_id = 'coomer'
+        elif 'kemono' in host:
+            service_id = 'kemono'
+        else:
+            return None
+
+        # Parse path: /platform/user/creatorid/post/postid
+        path_parts = [p for p in parsed.path.strip('/').split('/') if p]
+
+        if len(path_parts) >= 5 and path_parts[1] == 'user' and path_parts[3] == 'post':
+            platform = path_parts[0]
+            creator_id = path_parts[2]
+            post_id = path_parts[4]
+            return (service_id, platform, creator_id, post_id)
+
+        return None
+
+    except Exception:
+        return None
+
+
+def format_file_size(size_bytes: int) -> str:
+    """Format file size in human-readable format"""
+    if size_bytes is None:
+        return 'Unknown'
+
+    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
+        if abs(size_bytes) < 1024.0:
+            return f"{size_bytes:.1f} {unit}"
+        size_bytes /= 1024.0
+
+    return f"{size_bytes:.1f} PB"
+
+
+def sanitize_filename(name: str, max_length: int = 200) -> str:
+    """
+    Sanitize a string for use in a filename
+
+    Args:
+        name: String to sanitize
+        max_length: Maximum length of result
+
+    Returns:
+        Sanitized filename
+    """
+    if not name:
+        return 'unnamed'
+
+    # Remove/replace invalid characters
+    name = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', name)
+    name = re.sub(r'\s+', '-', name.strip())
+    name = name.strip('.-')
+
+    if len(name) > max_length:
+        name = name[:max_length]
+
+    return name or 'unnamed'
+
+
+def extract_platform_from_domain(domain: str) -> Optional[str]:
+    """Extract platform name from domain"""
+    domain = domain.lower().replace('www.', '')
+
+    platform_domains = {
+        'onlyfans.com': 'onlyfans',
+        'fansly.com': 'fansly',
+        'patreon.com': 'patreon',
+        'fanbox.cc': 'fanbox',
+        'gumroad.com': 'gumroad',
+        'subscribestar.com': 'subscribestar',
+        'subscribestar.adult': 'subscribestar',
+        'discord.com': 'discord',
+        'discord.gg': 'discord',
+        'candfans.jp': 'candfans',
+    }
+
+    return platform_domains.get(domain)
+
+
+def detect_content_type(filename: str) -> str:
+    """Detect content type from filename extension"""
+    if not filename:
+        return 'unknown'
+
+    ext = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
+
+    image_exts = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff', 'heic', 'heif', 'avif'}
+    video_exts = {'mp4', 'mov', 'avi', 'mkv', 'webm', 'm4v', 'wmv', 'flv', 'mpeg', 'mpg', '3gp'}
+    audio_exts = {'mp3', 'wav', 'flac', 'aac', 'm4a', 'ogg', 'wma'}
+    archive_exts = {'zip', 'rar', '7z', 'tar', 'gz', 'bz2'}
+    document_exts = {'pdf', 'doc', 'docx', 'txt', 'rtf', 'odt'}
+
+    if ext in image_exts:
+        return 'image'
+    elif ext in video_exts:
+        return 'video'
+    elif ext in audio_exts:
+        return 'audio'
+    elif ext in archive_exts:
+        return 'archive'
+    elif ext in document_exts:
+        return 'document'
+    else:
+        return 'unknown'
+
+
+def get_service_platforms(service_id: str) -> list:
+    """Get supported platforms for a service"""
+    platforms = {
+        'coomer': ['onlyfans', 'fansly', 'candfans'],
+        'kemono': ['patreon', 'fanbox', 'gumroad', 'subscribestar', 'discord'],
+        'youtube': ['youtube'],
+        'twitch': ['twitch'],
+        'fansly_direct': ['fansly'],
+        'onlyfans_direct': ['onlyfans'],
+        'pornhub': ['pornhub'],
+        'xhamster': ['xhamster'],
+        'tiktok': ['tiktok'],
+        'instagram': ['instagram'],
+        'soundgasm': ['soundgasm'],
+        'bellazon': ['bellazon'],
+        'besteyecandy': ['besteyecandy'],
+        'snapchat': ['snapchat'],
+        'reddit': ['reddit'],
+        'coppermine': ['coppermine'],
+        'hqcelebcorner': ['hqcelebcorner'],
+        'picturepub': ['picturepub'],
+    }
+    return platforms.get(service_id, [])
+
+
+def get_service_base_url(service_id: str) -> Optional[str]:
+    """
+    Get base URL for a service.
+
+    Note: For dynamic URLs, use the database (paid_content_services table).
+    These are fallback defaults only.
+    """
+    # Import here to avoid circular dependency
+    from .api_client import PaidContentAPIClient
+    return PaidContentAPIClient.DEFAULT_SERVICE_URLS.get(service_id)
--- a/modules/paid_content/xenforo_forum_client.py
+++ b/modules/paid_content/xenforo_forum_client.py
@@ -0,0 +1,744 @@
+"""
+Generic XenForo Forum Client for Paid Content
+
+Scrapes XenForo-based celebrity image forums (HQCelebCorner, PicturePub, etc.)
+treating each celebrity name as a "creator" and each matching thread as a post.
+
+Images are hosted on external hosts (imagebam, pixhost, imagetwist, etc.)
+and resolved via ImageHostHandler from forum_downloader.
+"""
+
+import asyncio
+import html
+import json
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Dict, List, Optional, Set
+from urllib.parse import urlparse, unquote_plus
+
+import aiohttp
+
+from modules.base_module import LoggingMixin
+from .models import Post, Attachment
+
+
+class XenForoForumClient(LoggingMixin):
+    """Generic client for scraping XenForo-based forum threads.
+
+    Each instance is bound to one forum through the service_id, base_url and
+    cookie_path given to __init__. Pages are fetched with aiohttp using the
+    class-level HEADERS plus cookies loaded from the cookie file.
+    """
+
+    # FlareSolverr endpoint used by _fetch_via_flaresolverr
+    FLARESOLVERR_URL = 'http://localhost:8191/v1'
+
+    # Base request headers; _get_request_headers copies these and adds cookies
+    HEADERS = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
+                       '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+        'Accept-Language': 'en-US,en;q=0.9',
+    }
+
+    # Lower-case image file extensions, without the leading dot
+    IMAGE_EXTS = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff'}
+
+    # External image host domains to look for in post links
+    IMAGE_HOST_DOMAINS = [
+        'imagebam.com', 'pixhost.to', 'imagetwist.com', 'imgur.com',
+        'imgbox.com', 'postimg.cc', 'postimages.org', 'catbox.moe',
+        'turboimagehost.com', 'imageban.ru', 'img.yt', 'acidimg.cc',
+        'pixxxels.cc', 'imx.to', 'imgbb.com', 'ibb.co',
+    ]
+
+    def __init__(self, service_id: str, base_url: str, cookie_path: str, log_callback=None):
+        """
+        Configure the client for one forum
+
+        Args:
+            service_id: Stored as SERVICE_ID and passed to _init_logger as
+                the default module name
+            base_url: Forum root URL; any trailing '/' is removed
+            cookie_path: Path of the JSON cookie file read lazily by _load_cookies
+            log_callback: Passed through to _init_logger
+        """
+        self.SERVICE_ID = service_id
+        self.BASE_URL = base_url.rstrip('/')
+        self.COOKIE_PATH = cookie_path
+        self._init_logger('PaidContent', log_callback, default_module=service_id)
+        # Cookie cache; None means "not loaded yet"
+        self._cookies: Optional[Dict[str, str]] = None
+        # None = not looked up yet; False = import failed (see _get_image_host_handler)
+        self._image_host_handler = None
+
+    # ------------------------------------------------------------------
+    # Cookie handling
+    # ------------------------------------------------------------------
+
+    def _load_cookies(self) -> Dict[str, str]:
+        """Load Playwright-format cookies and convert to {name: value} dict."""
+        if self._cookies is not None:
+            return self._cookies
+
+        try:
+            cookie_path = Path(self.COOKIE_PATH)
+            if cookie_path.exists():
+                with open(cookie_path, 'r') as f:
+                    raw_cookies = json.load(f)
+                self._cookies = {c['name']: c['value'] for c in raw_cookies}
+                self.log(f"Loaded {len(self._cookies)} cookies from {self.COOKIE_PATH}", 'debug')
+            else:
+                self.log(f"Cookie file not found: {self.COOKIE_PATH}", 'warning')
+                self._cookies = {}
+        except Exception as e:
+            self.log(f"Error loading cookies: {e}", 'warning')
+            self._cookies = {}
+
+        return self._cookies
+
+    def _get_cookie_header(self) -> str:
+        """Build Cookie header string from loaded cookies."""
+        cookies = self._load_cookies()
+        return '; '.join(f'{k}={v}' for k, v in cookies.items())
+
+    def _get_request_headers(self) -> Dict[str, str]:
+        """Get headers with cookies for authenticated requests."""
+        headers = dict(self.HEADERS)
+        cookie_str = self._get_cookie_header()
+        if cookie_str:
+            headers['Cookie'] = cookie_str
+        return headers
+
+    # ------------------------------------------------------------------
+    # Image host handling
+    # ------------------------------------------------------------------
+
+    def _get_image_host_handler(self):
+        """Get or create ImageHostHandler instance."""
+        if self._image_host_handler is None:
+            try:
+                from modules.forum_downloader import ImageHostHandler
+                self._image_host_handler = ImageHostHandler
+                self.log("Loaded ImageHostHandler from forum_downloader", 'debug')
+            except ImportError:
+                self.log("ImageHostHandler not available", 'warning')
+                self._image_host_handler = False  # sentinel to avoid retrying
+        return self._image_host_handler if self._image_host_handler is not False else None
+
+    # ------------------------------------------------------------------
+    # HTTP helpers
+    # ------------------------------------------------------------------
+
+    async def _fetch_page(self, session: aiohttp.ClientSession, url: str) -> Optional[str]:
+        """Fetch a page with cookies. Falls back to FlareSolverr on 403."""
+        headers = self._get_request_headers()
+        try:
+            async with session.get(url, headers=headers, allow_redirects=True) as resp:
+                if resp.status == 200:
+                    return await resp.text()
+                if resp.status == 403:
+                    self.log(f"Got 403 for {url}, trying FlareSolverr", 'debug')
+                    return await self._fetch_via_flaresolverr(url)
+                self.log(f"HTTP {resp.status} for {url}", 'warning')
+                return None
+        except Exception as e:
+            self.log(f"Error fetching {url}: {e}", 'warning')
+            return await self._fetch_via_flaresolverr(url)
+
+    async def _fetch_via_flaresolverr(self, url: str) -> Optional[str]:
+        """Fetch a page using FlareSolverr to bypass Cloudflare."""
+        try:
+            import requests as std_requests
+        except ImportError:
+            self.log("requests library not available for FlareSolverr", 'warning')
+            return None
+
+        fs_session_id = None
+        try:
+            # Create session
+            resp = std_requests.post(self.FLARESOLVERR_URL, json={
+                'cmd': 'sessions.create'
+            }, timeout=30)
+            data = resp.json()
+            if data.get('status') != 'ok':
+                self.log("Failed to create FlareSolverr session", 'warning')
+                return None
+            fs_session_id = data.get('session')
+
+            # Fetch page
+            cookies = self._load_cookies()
+            resp = std_requests.post(self.FLARESOLVERR_URL, json={
+                'cmd': 'request.get',
+                'url': url,
+                'session': fs_session_id,
+                'cookies': [{'name': k, 'value': v} for k, v in cookies.items()],
+                'maxTimeout': 60000,
+            }, timeout=70)
+            page_data = resp.json()
+            if page_data.get('status') == 'ok':
+                return page_data.get('solution', {}).get('response', '')
+            self.log(f"FlareSolverr failed for {url}: {page_data.get('message', 'unknown')}", 'warning')
+            return None
+
+        except Exception as e:
+            self.log(f"FlareSolverr error for {url}: {e}", 'warning')
+            return None
+        finally:
+            if fs_session_id:
+                try:
+                    std_requests.post(self.FLARESOLVERR_URL, json={
+                        'cmd': 'sessions.destroy',
+                        'session': fs_session_id,
+                    }, timeout=10)
+                except Exception:
+                    pass
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    async def search_threads(self, query: str) -> List[Dict]:
+        """Search for threads matching a celebrity name.
+
+        Flow: fetch the search form page to obtain the _xfToken CSRF value,
+        POST a title-only search ordered by date, parse the first result
+        page, then follow result pagination until no next page is found, a
+        page fails to load, or a page yields no threads.
+
+        Any failure before the first result page is parsed returns an empty
+        list; failures while paginating return what was collected so far.
+
+        Returns list of {thread_id, title, url, reply_count}.
+        """
+        threads = []
+        timeout = aiohttp.ClientTimeout(total=30)
+
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            # XenForo search: POST form to /search/search
+            search_url = f'{self.BASE_URL}/search/search'
+            headers = self._get_request_headers()
+            headers['Content-Type'] = 'application/x-www-form-urlencoded'
+
+            # Need CSRF token - fetch search page first
+            search_page_url = f'{self.BASE_URL}/search/'
+            page_html = await self._fetch_page(session, search_page_url)
+            if not page_html:
+                self.log("Failed to fetch search page", 'warning')
+                return threads
+
+            # Extract CSRF token (an empty token is submitted if none is found)
+            csrf_match = re.search(r'name="_xfToken"\s+value="([^"]+)"', page_html)
+            xf_token = csrf_match.group(1) if csrf_match else ''
+
+            form_data = {
+                'keywords': query,
+                'search_type': 'post',
+                'c[title_only]': '1',
+                'order': 'date',
+                '_xfToken': xf_token,
+            }
+
+            # The POST goes through the session directly rather than
+            # _fetch_page, so it has no FlareSolverr fallback
+            try:
+                async with session.post(search_url, headers=headers, data=form_data,
+                                        allow_redirects=True) as resp:
+                    if resp.status != 200:
+                        self.log(f"Search returned HTTP {resp.status}", 'warning')
+                        return threads
+                    result_html = await resp.text()
+                    # Final URL after redirects; used to build pagination links
+                    result_url = str(resp.url)
+            except Exception as e:
+                self.log(f"Search failed: {e}", 'error')
+                return threads
+
+            threads = self._parse_search_results(result_html)
+
+            # Handle search result pagination
+            page = 2
+            while True:
+                next_url = self._find_next_search_page(result_html, result_url, page)
+                if not next_url:
+                    break
+                await asyncio.sleep(0.3)  # Pause between result pages
+                result_html = await self._fetch_page(session, next_url)
+                if not result_html:
+                    break
+                more = self._parse_search_results(result_html)
+                if not more:
+                    break
+                threads.extend(more)
+                page += 1
+
+        self.log(f"Search for '{query}' found {len(threads)} threads", 'info')
+        return threads
+
+    async def get_thread_info(self, thread_url: str) -> Optional[Dict]:
+        """Fetch page 1 of a thread and extract metadata.
+
+        Returns {thread_id, title, reply_count, page_count, url}.
+        """
+        timeout = aiohttp.ClientTimeout(total=30)
+        try:
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                page_html = await self._fetch_page(session, thread_url)
+                if not page_html:
+                    return None
+
+                title = self._extract_title(page_html)
+                page_count = self._extract_page_count(page_html)
+                reply_count = self._extract_reply_count(page_html)
+                thread_id = self._extract_thread_id(thread_url)
+
+                return {
+                    'thread_id': thread_id,
+                    'title': title or 'Untitled',
+                    'reply_count': reply_count,
+                    'page_count': page_count,
+                    'url': thread_url.split('#')[0].rstrip('/'),
+                }
+        except Exception as e:
+            self.log(f"Error getting thread info for {thread_url}: {e}", 'error')
+            return None
+
+    async def get_thread_images(self, thread_url: str, page_count: int = None,
+                                start_page: int = 1) -> List[Dict]:
+        """Scrape all pages of a thread and extract image host links.
+
+        Returns list of {url, host, post_number} dicts (deduplicated).
+        """
+        images = []
+        seen_urls: Set[str] = set()
+
+        timeout = aiohttp.ClientTimeout(total=30)
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            # If page_count not provided, fetch page 1 to determine it
+            if page_count is None:
+                page1_html = await self._fetch_page(session, thread_url)
+                if not page1_html:
+                    return images
+                page_count = self._extract_page_count(page1_html)
+                page_images = self._extract_image_links(page1_html)
+                for img in page_images:
+                    if img['url'] not in seen_urls:
+                        seen_urls.add(img['url'])
+                        images.append(img)
+                start_page = 2
+
+            for page_num in range(start_page, page_count + 1):
+                page_url = self._build_page_url(thread_url, page_num)
+                await asyncio.sleep(0.5)  # Rate limit
+
+                page_html = await self._fetch_page(session, page_url)
+                if not page_html:
+                    self.log(f"Failed to fetch page {page_num}, stopping", 'warning')
+                    break
+
+                page_images = self._extract_image_links(page_html)
+                new_count = 0
+                for img in page_images:
+                    if img['url'] not in seen_urls:
+                        seen_urls.add(img['url'])
+                        images.append(img)
+                        new_count += 1
+
+                self.log(f"Page {page_num}/{page_count}: {new_count} new image links", 'debug')
+
+        self.log(f"Total: {len(images)} unique image links from {page_count} pages", 'info')
+        return images
+
+    async def resolve_image_url(self, host_page_url: str, session: aiohttp.ClientSession = None) -> Optional[str]:
+        """Resolve an image host page URL to a direct image URL.
+
+        Uses ImageHostHandler from forum_downloader where possible.
+        """
+        handler = self._get_image_host_handler()
+
+        # Try direct extraction without fetching the page
+        if handler:
+            direct = handler.extract_direct_url(host_page_url)
+            if direct:
+                return direct
+
+        # imgbox thumbnail → full image conversion (thumbs2 → images2)
+        m = re.match(r'https?://thumbs(\d*)\.imgbox\.com/([a-f0-9]+/[a-f0-9]+/)(\w+)_t\.\w+', host_page_url)
+        if m:
+            return f"https://images{m.group(1)}.imgbox.com/{m.group(2)}{m.group(3)}_o.jpg"
+
+        # For hosts that need page content, fetch and parse
+        own_session = session is None
+        if own_session:
+            timeout = aiohttp.ClientTimeout(total=30)
+            session = aiohttp.ClientSession(timeout=timeout)
+
+        try:
+            # ImageBam requires sfw_inter=1 cookie to bypass consent page
+            headers = dict(self.HEADERS)
+            if 'imagebam' in host_page_url:
+                headers['Cookie'] = 'sfw_inter=1'
+
+            try:
+                async with session.get(host_page_url, headers=headers,
+                                       allow_redirects=True) as resp:
+                    if resp.status != 200:
+                        return None
+                    page_content = await resp.text()
+                    final_url = str(resp.url)
+            except Exception as e:
+                self.log(f"Failed to fetch image host page {host_page_url}: {e}", 'debug')
+                return None
+
+            # Try handler with page content
+            if handler:
+                direct = handler.extract_direct_url(host_page_url, page_content=page_content)
+                if direct:
+                    return direct
+
+            # Manual extraction fallbacks
+            return self._extract_direct_image_from_html(host_page_url, page_content, final_url)
+
+        finally:
+            if own_session:
+                await session.close()
+
+    # ------------------------------------------------------------------
+    # HTML parsing helpers
+    # ------------------------------------------------------------------
+
+    def _parse_search_results(self, html_content: str) -> List[Dict]:
+        """Parse XenForo search results page for thread links."""
+        threads = []
+
+        # Parse each contentRow block to extract title, URL, and date
+        for block_match in re.finditer(
+            r'<div\s+class="contentRow[^"]*"[^>]*>(.*?)</div>\s*</div>\s*</div>',
+            html_content, re.DOTALL
+        ):
+            block = block_match.group(1)
+
+            # Extract thread URL and title
+            title_match = re.search(
+                r'class="contentRow-title">\s*<a\s+href="([^"]*threads/[^"]*)"[^>]*>(.*?)</a>',
+                block, re.DOTALL
+            )
+            if not title_match:
+                continue
+
+            url = title_match.group(1)
+            title_raw = title_match.group(2)
+            title_raw = re.sub(r'<span\s+class="label[^"]*"[^>]*>.*?</span>', '', title_raw)
+            title_raw = re.sub(r'<span\s+class="label-append"[^>]*>.*?</span>', '', title_raw)
+            title_raw = re.sub(r'<em\s+class="textHighlight"[^>]*>(.*?)</em>', r'\1', title_raw)
+            title = html.unescape(re.sub(r'<[^>]+>', '', title_raw).strip())
+
+            if not title:
+                continue
+
+            if not url.startswith('http'):
+                url = self.BASE_URL + url
+
+            thread_id = self._extract_thread_id(url)
+            if not thread_id:
+                continue
+
+            # Extract date from <time datetime="..."> tag
+            published_at = None
+            time_match = re.search(r'<time[^>]+datetime="([^"]+)"', block)
+            if time_match:
+                published_at = time_match.group(1)
+
+            threads.append({
+                'thread_id': thread_id,
+                'title': title,
+                'url': url.split('#')[0].rstrip('/'),
+                'reply_count': 0,
+                'published_at': published_at,
+            })
+
+        # Fallback: if contentRow block parsing found nothing, try simpler title-only parsing
+        if not threads:
+            for m in re.finditer(
+                r'class="contentRow-title">\s*<a\s+href="([^"]*threads/[^"]*)"[^>]*>(.*?)</a>',
+                html_content, re.DOTALL
+            ):
+                url = m.group(1)
+                title_raw = m.group(2)
+                title_raw = re.sub(r'<span\s+class="label[^"]*"[^>]*>.*?</span>', '', title_raw)
+                title_raw = re.sub(r'<span\s+class="label-append"[^>]*>.*?</span>', '', title_raw)
+                title_raw = re.sub(r'<em\s+class="textHighlight"[^>]*>(.*?)</em>', r'\1', title_raw)
+                title = html.unescape(re.sub(r'<[^>]+>', '', title_raw).strip())
+                if not title:
+                    continue
+                if not url.startswith('http'):
+                    url = self.BASE_URL + url
+                thread_id = self._extract_thread_id(url)
+                if not thread_id:
+                    continue
+                threads.append({
+                    'thread_id': thread_id,
+                    'title': title,
+                    'url': url.split('#')[0].rstrip('/'),
+                    'reply_count': 0,
+                    'published_at': None,
+                })
+
+        # Deduplicate by thread_id
+        seen = set()
+        unique = []
+        for t in threads:
+            if t['thread_id'] not in seen:
+                seen.add(t['thread_id'])
+                unique.append(t)
+
+        return unique
+
+    def _find_next_search_page(self, html_content: str, current_url: str, page_num: int) -> Optional[str]:
+        """Find URL for the next page of search results."""
+        # XenForo pagination: <a href="...page-{N}..." class="pageNav-page">
+        pattern = rf'<a\s+href="([^"]*)"[^>]*class="pageNav-jump[^"]*"[^>]*>\s*Next'
+        m = re.search(pattern, html_content, re.IGNORECASE)
+        if m:
+            url = m.group(1)
+            if not url.startswith('http'):
+                url = self.BASE_URL + html.unescape(url)
+            return url
+        return None
+
+    # Domains/patterns for non-content images (reaction GIFs, emojis, signatures, etc.)
+    # _is_junk_url() looks for each entry as a plain substring of the
+    # lower-cased URL, so entries must be lowercase and may match anywhere
+    # (host or path).
+    JUNK_URL_PATTERNS = [
+        'giphy.com', 'tenor.com', 'gfycat.com',          # reaction GIFs
+        'jsdelivr.net', 'joypixels', 'twemoji',           # emoji CDNs
+        'wp-content/',                                      # WordPress media (blog graphics, profile pics)
+        '/unicode/', '/emoji/',                             # emoji paths
+        'haboodadi.com',                                    # forum signature images
+    ]
+
+    # Image hosts that are permanently dead (DNS gone / domain expired)
+    # Matched the same way as JUNK_URL_PATTERNS (lowercase substring of the
+    # URL), so links to these hosts are dropped by _is_junk_url().
+    DEAD_HOSTS = [
+        'someimage.com',
+    ]
+
+    def _extract_image_links(self, page_html: str) -> List[Dict]:
+        """Extract image host links from all posts on a page."""
+        images = []
+
+        # Find all message bodies: XenForo uses <article class="message ..."> and
+        # <div class="bbWrapper"> for post content
+        for content_match in re.finditer(
+            r'<div\s+class="bbWrapper">(.*?)</div>\s*(?:</div>|<div\s+class="(?:js-post|message))',
+            page_html, re.DOTALL
+        ):
+            content = content_match.group(1)
+
+            # Extract links to known image hosts
+            for link_match in re.finditer(r'<a\s+[^>]*href="([^"]+)"[^>]*>', content):
+                link_url = html.unescape(link_match.group(1))
+                if self._is_image_host_url(link_url) and not self._is_junk_url(link_url):
+                    images.append({'url': link_url, 'host': self._identify_host(link_url)})
+
+            # Also catch direct image URLs (full-size, not thumbnails)
+            # NOTE: Skip images hosted on known image host CDNs (imgbox, imgur, etc.)
+            # — legitimate gallery images are posted as <a href> links to host pages
+            #   (handled above), while inline <img> from these hosts are signatures.
+            for img_match in re.finditer(r'<img\s+[^>]*src="([^"]+)"[^>]*>', content):
+                img_url = html.unescape(img_match.group(1))
+                # Skip thumbnails, avatars, smilies, and junk
+                if any(skip in img_url.lower() for skip in [
+                    'thumb', 'avatar', 'smili', 'emoji', 'icon', 'logo',
+                    'data/assets', '/styles/', 'xenforo'
+                ]):
+                    continue
+                if self._is_junk_url(img_url):
+                    continue
+                # Skip inline images from known image hosts — these are signatures,
+                # not gallery content (gallery images come through as <a> links above)
+                if self._is_image_host_url(img_url):
+                    continue
+                if self._is_direct_image_url(img_url):
+                    images.append({'url': img_url, 'host': 'direct'})
+
+        return images
+
+    def _is_junk_url(self, url: str) -> bool:
+        """Filter out non-content images: reaction GIFs, emojis, blog graphics, dead hosts, etc."""
+        url_lower = url.lower()
+        if any(pat in url_lower for pat in self.JUNK_URL_PATTERNS):
+            return True
+        if any(host in url_lower for host in self.DEAD_HOSTS):
+            return True
+        return False
+
+    def _is_image_host_url(self, url: str) -> bool:
+        """Check if a URL belongs to a known image hosting service."""
+        try:
+            domain = urlparse(url).netloc.lower()
+            return any(host in domain for host in self.IMAGE_HOST_DOMAINS)
+        except Exception:
+            return False
+
+    def _is_direct_image_url(self, url: str) -> bool:
+        """Check if a URL points directly to an image file."""
+        try:
+            path = urlparse(url).path.lower()
+            return any(path.endswith(f'.{ext}') for ext in self.IMAGE_EXTS)
+        except Exception:
+            return False
+
+    def _identify_host(self, url: str) -> str:
+        """Identify which image host a URL belongs to."""
+        handler = self._get_image_host_handler()
+        if handler:
+            host = handler.identify_host(url)
+            if host:
+                return host
+        # Fallback
+        try:
+            domain = urlparse(url).netloc.lower()
+            for host_domain in self.IMAGE_HOST_DOMAINS:
+                if host_domain in domain:
+                    return host_domain.split('.')[0]
+        except Exception:
+            pass
+        return 'unknown'
+
+    def _extract_direct_image_from_html(self, url: str, page_content: str, final_url: str) -> Optional[str]:
+        """Manually extract direct image URL from host page HTML."""
+        domain = urlparse(url).netloc.lower()
+
+        # imagebam: <img class="main-image ..." src="..."> (class may have extra classes)
+        if 'imagebam' in domain:
+            m = re.search(r'<img\s+[^>]*src="(https?://images\d*\.imagebam\.com/[^"]+)"', page_content)
+            if m:
+                return html.unescape(m.group(1))
+            m = re.search(r'<img\s+[^>]*class="main-image[^"]*"[^>]*src="([^"]+)"', page_content)
+            if m:
+                return html.unescape(m.group(1))
+            # Alternative: og:image meta tag
+            m = re.search(r'<meta\s+property="og:image"\s+content="([^"]+)"', page_content)
+            if m:
+                return html.unescape(m.group(1))
+
+        # pixhost: <img id="image" src="..."> or img.pixhost.to URL
+        if 'pixhost' in domain:
+            m = re.search(r'<img\s+[^>]*id="image"[^>]*src="([^"]+)"', page_content)
+            if m:
+                return html.unescape(m.group(1))
+            # Convert thumbnail URL to full: t{N}.pixhost.to/thumbs/ -> img{N}.pixhost.to/images/
+            m = re.search(r'https?://t(\d+)\.pixhost\.to/thumbs/(\d+)/(.+)', url)
+            if m:
+                return f"https://img{m.group(1)}.pixhost.to/images/{m.group(2)}/{m.group(3)}"
+
+        # imagetwist: <img class="pic" src="...">
+        if 'imagetwist' in domain:
+            m = re.search(r'<img\s+[^>]*class="pic"[^>]*src="([^"]+)"', page_content)
+            if m:
+                return html.unescape(m.group(1))
+            m = re.search(r'<p\s+[^>]*style="text-align:center"[^>]*>\s*<img\s+[^>]*src="([^"]+)"',
+                          page_content)
+            if m:
+                return html.unescape(m.group(1))
+
+        # imgbox: <img id="img" src="..."> or src before id
+        if 'imgbox' in domain:
+            m = re.search(r'<img\s+[^>]*id="img"[^>]*src="([^"]+)"', page_content)
+            if m:
+                return html.unescape(m.group(1))
+            m = re.search(r'<img\s+[^>]*src="([^"]+)"[^>]*id="img"', page_content)
+            if m:
+                return html.unescape(m.group(1))
+            # Direct image URL pattern
+            m = re.search(r'(https?://images\d*\.imgbox\.com/[^\s"<>]+)', page_content)
+            if m:
+                return html.unescape(m.group(1))
+
+        # turboimagehost: <img class="uImage" src="...">
+        if 'turboimagehost' in domain:
+            m = re.search(r'<img\s+[^>]*class="uImage"[^>]*src="([^"]+)"', page_content)
+            if m:
+                return html.unescape(m.group(1))
+
+        # acidimg: <img class="centred" src="...">
+        if 'acidimg' in domain:
+            m = re.search(r'<img\s+[^>]*class="centred"[^>]*src="([^"]+)"', page_content)
+            if m:
+                return html.unescape(m.group(1))
+
+        # pixxxels: same pattern as acidimg
+        if 'pixxxels' in domain:
+            m = re.search(r'<img\s+[^>]*class="centred"[^>]*src="([^"]+)"', page_content)
+            if m:
+                return html.unescape(m.group(1))
+
+        # imx.to: <img class="image-show" src="...">
+        if 'imx.to' in domain:
+            m = re.search(r'<img\s+[^>]*class="image-show"[^>]*src="([^"]+)"', page_content)
+            if m:
+                return html.unescape(m.group(1))
+
+        # Generic: try og:image meta tag
+        m = re.search(r'<meta\s+property="og:image"\s+content="([^"]+)"', page_content)
+        if m:
+            img_url = html.unescape(m.group(1))
+            if self._is_direct_image_url(img_url):
+                return img_url
+
+        return None
+
+    # ------------------------------------------------------------------
+    # Utility helpers
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _extract_title(page_html: str) -> Optional[str]:
+        """Extract thread title from XenForo <h1 class="p-title-value">."""
+        m = re.search(r'<h1\s+class="p-title-value"[^>]*>(.*?)</h1>', page_html, re.DOTALL)
+        if m:
+            # Remove inner tags (like <span> for prefixes/labels, viewer count spans)
+            title = re.sub(r'<[^>]+>', '', m.group(1))
+            # Clean up non-breaking spaces and extra whitespace
+            title = title.replace('\xa0', ' ')
+            title = re.sub(r'\s*\(\d+\s*Viewer[s]?\)', '', title)  # Remove "(1 Viewer)"
+            title = re.sub(r'\s+', ' ', title).strip()
+            return html.unescape(title)
+        # Fallback: <title> — strip common XenForo site name suffixes
+        m = re.search(r'<title>([^<]+)</title>', page_html, re.IGNORECASE)
+        if m:
+            title = html.unescape(m.group(1).strip())
+            title = re.sub(r'\s*[-–—|]\s*(?:HQCelebCorner|PicturePub|XenForo).*$', '', title, flags=re.IGNORECASE).strip()
+            return title
+        return None
+
+    @staticmethod
+    def _extract_page_count(page_html: str) -> int:
+        """Extract total page count from XenForo pagination."""
+        # <li class="pageNav-page"><a href="...">42</a></li>
+        pages = re.findall(r'<li\s+class="pageNav-page[^"]*">\s*<a[^>]*>(\d+)</a>', page_html)
+        if pages:
+            return max(int(p) for p in pages)
+        return 1
+
+    @staticmethod
+    def _extract_reply_count(page_html: str) -> int:
+        """Extract reply count from XenForo thread info."""
+        # <dl class="pairs pairs--inline"><dt>Replies</dt><dd>123</dd></dl>
+        m = re.search(r'<dt>Replies</dt>\s*<dd>([\d,]+)</dd>', page_html)
+        if m:
+            return int(m.group(1).replace(',', ''))
+        return 0
+
+    @staticmethod
+    def _extract_thread_id(url: str) -> Optional[str]:
+        """Extract thread ID from XenForo URL.
+
+        Handles both formats:
+        - /threads/title.12345/
+        - /index.php?threads/title.12345/
+        """
+        m = re.search(r'threads/[^/]*?\.(\d+)', url)
+        if m:
+            return m.group(1)
+        # Fallback: just /threads/{id}/
+        m = re.search(r'threads/(\d+)', url)
+        if m:
+            return m.group(1)
+        return None
+
+    @staticmethod
+    def _build_page_url(thread_url: str, page_num: int) -> str:
+        """Build paginated thread URL for XenForo.
+
+        Handles: /index.php?threads/slug.12345/page-2
+        """
+        # Remove existing page- suffix and fragment
+        base = thread_url.split('#')[0].rstrip('/')
+        base = re.sub(r'/page-\d+$', '', base)
+        if page_num == 1:
+            return base + '/'
+        return f'{base}/page-{page_num}'
+
+    @staticmethod
+    def _get_extension(filename_or_url: str) -> str:
+        """Get lowercase file extension."""
+        clean = filename_or_url.split('?')[0].split('#')[0]
+        if '.' in clean.split('/')[-1]:
+            return clean.rsplit('.', 1)[-1].lower()
+        return ''
+
+    @staticmethod
+    def _filename_from_url(url: str) -> str:
+        """Extract filename from URL path."""
+        path = urlparse(url).path
+        name = path.rstrip('/').split('/')[-1]
+        return name if name else 'unnamed.jpg'
--- a/modules/paid_content/xhamster_client.py
+++ b/modules/paid_content/xhamster_client.py
--- a/modules/paid_content/youtube_client.py
+++ b/modules/paid_content/youtube_client.py
--- a/modules/pg_adapter.py
+++ b/modules/pg_adapter.py
--- a/modules/plex_client.py
+++ b/modules/plex_client.py
@@ -0,0 +1,690 @@
+"""Plex Media Server client for linking appearances to library items"""
+import asyncio
+import uuid
+from typing import Dict, List, Optional, Any
+from web.backend.core.http_client import http_client
+from modules.universal_logger import get_logger
+
+logger = get_logger('Plex')
+
+# Plex API constants
+# Base URL of the plex.tv endpoints used by PlexOAuth (pins, user, resources).
+PLEX_TV_API = "https://plex.tv/api/v2"
+# Page the user is sent to in order to approve a PIN (see PlexOAuth.create_pin).
+PLEX_AUTH_URL = "https://app.plex.tv/auth"
+# Sent as the X-Plex-Client-Identifier header and as clientID in the auth URL.
+CLIENT_IDENTIFIER = "media-downloader-appearances"
+# Sent as the X-Plex-Product header and as the product name in the auth URL.
+PRODUCT_NAME = "Media Downloader"
+
+
+class PlexOAuth:
+    """Handle Plex OAuth PIN-based authentication flow"""
+
+    def __init__(self):
+        self._headers = {
+            'Accept': 'application/json',
+            'X-Plex-Client-Identifier': CLIENT_IDENTIFIER,
+            'X-Plex-Product': PRODUCT_NAME,
+            'X-Plex-Version': '1.0.0',
+            'X-Plex-Device': 'Web',
+            'X-Plex-Platform': 'Web',
+        }
+
+    async def create_pin(self) -> Optional[Dict]:
+        """
+        Create a new PIN for authentication.
+
+        Returns:
+            Dict with 'id', 'code', and 'auth_url' or None on failure
+        """
+        try:
+            url = f"{PLEX_TV_API}/pins"
+            response = await http_client.post(
+                url,
+                headers=self._headers,
+                data={'strong': 'true'}
+            )
+            data = response.json()
+
+            pin_id = data.get('id')
+            pin_code = data.get('code')
+
+            if pin_id and pin_code:
+                # Build the auth URL for the user to visit
+                auth_url = (
+                    f"{PLEX_AUTH_URL}#?"
+                    f"clientID={CLIENT_IDENTIFIER}&"
+                    f"code={pin_code}&"
+                    f"context%5Bdevice%5D%5Bproduct%5D={PRODUCT_NAME.replace(' ', '%20')}"
+                )
+
+                logger.info(f"Created Plex PIN {pin_id}")
+                return {
+                    'id': pin_id,
+                    'code': pin_code,
+                    'auth_url': auth_url,
+                    'expires_at': data.get('expiresAt'),
+                }
+
+            return None
+
+        except Exception as e:
+            logger.error(f"Failed to create Plex PIN: {e}")
+            return None
+
+    async def check_pin(self, pin_id: int) -> Optional[str]:
+        """
+        Check if the user has authenticated with the PIN.
+
+        Args:
+            pin_id: The PIN ID returned from create_pin
+
+        Returns:
+            The auth token if authenticated, None if still pending or expired
+        """
+        try:
+            url = f"{PLEX_TV_API}/pins/{pin_id}"
+            response = await http_client.get(url, headers=self._headers)
+            data = response.json()
+
+            auth_token = data.get('authToken')
+            if auth_token:
+                logger.info("Plex authentication successful")
+                return auth_token
+
+            return None
+
+        except Exception as e:
+            logger.error(f"Failed to check Plex PIN: {e}")
+            return None
+
+    async def wait_for_auth(self, pin_id: int, timeout: int = 120, poll_interval: int = 2) -> Optional[str]:
+        """
+        Poll for authentication completion.
+
+        Args:
+            pin_id: The PIN ID to check
+            timeout: Maximum seconds to wait
+            poll_interval: Seconds between checks
+
+        Returns:
+            The auth token if successful, None on timeout/failure
+        """
+        elapsed = 0
+        while elapsed < timeout:
+            token = await self.check_pin(pin_id)
+            if token:
+                return token
+
+            await asyncio.sleep(poll_interval)
+            elapsed += poll_interval
+
+        logger.warning(f"Plex authentication timed out after {timeout}s")
+        return None
+
+    async def get_user_info(self, token: str) -> Optional[Dict]:
+        """
+        Get information about the authenticated user.
+
+        Args:
+            token: Plex auth token
+
+        Returns:
+            User info dict or None
+        """
+        try:
+            url = f"{PLEX_TV_API}/user"
+            headers = {**self._headers, 'X-Plex-Token': token}
+            response = await http_client.get(url, headers=headers)
+            data = response.json()
+
+            return {
+                'username': data.get('username'),
+                'email': data.get('email'),
+                'thumb': data.get('thumb'),
+                'title': data.get('title'),
+            }
+
+        except Exception as e:
+            logger.error(f"Failed to get Plex user info: {e}")
+            return None
+
+    async def get_user_servers(self, token: str) -> List[Dict]:
+        """
+        Get list of Plex servers available to the user.
+
+        Args:
+            token: Plex auth token
+
+        Returns:
+            List of server dictionaries
+        """
+        try:
+            url = f"{PLEX_TV_API}/resources"
+            headers = {**self._headers, 'X-Plex-Token': token}
+            params = {'includeHttps': 1, 'includeRelay': 1}
+            response = await http_client.get(url, headers=headers, params=params)
+            data = response.json()
+
+            servers = []
+            for resource in data:
+                if resource.get('provides') == 'server':
+                    connections = resource.get('connections', [])
+
+                    # Prefer non-local (relay/remote) connections for server-to-server communication
+                    # Local connections often use internal IPs that aren't reachable externally
+                    remote_conn = next((c for c in connections if not c.get('local') and c.get('relay')), None)
+                    https_conn = next((c for c in connections if not c.get('local') and 'https' in c.get('uri', '')), None)
+                    any_remote = next((c for c in connections if not c.get('local')), None)
+                    local_conn = next((c for c in connections if c.get('local')), None)
+
+                    # Try in order: relay, https remote, any remote, local
+                    best_conn = remote_conn or https_conn or any_remote or local_conn or (connections[0] if connections else None)
+
+                    if best_conn:
+                        # Also include all connection URLs for debugging/manual selection
+                        all_urls = [{'url': c.get('uri'), 'local': c.get('local', False), 'relay': c.get('relay', False)} for c in connections]
+
+                        servers.append({
+                            'name': resource.get('name'),
+                            'machineIdentifier': resource.get('clientIdentifier'),
+                            'owned': resource.get('owned', False),
+                            'url': best_conn.get('uri'),
+                            'local': best_conn.get('local', False),
+                            'relay': best_conn.get('relay', False),
+                            'accessToken': resource.get('accessToken'),
+                            'all_connections': all_urls,
+                        })
+
+            return servers
+
+        except Exception as e:
+            logger.error(f"Failed to get Plex servers: {e}")
+            return []
+
+
+class PlexClient:
+    """Client for interacting with Plex Media Server API"""
+
+    def __init__(self, base_url: str, token: str):
+        """
+        Initialize Plex client.
+
+        Args:
+            base_url: Plex server URL (e.g., 'http://192.168.1.100:32400')
+            token: Plex authentication token
+        """
+        self.base_url = base_url.rstrip('/')
+        self.token = token
+        self._headers = {
+            'X-Plex-Token': token,
+            'Accept': 'application/json'
+        }
+
+    async def test_connection(self) -> bool:
+        """
+        Test connection to Plex server.
+
+        Returns:
+            True if connection successful, False otherwise
+        """
+        try:
+            url = f"{self.base_url}/identity"
+            response = await http_client.get(url, headers=self._headers)
+            data = response.json()
+            server_name = data.get('MediaContainer', {}).get('friendlyName', 'Unknown')
+            logger.info(f"Connected to Plex server: {server_name}")
+            return True
+        except Exception as e:
+            logger.error(f"Plex connection test failed: {e}")
+            return False
+
+    async def get_libraries(self) -> List[Dict]:
+        """
+        Get list of Plex libraries.
+
+        Returns:
+            List of library dictionaries with id, title, type
+        """
+        try:
+            url = f"{self.base_url}/library/sections"
+            response = await http_client.get(url, headers=self._headers)
+            data = response.json()
+
+            libraries = []
+            for section in data.get('MediaContainer', {}).get('Directory', []):
+                libraries.append({
+                    'id': section.get('key'),
+                    'title': section.get('title'),
+                    'type': section.get('type'),  # 'movie', 'show', 'artist', etc.
+                    'uuid': section.get('uuid'),
+                })
+            return libraries
+        except Exception as e:
+            logger.error(f"Failed to get Plex libraries: {e}")
+            return []
+
+    async def search_by_tmdb_id(self, tmdb_id: int, media_type: str = 'movie') -> Optional[Dict]:
+        """
+        Search for an item in Plex library by TMDB ID.
+
+        Args:
+            tmdb_id: The Movie Database ID
+            media_type: 'movie' or 'show'
+
+        Returns:
+            Plex item dict with ratingKey, title, etc. or None if not found
+        """
+        try:
+            # Plex uses guid format like: tmdb://12345
+            guid = f"tmdb://{tmdb_id}"
+
+            # Search across all libraries
+            url = f"{self.base_url}/library/all"
+            params = {
+                'guid': guid,
+                'type': 1 if media_type == 'movie' else 2  # 1=movie, 2=show
+            }
+            response = await http_client.get(url, headers=self._headers, params=params)
+            data = response.json()
+
+            items = data.get('MediaContainer', {}).get('Metadata', [])
+            if items:
+                item = items[0]
+                return {
+                    'ratingKey': item.get('ratingKey'),
+                    'title': item.get('title'),
+                    'year': item.get('year'),
+                    'thumb': item.get('thumb'),
+                    'type': item.get('type'),
+                    'librarySectionID': item.get('librarySectionID'),
+                }
+            return None
+        except Exception as e:
+            logger.debug(f"TMDB search failed for {tmdb_id}: {e}")
+            return None
+
+    async def search_by_title(self, title: str, year: Optional[int] = None,
+                              media_type: str = 'movie') -> Optional[Dict]:
+        """
+        Search for an item in Plex library by title.
+
+        Args:
+            title: Movie or show title
+            year: Optional release year for more accurate matching
+            media_type: 'movie' or 'show'
+
+        Returns:
+            Plex item dict or None if not found
+        """
+        try:
+            url = f"{self.base_url}/search"
+            params = {
+                'query': title,
+                'type': 1 if media_type == 'movie' else 2
+            }
+            response = await http_client.get(url, headers=self._headers, params=params)
+            data = response.json()
+
+            items = data.get('MediaContainer', {}).get('Metadata', [])
+
+            # If year provided, filter for matching year
+            if year and items:
+                for item in items:
+                    if item.get('year') == year:
+                        return {
+                            'ratingKey': item.get('ratingKey'),
+                            'title': item.get('title'),
+                            'year': item.get('year'),
+                            'thumb': item.get('thumb'),
+                            'type': item.get('type'),
+                            'librarySectionID': item.get('librarySectionID'),
+                        }
+
+            # Return first result if no exact year match
+            if items:
+                item = items[0]
+                return {
+                    'ratingKey': item.get('ratingKey'),
+                    'title': item.get('title'),
+                    'year': item.get('year'),
+                    'thumb': item.get('thumb'),
+                    'type': item.get('type'),
+                    'librarySectionID': item.get('librarySectionID'),
+                }
+            return None
+        except Exception as e:
+            logger.debug(f"Title search failed for '{title}': {e}")
+            return None
+
+    async def get_episode(self, show_rating_key: str, season: int, episode: int) -> Optional[Dict]:
+        """
+        Get a specific episode from a TV show.
+
+        Args:
+            show_rating_key: Plex ratingKey for the show
+            season: Season number
+            episode: Episode number
+
+        Returns:
+            Episode dict with ratingKey, title, etc. or None if not found
+        """
+        try:
+            # Get all episodes of the show
+            url = f"{self.base_url}/library/metadata/{show_rating_key}/allLeaves"
+            response = await http_client.get(url, headers=self._headers)
+            data = response.json()
+
+            episodes = data.get('MediaContainer', {}).get('Metadata', [])
+            for ep in episodes:
+                if ep.get('parentIndex') == season and ep.get('index') == episode:
+                    return {
+                        'ratingKey': ep.get('ratingKey'),
+                        'title': ep.get('title'),
+                        'season': season,
+                        'episode': episode,
+                        'show_rating_key': show_rating_key,
+                        'type': 'episode',
+                    }
+            return None
+        except Exception as e:
+            logger.debug(f"Episode search failed for S{season}E{episode}: {e}")
+            return None
+
+    async def get_all_episodes(self, show_rating_key: str) -> Dict[tuple, Dict]:
+        """
+        Fetch every episode of a TV show and index the result by position.
+
+        Args:
+            show_rating_key: Plex ratingKey for the show
+
+        Returns:
+            Dict keyed by (season_num, episode_num). Each value carries the
+            episode's ratingKey, title, season, episode, show_rating_key and
+            air_date. An empty dict is returned when the request fails.
+        """
+        indexed = {}
+        try:
+            leaves_url = f"{self.base_url}/library/metadata/{show_rating_key}/allLeaves"
+            response = await http_client.get(leaves_url, headers=self._headers)
+            payload = response.json()
+
+            for leaf in payload.get('MediaContainer', {}).get('Metadata', []):
+                season = leaf.get('parentIndex')
+                episode = leaf.get('index')
+                # Entries lacking either index cannot be addressed by (season, episode)
+                if season is None or episode is None:
+                    continue
+
+                indexed[(season, episode)] = {
+                    'ratingKey': leaf.get('ratingKey'),
+                    'title': leaf.get('title'),
+                    'season': season,
+                    'episode': episode,
+                    'show_rating_key': show_rating_key,
+                    'air_date': leaf.get('originallyAvailableAt'),
+                }
+
+            logger.debug(f"Found {len(indexed)} episodes for show {show_rating_key}")
+            return indexed
+
+        except Exception as e:
+            # Best-effort lookup: any failure is reported as "no episodes"
+            logger.debug(f"Failed to get episodes for show {show_rating_key}: {e}")
+            return {}
+
+    def get_watch_url(self, rating_key: str) -> str:
+        """
+        Build a Plex Web link for an item when no machine identifier is known.
+
+        The server segment of the link is the literal ``1`` rather than a real
+        machine identifier; get_full_watch_url() takes the identifier explicitly.
+
+        Args:
+            rating_key: Plex ratingKey for the item
+
+        Returns:
+            URL to open the item in Plex Web
+        """
+        # Plex Web URL format: /web/index.html#!/server/{machineId}/details?key=/library/metadata/{ratingKey}
+        # The key is written pre-encoded ("/" -> "%2F").
+        details_key = f"%2Flibrary%2Fmetadata%2F{rating_key}"
+        web_app = f"{self.base_url}/web/index.html"
+        return f"{web_app}#!/server/1/details?key={details_key}"
+
+    async def get_server_identity(self) -> Optional[Dict]:
+        """
+        Query the server's /identity endpoint.
+
+        Returns:
+            Dict with machineIdentifier, friendlyName and version (each may be
+            None if absent from the response), or None when the request fails.
+        """
+        identity_fields = ('machineIdentifier', 'friendlyName', 'version')
+        try:
+            response = await http_client.get(f"{self.base_url}/identity", headers=self._headers)
+            container = response.json().get('MediaContainer', {})
+            return {field: container.get(field) for field in identity_fields}
+        except Exception as e:
+            logger.error(f"Failed to get server identity: {e}")
+            return None
+
+    def get_full_watch_url(self, rating_key: str, machine_id: str) -> str:
+        """
+        Build a Plex Web link that addresses the item on a specific server.
+
+        Args:
+            rating_key: Plex ratingKey for the item
+            machine_id: Plex server machine identifier
+
+        Returns:
+            Complete Plex Web URL
+        """
+        # The metadata path is written pre-encoded ("/" -> "%2F")
+        details_key = f"%2Flibrary%2Fmetadata%2F{rating_key}"
+        server_fragment = f"#!/server/{machine_id}/details?key={details_key}"
+        return f"{self.base_url}/web/index.html{server_fragment}"
+
+    async def search_by_actor(self, actor_name: str) -> List[Dict]:
+        """
+        Search Plex library for all movies and TV shows featuring an actor.
+
+        Each movie/show library is queried with Plex's ``actor`` filter; every
+        hit then gets a detail request so the actor's character name can be
+        read from the item's ``Role`` list.
+
+        Args:
+            actor_name: Name of the actor to search for
+
+        Returns:
+            List of appearance dicts (title, dates, summary, poster URL,
+            character name, Plex rating key and library id). Empty list when
+            the name is blank or the overall search fails.
+        """
+        actor_name_lower = (actor_name or '').strip().lower()
+        if not actor_name_lower:
+            # A blank name would substring-match every cast entry below
+            return []
+
+        appearances = []
+        seen_keys = set()  # Track to avoid duplicates across libraries
+
+        try:
+            # Get all libraries
+            libraries = await self.get_libraries()
+
+            for library in libraries:
+                lib_key = library.get('id')
+                lib_type = library.get('type')
+
+                # Only search movie and show libraries
+                if lib_type not in ('movie', 'show'):
+                    continue
+
+                try:
+                    # Use actor filter to find all content featuring this actor
+                    url = f"{self.base_url}/library/sections/{lib_key}/all"
+                    params = {
+                        'type': 1 if lib_type == 'movie' else 2,  # 1=movie, 2=show
+                        'actor': actor_name,  # Plex accepts actor name directly
+                    }
+
+                    response = await http_client.get(url, headers=self._headers, params=params)
+                    data = response.json()
+                    items = data.get('MediaContainer', {}).get('Metadata', [])
+
+                    logger.debug(f"Found {len(items)} {lib_type}s for '{actor_name}' in library {library.get('title')}")
+
+                    for item in items:
+                        rating_key = item.get('ratingKey')
+                        if not rating_key or rating_key in seen_keys:
+                            continue
+
+                        seen_keys.add(rating_key)
+
+                        # Get detailed metadata for character name
+                        detail_url = f"{self.base_url}/library/metadata/{rating_key}"
+                        detail_response = await http_client.get(detail_url, headers=self._headers)
+                        detail_data = detail_response.json()
+                        detail_items = detail_data.get('MediaContainer', {}).get('Metadata', [])
+
+                        if not detail_items:
+                            continue
+
+                        detail = detail_items[0]
+
+                        # Find the actor's role/character name
+                        character_name = None
+                        for role in detail.get('Role', []):
+                            role_tag = (role.get('tag') or '').strip().lower()
+                            if not role_tag:
+                                # '' is a substring of every string, so an untagged
+                                # cast entry must never count as a match
+                                continue
+                            if actor_name_lower in role_tag or role_tag in actor_name_lower:
+                                character_name = role.get('role')
+                                break
+
+                        # Build poster URL with auth token
+                        # NOTE(review): the Plex token ends up inside the URL, so
+                        # poster_url should be handled as a secret by consumers.
+                        thumb = detail.get('thumb')
+                        poster_url = None
+                        if thumb:
+                            poster_url = f"{self.base_url}{thumb}?X-Plex-Token={self.token}"
+
+                        # Build appearance data
+                        appearance = {
+                            'appearance_type': 'Movie' if lib_type == 'movie' else 'TV',
+                            'show_name': detail.get('title'),
+                            'episode_title': None,
+                            'network': detail.get('studio'),
+                            'appearance_date': detail.get('originallyAvailableAt'),
+                            'year': detail.get('year'),
+                            'status': 'aired',
+                            'description': detail.get('summary'),
+                            'poster_url': poster_url,
+                            'credit_type': 'acting',
+                            'character_name': character_name,
+                            'plex_rating_key': rating_key,
+                            'plex_library_id': lib_key,
+                            'source': 'plex',
+                        }
+
+                        # For TV shows, get episode count
+                        if lib_type == 'show':
+                            appearance['episode_count'] = detail.get('leafCount', 1)
+
+                        appearances.append(appearance)
+                        role_suffix = f" as {character_name}" if character_name else ""
+                        logger.info(f"Found Plex appearance: {actor_name} in '{detail.get('title')}'" + role_suffix)
+
+                        # Small delay between detail requests
+                        await asyncio.sleep(0.02)
+
+                except Exception as e:
+                    # One failing library must not abort the remaining ones
+                    logger.debug(f"Error searching library {lib_key}: {e}")
+                    continue
+
+            logger.info(f"Found {len(appearances)} Plex appearances for {actor_name}")
+            return appearances
+
+        except Exception as e:
+            logger.error(f"Failed to search Plex by actor: {e}")
+            return []
+
+    async def batch_match_appearances(self, appearances: List[Dict], on_match=None) -> Dict[int, Dict]:
+        """
+        Match multiple appearances to Plex library items.
+
+        Show/movie lookups are cached per (TMDB id, media type). A show's
+        episode list is fetched through get_all_episodes() and reused for all
+        further appearances of that show, instead of one request per episode.
+
+        Args:
+            appearances: List of appearance dicts with tmdb_show_id or tmdb_movie_id
+            on_match: Optional async callback(appearance_id, match_info) called for each match
+
+        Returns:
+            Dict mapping appearance ID to Plex match info with the keys
+            plex_rating_key, plex_show_rating_key, plex_library_id and
+            plex_watch_url
+        """
+        matches = {}
+        server_info = await self.get_server_identity()
+        machine_id = server_info.get('machineIdentifier') if server_info else None
+
+        # Dedupe by (TMDB ID, media type) to avoid redundant searches
+        tmdb_cache: Dict[tuple, Optional[Dict]] = {}
+        # Episode maps per show, each keyed by (season, episode)
+        episode_cache: Dict[str, Dict[tuple, Dict]] = {}
+
+        for appearance in appearances:
+            appearance_id = appearance.get('id')
+            if not appearance_id:
+                continue
+
+            # Determine media type and TMDB ID
+            tmdb_id = appearance.get('tmdb_movie_id') or appearance.get('tmdb_show_id')
+            is_movie = appearance.get('appearance_type') == 'Movie'
+            media_type = 'movie' if is_movie else 'show'
+
+            if not tmdb_id:
+                continue
+
+            cache_key = (tmdb_id, media_type)
+
+            # Check cache first
+            if cache_key in tmdb_cache:
+                plex_item = tmdb_cache[cache_key]
+            else:
+                # Rate limiting
+                await asyncio.sleep(0.1)
+
+                # Try TMDB ID first
+                plex_item = await self.search_by_tmdb_id(tmdb_id, media_type)
+
+                # Fall back to title search if no TMDB match
+                if not plex_item:
+                    title = appearance.get('movie_name') or appearance.get('show_name')
+                    year = None
+                    if appearance.get('release_date'):
+                        try:
+                            year = int(appearance['release_date'][:4])
+                        except (ValueError, TypeError):
+                            pass
+                    if title:
+                        plex_item = await self.search_by_title(title, year, media_type)
+
+                tmdb_cache[cache_key] = plex_item
+
+            if not plex_item:
+                continue
+
+            show_rating_key = plex_item.get('ratingKey')  # Always the show/movie key
+            rating_key = show_rating_key if is_movie else None  # Movies get the key, TV starts with None
+            library_id = plex_item.get('librarySectionID')
+
+            # For TV shows with season/episode data, try to match the specific episode.
+            # Compare against None: season 0 (specials) and episode 0 are valid numbers.
+            season = appearance.get('season_number')
+            episode = appearance.get('episode_number')
+            if not is_movie and season is not None and episode is not None:
+                show_key = str(show_rating_key)
+                show_episodes = episode_cache.get(show_key)
+                if show_episodes is None:
+                    show_episodes = await self.get_all_episodes(show_rating_key)
+                    if show_episodes:
+                        # Empty maps are not cached so a failed fetch is retried
+                        episode_cache[show_key] = show_episodes
+
+                episode_item = show_episodes.get((season, episode))
+                if episode_item:
+                    rating_key = episode_item.get('ratingKey')  # Episode-specific key
+                # If episode not found, rating_key stays None - episode not in Plex
+
+            if not rating_key:
+                watch_url = None
+            elif machine_id:
+                watch_url = self.get_full_watch_url(rating_key, machine_id)
+            else:
+                # Server identity unavailable: fall back to the generic link
+                watch_url = self.get_watch_url(rating_key)
+
+            match_info = {
+                'plex_rating_key': rating_key,  # Episode key if found, movie key for movies, None for missing TV episodes
+                'plex_show_rating_key': show_rating_key if not is_movie else None,  # Show key for TV (for series-level navigation)
+                'plex_library_id': library_id,
+                'plex_watch_url': watch_url,
+            }
+            matches[appearance_id] = match_info
+
+            # Call the on_match callback for real-time updates
+            if on_match:
+                await on_match(appearance_id, match_info)
+
+        logger.info(f"Matched {len(matches)} of {len(appearances)} appearances to Plex library")
+        return matches
--- a/modules/podchaser_client.py
+++ b/modules/podchaser_client.py
@@ -0,0 +1,445 @@
+"""Podchaser GraphQL API client for podcast guest appearances tracking"""
+import asyncio
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional
+from web.backend.core.http_client import http_client
+from modules.universal_logger import get_logger
+
+logger = get_logger('Podchaser')
+
+class PodchaserClient:
+    """Client for interacting with the Podchaser GraphQL API"""
+
+    API_URL = "https://api.podchaser.com/graphql"
+
+    def __init__(self, api_key: str):
+        """
+        Keep the token and prepare the headers sent with every API request.
+
+        Args:
+            api_key: Podchaser access token (already exchanged from client
+                credentials); sent as a Bearer token
+        """
+        self.api_key = api_key
+        bearer = f"Bearer {api_key}"
+        self.headers = {"Authorization": bearer, "Content-Type": "application/json"}
+
+    @classmethod
+    async def from_client_credentials(cls, client_id: str, client_secret: str):
+        """
+        Create a PodchaserClient by exchanging client credentials for an access token
+
+        Args:
+            client_id: Podchaser client ID
+            client_secret: Podchaser client secret
+
+        Returns:
+            PodchaserClient instance with access token
+
+        Raises:
+            Exception: If the request fails, the response reports GraphQL
+                errors, or no access token is present in the response
+        """
+        mutation = """
+        mutation GetToken($client_id: String!, $client_secret: String!) {
+            requestAccessToken(
+                input: {
+                    grant_type: CLIENT_CREDENTIALS
+                    client_id: $client_id
+                    client_secret: $client_secret
+                }
+            ) {
+                access_token
+            }
+        }
+        """
+
+        variables = {
+            "client_id": client_id,
+            "client_secret": client_secret
+        }
+
+        # Only the network call and JSON decoding sit inside the try, so each
+        # failure below is logged exactly once.
+        try:
+            response = await http_client.post(
+                cls.API_URL,
+                json={"query": mutation, "variables": variables},
+                headers={"Content-Type": "application/json"}
+            )
+            data = response.json()
+        except Exception as e:
+            logger.error(f"Error getting Podchaser access token: {e}")
+            raise
+
+        if not isinstance(data, dict):
+            logger.error("Error getting Podchaser access token: unexpected response format")
+            raise Exception("Podchaser authentication failed: unexpected response format")
+
+        if "errors" in data:
+            logger.error(f"Failed to get Podchaser access token: {data['errors']}")
+            raise Exception(f"Podchaser authentication failed: {data['errors']}")
+
+        # "data" and "requestAccessToken" may be JSON null; treat that like a missing token
+        token_payload = (data.get("data") or {}).get("requestAccessToken") or {}
+        access_token = token_payload.get("access_token")
+
+        if not access_token:
+            logger.error("Error getting Podchaser access token: No access token returned from Podchaser")
+            raise Exception("No access token returned from Podchaser")
+
+        logger.info("Successfully obtained Podchaser access token")
+        return cls(access_token)
+
+    async def _execute_query(self, query: str, variables: Optional[Dict] = None) -> Dict:
+        """
+        POST a GraphQL document to the Podchaser API and unwrap its "data" member.
+
+        Args:
+            query: GraphQL query or mutation text
+            variables: Optional variable mapping; omitted from the request when empty
+
+        Returns:
+            The response's "data" value, or an empty dict when the response
+            reports errors or the request raises
+        """
+        try:
+            request_body = {"query": query}
+            if variables:
+                request_body.update(variables=variables)
+
+            response = await http_client.post(self.API_URL, json=request_body, headers=self.headers)
+            result = response.json()
+
+            if "errors" not in result:
+                return result.get("data", {})
+
+            # Any reported error discards the whole response
+            logger.error(f"GraphQL errors: {result['errors']}")
+            return {}
+
+        except Exception as e:
+            logger.error(f"Podchaser API error: {e}")
+            return {}
+
+    async def search_creator_by_creators_endpoint(self, name: str) -> Optional[Dict]:
+        """
+        Search for a creator using the creators endpoint
+        This is more direct than searching via credits or podcasts
+
+        Args:
+            name: Creator name used as the search term
+
+        Returns:
+            The creator whose name equals ``name`` ignoring case and outer
+            whitespace; otherwise the first search result; None when the
+            search returned nothing
+        """
+        query = """
+        query FindCreator($term: String!) {
+          creators(searchTerm: $term, first: 10) {
+            data {
+              pcid
+              name
+              informalName
+              subtitle
+              imageUrl
+              url
+              episodeAppearanceCount
+            }
+          }
+        }
+        """
+
+        data = await self._execute_query(query, {"term": name})
+
+        # "creators" or its "data" list may be missing or JSON null
+        creators = ((data or {}).get("creators") or {}).get("data") or []
+        if not creators:
+            return None
+
+        # Prefer exact case-insensitive match
+        name_lower = name.strip().lower()
+        for creator in creators:
+            candidate = creator.get("name")
+            if candidate and candidate.strip().lower() == name_lower:
+                logger.info(f"Found exact creator match: {candidate} (pcid: {creator.get('pcid')})")
+                return creator
+
+        # Return first result if no exact match
+        first = creators[0]
+        logger.info(f"Found creator: {first.get('name')} (pcid: {first.get('pcid')})")
+        return first
+
+    async def search_creator(self, name: str) -> Optional[Dict]:
+        """
+        Look up a creator by name.
+
+        Delegates to search_creator_by_creators_endpoint() and returns its
+        result unchanged (a creator dict or None).
+        """
+        creator = await self.search_creator_by_creators_endpoint(name)
+        return creator
+
+    async def get_creator_guest_appearances(self, creator_id: str, days_back: int = 30, days_ahead: int = 365) -> List[Dict]:
+        """
+        Get all guest AND host appearances (episodeCredits) for a creator
+        Filters for recent and upcoming episodes
+
+        Fetches up to 10 pages of 20 credits each and keeps the episodes whose
+        air date lies in [today - days_back, today + days_ahead]. Credits
+        without an episode, without an air date, or with an unparseable air
+        date are skipped.
+
+        Args:
+            creator_id: Podchaser creator ID
+            days_back: How many days in the past to search
+            days_ahead: How many days in the future to search
+
+        Returns:
+            List of episode appearances with metadata (both guest and host roles)
+        """
+        today = datetime.now().date()
+        cutoff_past = today - timedelta(days=days_back)
+        cutoff_future = today + timedelta(days=days_ahead)
+
+        query = """
+        query GetCreatorAppearances($creatorId: String!, $page: Int) {
+          creator(identifier: {type: PCID, id: $creatorId}) {
+            pcid
+            name
+            episodeCredits(
+              filters: { role: ["guest", "host"] }
+              first: 20
+              page: $page
+              sort: {sortBy: DATE, direction: DESCENDING}
+            ) {
+              data {
+                role {
+                  code
+                  title
+                }
+                episode {
+                  id
+                  title
+                  description
+                  url
+                  imageUrl
+                  audioUrl
+                  airDate
+                  podcast {
+                    id
+                    title
+                    imageUrl
+                    url
+                    categories {
+                      title
+                      slug
+                    }
+                  }
+                }
+              }
+              paginatorInfo {
+                currentPage
+                hasMorePages
+                lastPage
+              }
+            }
+          }
+        }
+        """
+
+        page = 1
+        max_pages = 10  # Limit to prevent excessive API calls
+        appearances = []
+
+        while page <= max_pages:
+            variables = {
+                "creatorId": str(creator_id),
+                "page": page
+            }
+
+            data = await self._execute_query(query, variables)
+
+            creator_data = (data or {}).get("creator")
+            if not creator_data:
+                break
+
+            # Nested objects may come back as JSON null, hence the "or" fallbacks
+            credits_page = creator_data.get("episodeCredits") or {}
+            episode_credits = credits_page.get("data") or []
+
+            logger.info(f"Fetched {len(episode_credits)} episodes from Podchaser (page {page})")
+
+            for credit in episode_credits:
+                episode = credit.get("episode")
+                if not episode:
+                    continue
+
+                # Check air date
+                air_date_str = episode.get("airDate")
+                if not air_date_str:
+                    continue
+
+                try:
+                    # Handle both "YYYY-MM-DD" and "YYYY-MM-DD HH:MM:SS" formats:
+                    # the first 10 characters are the date part in either case
+                    air_date = datetime.strptime(air_date_str[:10], "%Y-%m-%d").date()
+                except (ValueError, TypeError) as e:
+                    logger.debug(f"Date parse error for episode: {e}")
+                    continue
+
+                # Only include episodes within our time window
+                if not (cutoff_past <= air_date <= cutoff_future):
+                    continue
+
+                podcast = episode.get("podcast") or {}
+
+                role_obj = credit.get("role")
+                role_name = role_obj.get("title") if isinstance(role_obj, dict) else None
+
+                appearances.append({
+                    "podchaser_episode_id": episode.get("id"),
+                    "episode_title": episode.get("title"),
+                    "podcast_name": podcast.get("title"),
+                    "description": episode.get("description"),
+                    "air_date": air_date_str,
+                    "episode_url": episode.get("url"),
+                    "audio_url": episode.get("audioUrl"),
+                    "poster_url": episode.get("imageUrl") or podcast.get("imageUrl"),
+                    "role": role_name,
+                    "podchaser_podcast_id": podcast.get("id"),
+                })
+
+            # Check if there are more pages
+            paginator = credits_page.get("paginatorInfo") or {}
+            if not paginator.get("hasMorePages"):
+                break
+
+            page += 1
+            await asyncio.sleep(0.15)  # Rate limiting
+
+        logger.info(f"Returning {len(appearances)} guest/host appearances for creator {creator_id}")
+        return appearances
+
+    async def get_creator_podcast_episodes(self, creator_name: str, days_back: int = 30, days_ahead: int = 365) -> List[Dict]:
+        """
+        Get podcast episodes where the creator is a host
+        Searches for podcasts by the creator's name and returns recent episodes
+
+        A podcast counts as hosted when one of its credits has role code
+        "host" and a creator name that contains, or is contained in,
+        ``creator_name`` (case-insensitive).
+
+        Args:
+            creator_name: Creator's name to search for
+            days_back: How many days in the past to search
+            days_ahead: How many days in the future to search
+
+        Returns:
+            List of podcast episodes with metadata; empty when the name is
+            blank or nothing matches
+        """
+        wanted_name = (creator_name or "").strip().lower()
+        if not wanted_name:
+            # A blank name is a substring of every host name and would match all podcasts
+            return []
+
+        today = datetime.now().date()
+        cutoff_past = today - timedelta(days=days_back)
+        cutoff_future = today + timedelta(days=days_ahead)
+
+        # Search for podcasts by creator name
+        query = """
+        query SearchPodcastByHost($searchTerm: String!) {
+          podcasts(searchTerm: $searchTerm, first: 5) {
+            data {
+              id
+              title
+              imageUrl
+              url
+              credits(first: 20) {
+                data {
+                  role {
+                    code
+                    title
+                  }
+                  creator {
+                    pcid
+                    name
+                  }
+                }
+              }
+              episodes(first: 50, sort: {sortBy: AIR_DATE, direction: DESCENDING}) {
+                data {
+                  id
+                  title
+                  description
+                  url
+                  imageUrl
+                  audioUrl
+                  airDate
+                }
+              }
+            }
+          }
+        }
+        """
+
+        variables = {"searchTerm": creator_name}
+        data = await self._execute_query(query, variables)
+
+        appearances = []
+
+        # Nested objects may come back as JSON null, hence the "or" fallbacks
+        podcasts = ((data or {}).get("podcasts") or {}).get("data") or []
+        for podcast in podcasts:
+            # Check if the creator is a host of this podcast
+            is_host = False
+            host_role = None
+
+            for credit in (podcast.get("credits") or {}).get("data") or []:
+                creator = credit.get("creator") or {}
+                role = credit.get("role") or {}
+
+                if role.get("code") != "host":
+                    continue
+
+                credited_name = (creator.get("name") or "").strip().lower()
+                if not credited_name:
+                    continue
+
+                # Check if this is our creator (substring match in either direction)
+                if wanted_name in credited_name or credited_name in wanted_name:
+                    is_host = True
+                    host_role = role.get("title")
+                    break
+
+            if not is_host:
+                continue
+
+            # Get episodes from this podcast
+            for episode in (podcast.get("episodes") or {}).get("data") or []:
+                air_date_str = episode.get("airDate")
+                if not air_date_str:
+                    continue
+
+                try:
+                    # Handle both "YYYY-MM-DD" and "YYYY-MM-DD HH:MM:SS" formats:
+                    # the first 10 characters are the date part in either case
+                    air_date = datetime.strptime(air_date_str[:10], "%Y-%m-%d").date()
+                except (ValueError, TypeError):
+                    continue
+
+                # Only include episodes within our time window
+                if not (cutoff_past <= air_date <= cutoff_future):
+                    continue
+
+                appearances.append({
+                    "podchaser_episode_id": episode.get("id"),
+                    "episode_title": episode.get("title"),
+                    "podcast_name": podcast.get("title"),
+                    "description": episode.get("description"),
+                    "air_date": air_date_str,
+                    "episode_url": episode.get("url"),
+                    "audio_url": episode.get("audioUrl"),
+                    "poster_url": episode.get("imageUrl") or podcast.get("imageUrl"),
+                    "role": host_role,
+                    "podchaser_podcast_id": podcast.get("id"),
+                })
+
+        return appearances
+
+    async def find_upcoming_podcast_appearances(self, creator_id: str, creator_name: Optional[str] = None) -> List[Dict]:
+        """
+        Find upcoming podcast appearances for a creator
+        Includes both guest appearances (episodeCredits) and hosted podcast episodes
+        Returns episodes airing within 365 days before or after today
+
+        Args:
+            creator_id: Podchaser creator ID (pcid)
+            creator_name: Creator's name (required for podcast search); when
+                omitted, only episodeCredits are returned
+
+        Returns:
+            Appearances deduplicated by episode ID, newest air date first.
+            Entries without an episode ID are dropped.
+        """
+        # Get both guest appearances and hosted episodes
+        guest_appearances = await self.get_creator_guest_appearances(
+            creator_id,
+            days_back=365,  # Look back 1 year for recent episodes
+            days_ahead=365
+        )
+
+        # For hosted episodes, we need the creator name
+        hosted_episodes = []
+        if creator_name:
+            hosted_episodes = await self.get_creator_podcast_episodes(
+                creator_name,
+                days_back=365,  # Look back 1 year for recent episodes
+                days_ahead=365
+            )
+        else:
+            logger.warning(f"No creator name provided for {creator_id}, skipping podcast host search")
+
+        def description_length(entry: Dict) -> int:
+            # A missing/None description counts as length 0 (str(None) would count as 4)
+            return len(str(entry.get("description") or ""))
+
+        # Combine and deduplicate by episode ID
+        all_appearances = {}
+        for appearance in guest_appearances + hosted_episodes:
+            episode_id = appearance.get("podchaser_episode_id")
+            if not episode_id:
+                continue
+
+            # If duplicate, prefer the one with the longer description
+            current = all_appearances.get(episode_id)
+            if current is None or description_length(appearance) > description_length(current):
+                all_appearances[episode_id] = appearance
+
+        # Sort by air date, newest first; a missing date sorts last
+        sorted_appearances = sorted(
+            all_appearances.values(),
+            key=lambda x: x.get("air_date") or "",
+            reverse=True
+        )
+
+        return sorted_appearances
--- a/modules/private_gallery_crypto.py
+++ b/modules/private_gallery_crypto.py
@@ -0,0 +1,873 @@
+#!/usr/bin/env python3
+"""
+Private Gallery Encryption Module
+
+Provides security features for the Private Gallery:
+- Password hashing with bcrypt
+- Key derivation with Argon2id
+- File encryption/decryption with AES-256-GCM
+- Field encryption with Fernet
+- Session token management
+"""
+
+import os
+import secrets
+import hashlib
+import base64
+import time
+from datetime import datetime, timedelta
+from typing import Optional, Dict, Tuple
+from pathlib import Path
+from threading import Lock
+
+try:
+    import bcrypt
+except ImportError:
+    bcrypt = None
+
+try:
+    from argon2 import PasswordHasher
+    from argon2.low_level import hash_secret_raw, Type
+    ARGON2_AVAILABLE = True
+except ImportError:
+    ARGON2_AVAILABLE = False
+
+try:
+    from cryptography.fernet import Fernet
+    from cryptography.hazmat.primitives.ciphers.aead import AESGCM
+    from cryptography.hazmat.primitives import hashes
+    from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
+    CRYPTO_AVAILABLE = True
+except ImportError:
+    CRYPTO_AVAILABLE = False
+
+from modules.universal_logger import get_logger
+
+logger = get_logger('PrivateGalleryCrypto')
+
+
+class PrivateGalleryCrypto:
+    """
+    Handles all encryption operations for the Private Gallery.
+
+    Security features:
+    - Passwords hashed with bcrypt (cost factor 12); PBKDF2-HMAC-SHA256 is
+      used instead when the bcrypt package is not installed
+    - Encryption key derived from password using Argon2id; PBKDF2-HMAC-SHA256
+      is used instead when argon2-cffi is not installed
+    - Files encrypted with AES-256-GCM
+    - Database fields encrypted with Fernet (AES-128-CBC + HMAC)
+    - Session tokens with configurable timeout
+    """
+
+    # Argon2id parameters: t=3, m=64 MiB, p=4, i.e. the "second recommended"
+    # (low-memory) option of RFC 9106
+    ARGON2_TIME_COST = 3
+    ARGON2_MEMORY_COST = 65536  # 64 MiB (argon2 expresses memory cost in KiB)
+    ARGON2_PARALLELISM = 4
+    ARGON2_HASH_LENGTH = 32  # 256 bits for AES-256
+
+    # AES-GCM parameters
+    AES_KEY_SIZE = 32  # 256 bits
+    AES_NONCE_SIZE = 12  # 96 bits (GCM recommended)
+    AES_TAG_SIZE = 16  # 128 bits
+
+    # Encryption chunk size for streaming large files
+    CHUNK_SIZE = 8 * 1024 * 1024  # 8 MB chunks
+    CHUNKED_THRESHOLD = 50 * 1024 * 1024  # Use chunked encryption for files > 50 MB
+    # Magic bytes written at the start of every chunked file:
+    # version 1, Private Gallery Chunked Encryption
+    CHUNKED_MAGIC = b'\x01PGCE'
+
+    def __init__(self):
+        self._sessions: Dict[str, Dict] = {}  # token -> {expiry, username}
+        self._session_lock = Lock()
+        self._derived_key: Optional[bytes] = None
+        self._fernet: Optional[Fernet] = None
+        self._aesgcm: Optional[AESGCM] = None
+
+        # Check dependencies
+        if not bcrypt:
+            logger.warning("bcrypt not available - password hashing will use fallback")
+        if not ARGON2_AVAILABLE:
+            logger.warning("argon2-cffi not available - key derivation will use PBKDF2")
+        if not CRYPTO_AVAILABLE:
+            raise ImportError("cryptography library required for Private Gallery")
+
+    # =========================================================================
+    # PASSWORD HASHING (bcrypt)
+    # =========================================================================
+
+    def hash_password(self, password: str) -> str:
+        """
+        Hash a password using bcrypt with cost factor 12.
+
+        Args:
+            password: Plain text password
+
+        Returns:
+            bcrypt hash string (includes salt)
+        """
+        if bcrypt:
+            salt = bcrypt.gensalt(rounds=12)
+            hashed = bcrypt.hashpw(password.encode('utf-8'), salt)
+            return hashed.decode('utf-8')
+        else:
+            # Fallback to PBKDF2 if bcrypt not available
+            salt = secrets.token_bytes(16)
+            kdf = PBKDF2HMAC(
+                algorithm=hashes.SHA256(),
+                length=32,
+                salt=salt,
+                iterations=600000,
+            )
+            key = kdf.derive(password.encode('utf-8'))
+            return f"pbkdf2${base64.b64encode(salt).decode()}${base64.b64encode(key).decode()}"
+
+    def verify_password(self, password: str, password_hash: str) -> bool:
+        """
+        Verify a password against its hash.
+
+        Args:
+            password: Plain text password to check
+            password_hash: Stored hash to verify against
+
+        Returns:
+            True if password matches
+        """
+        try:
+            if password_hash.startswith('pbkdf2$'):
+                # PBKDF2 fallback hash
+                parts = password_hash.split('$')
+                if len(parts) != 3:
+                    return False
+                salt = base64.b64decode(parts[1])
+                stored_key = base64.b64decode(parts[2])
+                kdf = PBKDF2HMAC(
+                    algorithm=hashes.SHA256(),
+                    length=32,
+                    salt=salt,
+                    iterations=600000,
+                )
+                try:
+                    kdf.verify(password.encode('utf-8'), stored_key)
+                    return True
+                except Exception:
+                    return False
+            elif bcrypt:
+                return bcrypt.checkpw(
+                    password.encode('utf-8'),
+                    password_hash.encode('utf-8')
+                )
+            else:
+                return False
+        except Exception as e:
+            logger.error(f"Password verification failed: {e}")
+            return False
+
+    # =========================================================================
+    # KEY DERIVATION (Argon2id or PBKDF2)
+    # =========================================================================
+
+    def derive_key(self, password: str, salt: bytes) -> bytes:
+        """
+        Derive an encryption key from password using Argon2id.
+
+        Args:
+            password: User's password
+            salt: Random salt (should be stored)
+
+        Returns:
+            32-byte derived key for AES-256
+        """
+        if ARGON2_AVAILABLE:
+            key = hash_secret_raw(
+                secret=password.encode('utf-8'),
+                salt=salt,
+                time_cost=self.ARGON2_TIME_COST,
+                memory_cost=self.ARGON2_MEMORY_COST,
+                parallelism=self.ARGON2_PARALLELISM,
+                hash_len=self.ARGON2_HASH_LENGTH,
+                type=Type.ID  # Argon2id
+            )
+            return key
+        else:
+            # Fallback to PBKDF2 with high iterations
+            kdf = PBKDF2HMAC(
+                algorithm=hashes.SHA256(),
+                length=self.AES_KEY_SIZE,
+                salt=salt,
+                iterations=600000,  # OWASP recommended minimum
+            )
+            return kdf.derive(password.encode('utf-8'))
+
+    def generate_salt(self) -> bytes:
+        """Generate a cryptographically secure random salt."""
+        return secrets.token_bytes(16)
+
+    def initialize_encryption(self, password: str, salt: bytes) -> None:
+        """
+        Initialize encryption with derived key.
+        Must be called after successful unlock.
+
+        Args:
+            password: User's password
+            salt: Stored salt for key derivation
+        """
+        self._derived_key = self.derive_key(password, salt)
+
+        # Initialize Fernet for field encryption
+        # Fernet requires a 32-byte key, base64-encoded
+        fernet_key = base64.urlsafe_b64encode(self._derived_key)
+        self._fernet = Fernet(fernet_key)
+
+        # Initialize AES-GCM for file encryption
+        self._aesgcm = AESGCM(self._derived_key)
+
+        logger.info("Encryption initialized successfully")
+
+    def clear_encryption(self) -> None:
+        """Clear encryption keys from memory (on lock)."""
+        self._derived_key = None
+        self._fernet = None
+        self._aesgcm = None
+        logger.info("Encryption keys cleared")
+
+    def is_initialized(self) -> bool:
+        """Check if encryption is initialized (unlocked)."""
+        return self._derived_key is not None
+
+    # =========================================================================
+    # FIELD ENCRYPTION (Fernet - for database fields)
+    # =========================================================================
+
+    def encrypt_field(self, plaintext: str) -> str:
+        """
+        Encrypt a database field value.
+
+        Args:
+            plaintext: Plain text to encrypt
+
+        Returns:
+            Base64-encoded encrypted string
+        """
+        if not self._fernet:
+            raise RuntimeError("Encryption not initialized - call initialize_encryption first")
+
+        if not plaintext:
+            return ""
+
+        encrypted = self._fernet.encrypt(plaintext.encode('utf-8'))
+        return base64.urlsafe_b64encode(encrypted).decode('utf-8')
+
+    def decrypt_field(self, ciphertext: str) -> str:
+        """
+        Decrypt a database field value.
+
+        Args:
+            ciphertext: Base64-encoded encrypted string
+
+        Returns:
+            Decrypted plain text
+        """
+        if not self._fernet:
+            raise RuntimeError("Encryption not initialized - call initialize_encryption first")
+
+        if not ciphertext:
+            return ""
+
+        try:
+            encrypted = base64.urlsafe_b64decode(ciphertext.encode('utf-8'))
+            decrypted = self._fernet.decrypt(encrypted)
+            return decrypted.decode('utf-8')
+        except Exception as e:
+            logger.error(f"Field decryption failed: {e}")
+            return "[Decryption Error]"
+
+    # =========================================================================
+    # FILE ENCRYPTION (AES-256-GCM)
+    # =========================================================================
+
+    def encrypt_file(self, input_path: Path, output_path: Path) -> bool:
+        """
+        Encrypt a file using AES-256-GCM.
+
+        Small files (<=50MB): single-shot format
+            [12-byte nonce][encrypted data + 16-byte tag]
+
+        Large files (>50MB): chunked format for memory efficiency
+            [5-byte magic 0x01PGCE][4-byte chunk_size BE]
+            [12-byte nonce][4-byte encrypted_len BE][encrypted chunk + 16-byte tag] (repeated)
+
+        Every chunk is sealed with its own random nonce and no associated
+        data.
+
+        Args:
+            input_path: Path to plaintext file
+            output_path: Path for encrypted output (parent directories are
+                created if needed)
+
+        Returns:
+            True if successful, False on any error (the output file is
+            removed on failure)
+
+        Raises:
+            RuntimeError: If encryption has not been initialized
+        """
+        if not self._aesgcm:
+            raise RuntimeError("Encryption not initialized")
+
+        try:
+            file_size = input_path.stat().st_size
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+
+            if file_size <= self.CHUNKED_THRESHOLD:
+                # Small file: single-shot encryption (backward compatible)
+                nonce = secrets.token_bytes(self.AES_NONCE_SIZE)
+                with open(input_path, 'rb') as f:
+                    plaintext = f.read()
+                ciphertext = self._aesgcm.encrypt(nonce, plaintext, None)
+                with open(output_path, 'wb') as f:
+                    f.write(nonce)
+                    f.write(ciphertext)
+            else:
+                # Large file: chunked encryption
+                import struct
+                with open(input_path, 'rb') as fin, open(output_path, 'wb') as fout:
+                    # Write header: magic + plaintext chunk size (big-endian uint32)
+                    fout.write(self.CHUNKED_MAGIC)
+                    fout.write(struct.pack('>I', self.CHUNK_SIZE))
+
+                    # Encrypt in chunks
+                    while True:
+                        chunk = fin.read(self.CHUNK_SIZE)
+                        if not chunk:
+                            break
+                        nonce = secrets.token_bytes(self.AES_NONCE_SIZE)
+                        encrypted_chunk = self._aesgcm.encrypt(nonce, chunk, None)
+                        # Write chunk: nonce + length prefix + encrypted data (includes GCM tag)
+                        fout.write(nonce)
+                        fout.write(struct.pack('>I', len(encrypted_chunk)))
+                        fout.write(encrypted_chunk)
+
+            return True
+
+        except Exception as e:
+            logger.error(f"File encryption failed: {e}")
+            # Clean up partial output
+            if output_path.exists():
+                try:
+                    output_path.unlink()
+                except Exception:
+                    pass
+            return False
+
+    def _is_chunked_format(self, input_path: Path) -> bool:
+        """Check if an encrypted file uses the chunked format."""
+        try:
+            with open(input_path, 'rb') as f:
+                magic = f.read(len(self.CHUNKED_MAGIC))
+                return magic == self.CHUNKED_MAGIC
+        except Exception:
+            return False
+
+    def decrypt_file(self, input_path: Path, output_path: Optional[Path] = None) -> Optional[bytes]:
+        """
+        Decrypt a file encrypted with AES-256-GCM.
+        Handles both single-shot and chunked formats.
+
+        Args:
+            input_path: Path to encrypted file
+            output_path: Optional path to write decrypted file
+
+        Returns:
+            Decrypted bytes if output_path is None, else None on success
+        """
+        if not self._aesgcm:
+            raise RuntimeError("Encryption not initialized")
+
+        try:
+            if self._is_chunked_format(input_path):
+                return self._decrypt_file_chunked(input_path, output_path)
+
+            # Single-shot format: [nonce][ciphertext+tag]
+            with open(input_path, 'rb') as f:
+                nonce = f.read(self.AES_NONCE_SIZE)
+                if len(nonce) != self.AES_NONCE_SIZE:
+                    raise ValueError("Invalid encrypted file: missing nonce")
+                ciphertext = f.read()
+
+            plaintext = self._aesgcm.decrypt(nonce, ciphertext, None)
+
+            if output_path:
+                output_path.parent.mkdir(parents=True, exist_ok=True)
+                with open(output_path, 'wb') as f:
+                    f.write(plaintext)
+                return None
+
+            return plaintext
+
+        except Exception as e:
+            logger.error(f"File decryption failed: {e}")
+            return None
+
+    def _decrypt_file_chunked(self, input_path: Path, output_path: Optional[Path] = None) -> Optional[bytes]:
+        """Decrypt a chunked-format encrypted file."""
+        import struct
+
+        try:
+            parts = [] if output_path is None else None
+
+            with open(input_path, 'rb') as fin:
+                # Read header
+                magic = fin.read(len(self.CHUNKED_MAGIC))
+                if magic != self.CHUNKED_MAGIC:
+                    raise ValueError("Invalid chunked file header")
+                chunk_size_bytes = fin.read(4)
+                # chunk_size from header (informational, actual sizes are per-chunk)
+                struct.unpack('>I', chunk_size_bytes)
+
+                fout = None
+                if output_path:
+                    output_path.parent.mkdir(parents=True, exist_ok=True)
+                    fout = open(output_path, 'wb')
+
+                try:
+                    while True:
+                        # Read chunk: [12-byte nonce][4-byte encrypted_len][encrypted data]
+                        nonce = fin.read(self.AES_NONCE_SIZE)
+                        if len(nonce) == 0:
+                            break  # EOF
+                        if len(nonce) != self.AES_NONCE_SIZE:
+                            raise ValueError("Truncated chunk nonce")
+
+                        enc_len_bytes = fin.read(4)
+                        if len(enc_len_bytes) != 4:
+                            raise ValueError("Truncated chunk length")
+                        enc_len = struct.unpack('>I', enc_len_bytes)[0]
+
+                        encrypted_chunk = fin.read(enc_len)
+                        if len(encrypted_chunk) != enc_len:
+                            raise ValueError("Truncated chunk data")
+
+                        decrypted_chunk = self._aesgcm.decrypt(nonce, encrypted_chunk, None)
+
+                        if fout:
+                            fout.write(decrypted_chunk)
+                        else:
+                            parts.append(decrypted_chunk)
+                finally:
+                    if fout:
+                        fout.close()
+
+            if output_path:
+                return None
+            return b''.join(parts)
+
+        except Exception as e:
+            logger.error(f"Chunked file decryption failed for {input_path}: {type(e).__name__}: {e}")
+            return None
+
+    def re_encrypt_to_chunked(self, file_path: Path) -> bool:
+        """
+        Re-encrypt a single-shot encrypted file to chunked format in-place.
+
+        The single-shot ciphertext has to be read and decrypted in one piece
+        (AES-GCM authenticates the whole message), so the complete file is
+        held in memory during the conversion. The chunked output is written
+        to a temporary file next to the original and then moved over it.
+
+        Args:
+            file_path: Path to the single-shot encrypted file
+
+        Returns:
+            True if successful, False if already chunked or on error
+
+        Raises:
+            RuntimeError: If encryption has not been initialized
+        """
+        if not self._aesgcm:
+            raise RuntimeError("Encryption not initialized")
+
+        if self._is_chunked_format(file_path):
+            return False  # Already chunked
+
+        import struct
+        # Random suffix keeps concurrent conversions from sharing a temp file
+        temp_path = file_path.with_suffix(f'.enc.{secrets.token_hex(4)}.tmp')
+
+        try:
+            # Decrypt the single-shot file fully (required by AES-GCM)
+            with open(file_path, 'rb') as f:
+                nonce = f.read(self.AES_NONCE_SIZE)
+                if len(nonce) != self.AES_NONCE_SIZE:
+                    raise ValueError("Invalid encrypted file")
+                ciphertext = f.read()
+
+            plaintext = self._aesgcm.decrypt(nonce, ciphertext, None)
+            del ciphertext  # Free memory
+
+            # Write chunked format to temp file
+            with open(temp_path, 'wb') as fout:
+                fout.write(self.CHUNKED_MAGIC)
+                fout.write(struct.pack('>I', self.CHUNK_SIZE))
+
+                offset = 0
+                while offset < len(plaintext):
+                    chunk = plaintext[offset:offset + self.CHUNK_SIZE]
+                    offset += len(chunk)
+                    # Each chunk gets its own fresh random nonce
+                    chunk_nonce = secrets.token_bytes(self.AES_NONCE_SIZE)
+                    encrypted_chunk = self._aesgcm.encrypt(chunk_nonce, chunk, None)
+                    fout.write(chunk_nonce)
+                    fout.write(struct.pack('>I', len(encrypted_chunk)))
+                    fout.write(encrypted_chunk)
+
+            del plaintext  # Free memory
+
+            # Atomic replace
+            temp_path.replace(file_path)
+            return True
+
+        except Exception as e:
+            logger.error(f"Re-encryption to chunked failed for {file_path}: {e}")
+            # Remove the temp file; the original file has not been touched
+            if temp_path.exists():
+                try:
+                    temp_path.unlink()
+                except Exception:
+                    pass
+            return False
+
+    def decrypt_file_streaming(self, input_path: Path) -> Optional[bytes]:
+        """
+        Decrypt a file and return bytes for streaming.
+        Only suitable for small files (single-shot format, ≤50MB).
+        For large chunked files, use decrypt_file_generator() instead.
+
+        Args:
+            input_path: Path to encrypted file
+
+        Returns:
+            Decrypted bytes or None on error
+        """
+        return self.decrypt_file(input_path, output_path=None)
+
+    def decrypt_file_generator(self, input_path: Path):
+        """
+        Generator that yields decrypted chunks for streaming large files.
+        For chunked files, yields one decrypted chunk at a time (~8MB each).
+        For single-shot files, yields the entire content at once.
+
+        Args:
+            input_path: Path to encrypted file
+
+        Yields:
+            bytes: Decrypted data chunks
+        """
+        import struct
+
+        if not self._aesgcm:
+            raise RuntimeError("Encryption not initialized")
+
+        if self._is_chunked_format(input_path):
+            with open(input_path, 'rb') as fin:
+                # Skip header
+                fin.read(len(self.CHUNKED_MAGIC))
+                fin.read(4)
+
+                while True:
+                    nonce = fin.read(self.AES_NONCE_SIZE)
+                    if len(nonce) == 0:
+                        break
+                    if len(nonce) != self.AES_NONCE_SIZE:
+                        raise ValueError("Truncated chunk nonce")
+
+                    enc_len_bytes = fin.read(4)
+                    if len(enc_len_bytes) != 4:
+                        raise ValueError("Truncated chunk length")
+                    enc_len = struct.unpack('>I', enc_len_bytes)[0]
+
+                    encrypted_chunk = fin.read(enc_len)
+                    if len(encrypted_chunk) != enc_len:
+                        raise ValueError("Truncated chunk data")
+
+                    yield self._aesgcm.decrypt(nonce, encrypted_chunk, None)
+        else:
+            # Single-shot: yield everything at once (≤50MB)
+            with open(input_path, 'rb') as f:
+                nonce = f.read(self.AES_NONCE_SIZE)
+                if len(nonce) != self.AES_NONCE_SIZE:
+                    raise ValueError("Invalid encrypted file: missing nonce")
+                ciphertext = f.read()
+            yield self._aesgcm.decrypt(nonce, ciphertext, None)
+
+    def decrypt_file_range_generator(self, input_path: Path, start: int, end: int):
+        """
+        Generator that yields only the decrypted bytes for a specific byte range.
+        For chunked files, only decrypts the necessary chunks and slices them.
+        For single-shot files, decrypts all and slices.
+
+        Args:
+            input_path: Path to encrypted file
+            start: Start byte offset (inclusive)
+            end: End byte offset (inclusive)
+
+        Yields:
+            bytes: Decrypted data for the requested range
+        """
+        import struct
+
+        if not self._aesgcm:
+            raise RuntimeError("Encryption not initialized")
+
+        if not self._is_chunked_format(input_path):
+            # Single-shot: decrypt all and slice (file is ≤50MB)
+            with open(input_path, 'rb') as f:
+                nonce = f.read(self.AES_NONCE_SIZE)
+                ciphertext = f.read()
+            plaintext = self._aesgcm.decrypt(nonce, ciphertext, None)
+            yield plaintext[start:end + 1]
+            return
+
+        chunk_size = self.CHUNK_SIZE
+        first_chunk = start // chunk_size
+        last_chunk = end // chunk_size
+
+        # Header: 5 magic + 4 chunk_size = 9 bytes
+        header_size = len(self.CHUNKED_MAGIC) + 4
+        # Each full encrypted chunk: 12 nonce + 4 length + (chunk_size + 16 tag)
+        enc_chunk_stride = self.AES_NONCE_SIZE + 4 + chunk_size + self.AES_TAG_SIZE
+
+        with open(input_path, 'rb') as fin:
+            for chunk_idx in range(first_chunk, last_chunk + 1):
+                # Seek to this chunk's position in the encrypted file
+                fin.seek(header_size + chunk_idx * enc_chunk_stride)
+
+                nonce = fin.read(self.AES_NONCE_SIZE)
+                if len(nonce) == 0:
+                    break
+                if len(nonce) != self.AES_NONCE_SIZE:
+                    raise ValueError("Truncated chunk nonce")
+
+                enc_len_bytes = fin.read(4)
+                if len(enc_len_bytes) != 4:
+                    raise ValueError("Truncated chunk length")
+                enc_len = struct.unpack('>I', enc_len_bytes)[0]
+
+                encrypted_chunk = fin.read(enc_len)
+                if len(encrypted_chunk) != enc_len:
+                    raise ValueError("Truncated chunk data")
+
+                decrypted_chunk = self._aesgcm.decrypt(nonce, encrypted_chunk, None)
+
+                # Calculate which part of this chunk we need
+                chunk_start_byte = chunk_idx * chunk_size
+                slice_start = max(start - chunk_start_byte, 0)
+                slice_end = min(end - chunk_start_byte + 1, len(decrypted_chunk))
+
+                yield decrypted_chunk[slice_start:slice_end]
+
+    # =========================================================================
+    # SESSION MANAGEMENT
+    # =========================================================================
+
+    def create_session(self, username: str = "user", timeout_minutes: int = 30) -> str:
+        """
+        Create a new session token.
+
+        Args:
+            username: Username for the session
+            timeout_minutes: Session timeout in minutes
+
+        Returns:
+            Session token string
+        """
+        token = secrets.token_urlsafe(32)
+        expiry = datetime.now() + timedelta(minutes=timeout_minutes)
+
+        with self._session_lock:
+            self._sessions[token] = {
+                'expiry': expiry,
+                'username': username,
+                'created_at': datetime.now()
+            }
+
+        logger.info(f"Created session for {username}, expires in {timeout_minutes} minutes")
+        return token
+
+    def verify_session(self, token: str) -> Optional[Dict]:
+        """
+        Verify a session token is valid and not expired.
+
+        Args:
+            token: Session token to verify
+
+        Returns:
+            Session info dict if valid, None otherwise
+        """
+        with self._session_lock:
+            session = self._sessions.get(token)
+
+            if not session:
+                return None
+
+            if datetime.now() > session['expiry']:
+                # Expired - remove it
+                del self._sessions[token]
+                return None
+
+            return session
+
+    def refresh_session(self, token: str, timeout_minutes: int = 30) -> bool:
+        """
+        Refresh a session's expiry time.
+
+        Args:
+            token: Session token to refresh
+            timeout_minutes: New timeout in minutes
+
+        Returns:
+            True if refreshed, False if token invalid
+        """
+        with self._session_lock:
+            session = self._sessions.get(token)
+
+            if not session:
+                return False
+
+            if datetime.now() > session['expiry']:
+                del self._sessions[token]
+                return False
+
+            session['expiry'] = datetime.now() + timedelta(minutes=timeout_minutes)
+            return True
+
+    def invalidate_session(self, token: str) -> bool:
+        """
+        Invalidate a session token (logout/lock).
+
+        Args:
+            token: Session token to invalidate
+
+        Returns:
+            True if invalidated, False if not found
+        """
+        with self._session_lock:
+            if token in self._sessions:
+                del self._sessions[token]
+                return True
+            return False
+
+    def invalidate_all_sessions(self) -> int:
+        """
+        Invalidate all sessions (master lock).
+
+        Returns:
+            Number of sessions invalidated
+        """
+        with self._session_lock:
+            count = len(self._sessions)
+            self._sessions.clear()
+            return count
+
+    def cleanup_expired_sessions(self) -> int:
+        """
+        Remove all expired sessions.
+
+        Returns:
+            Number of sessions removed
+        """
+        with self._session_lock:
+            now = datetime.now()
+            expired = [t for t, s in self._sessions.items() if now > s['expiry']]
+            for token in expired:
+                del self._sessions[token]
+            return len(expired)
+
+    def get_active_session_count(self) -> int:
+        """Get count of active (non-expired) sessions."""
+        self.cleanup_expired_sessions()
+        return len(self._sessions)
+
+
+# Global instance
+_crypto_instance: Optional[PrivateGalleryCrypto] = None
+_crypto_lock = Lock()
+
+
+def get_private_gallery_crypto() -> PrivateGalleryCrypto:
+    """Get or create the global crypto instance."""
+    global _crypto_instance
+
+    with _crypto_lock:
+        if _crypto_instance is None:
+            _crypto_instance = PrivateGalleryCrypto()
+        return _crypto_instance
+
+
+def export_key_to_file(path: str) -> bool:
+    """
+    Save the current derived key from the global crypto instance to a file.
+    The file is written with mode 0600 for security.
+
+    Args:
+        path: File path to write the key material to
+
+    Returns:
+        True if successful
+    """
+    import json as _json
+
+    crypto = get_private_gallery_crypto()
+    if not crypto.is_initialized() or crypto._derived_key is None:
+        logger.warning("Cannot export key: encryption not initialized")
+        return False
+
+    try:
+        key_data = {
+            'derived_key': base64.b64encode(crypto._derived_key).decode('utf-8')
+        }
+        key_path = Path(path)
+        key_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Write atomically via temp file
+        tmp_path = key_path.with_suffix('.tmp')
+        with open(tmp_path, 'w') as f:
+            _json.dump(key_data, f)
+        os.chmod(str(tmp_path), 0o600)
+        tmp_path.replace(key_path)
+
+        logger.info(f"Exported encryption key to {path}")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to export key to {path}: {e}")
+        return False
+
+
+def load_key_from_file(path: str) -> Optional[PrivateGalleryCrypto]:
+    """
+    Load a derived key from a file and return an initialized crypto instance.
+
+    Args:
+        path: File path containing the key material
+
+    Returns:
+        Initialized PrivateGalleryCrypto instance, or None if unavailable
+    """
+    import json as _json
+
+    key_path = Path(path)
+    if not key_path.exists():
+        return None
+
+    try:
+        with open(key_path, 'r') as f:
+            key_data = _json.load(f)
+
+        derived_key = base64.b64decode(key_data['derived_key'])
+
+        crypto = PrivateGalleryCrypto()
+        crypto._derived_key = derived_key
+
+        # Initialize Fernet for field encryption
+        fernet_key = base64.urlsafe_b64encode(derived_key)
+        crypto._fernet = Fernet(fernet_key)
+
+        # Initialize AES-GCM for file encryption
+        crypto._aesgcm = AESGCM(derived_key)
+
+        return crypto
+    except Exception as e:
+        logger.error(f"Failed to load key from {path}: {e}")
+        return None
+
+
+def delete_key_file(path: str) -> bool:
+    """Delete the key file if it exists."""
+    try:
+        key_path = Path(path)
+        if key_path.exists():
+            key_path.unlink()
+            logger.info(f"Deleted key file {path}")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to delete key file {path}: {e}")
+        return False
--- a/modules/pushover_notifier.py
+++ b/modules/pushover_notifier.py
@@ -0,0 +1,961 @@
+#!/usr/bin/env python3
+"""
+Pushover Notification Module
+Sends professional push notifications when new media is downloaded
+"""
+
+import os
+import requests
+from datetime import datetime
+from typing import Dict, Optional, Any
+from pathlib import Path
+from modules.universal_logger import get_logger
+
+logger = get_logger('Notifier')
+
+
+class PushoverNotifier:
+    """Handles Pushover push notifications for media downloads"""
+
+    # Pushover API endpoint
+    API_URL = "https://api.pushover.net/1/messages.json"
+
+    # Plural forms for proper grammar
+    # Keys are lower-case singular content types; _pluralize() lower-cases the
+    # word before looking it up here and only then falls back to appending "s".
+    PLURALS = {
+        'story': 'stories',
+        'video': 'videos',
+        'photo': 'photos',
+        'image': 'images',
+        'reel': 'reels',
+        'post': 'posts',
+        'thread': 'threads',
+        'item': 'items',
+        'media': 'media',  # Already plural (singular: medium)
+        'tagged': 'tagged',  # "Tagged" doesn't change in plural (7 Tagged Photos)
+        'audio': 'audio',  # Uncountable (3 Audio Downloaded)
+    }
+
+    # Priority levels
+    # These span the same -2..2 range that __init__ documents for default_priority.
+    # NOTE(review): Pushover's API docs appear to label -2 "lowest" and -1 "low";
+    # the names LOW/NORMAL here are shifted by one relative to that - confirm
+    # before choosing a constant by name.
+    PRIORITY_LOW = -2
+    PRIORITY_NORMAL = -1
+    PRIORITY_DEFAULT = 0
+    PRIORITY_HIGH = 1
+    PRIORITY_EMERGENCY = 2
+
+    # Platform emoji/icons for better visual appeal
+    # Keyed by lower-case service name; callers in this class look up
+    # platform.lower() and fall back to a generic download icon when absent.
+    PLATFORM_ICONS = {
+        'instagram': '📸',
+        'fastdl': '📸',
+        'imginn': '📸',
+        'toolzu': '📸',
+        'tiktok': '🎵',
+        'forums': '💬',
+        'snapchat': '👻',
+        'youtube': '▶️',
+        'twitter': '🐦',
+        'easynews': '📰',
+    }
+
+    # Platform name mapping (service name -> user-friendly platform name)
+    # Services not listed here (e.g. 'youtube' and 'twitter', which do have icons
+    # above) are rendered by _get_platform_display_name() as platform.title().
+    PLATFORM_NAMES = {
+        'fastdl': 'Instagram',
+        'imginn': 'Instagram',
+        'toolzu': 'Instagram',
+        'instagram': 'Instagram',
+        'tiktok': 'TikTok',
+        'snapchat': 'Snapchat',
+        'forums': 'Forum',
+        'easynews': 'Easynews',
+    }
+
+    # Content type icons
+    # Keyed by lower-case content type; used as the bullet in front of the
+    # "Source" line and in the per-type breakdown of batch notifications.
+    CONTENT_ICONS = {
+        'post': '🖼️',
+        'story': '⭐',
+        'reel': '🎬',
+        'video': '🎥',
+        'image': '🖼️',
+        'thread': '🧵',
+        'photo': '📷',
+        'audio': '🎵',
+    }
+
+    def __init__(self, user_key: str, api_token: str, enabled: bool = True,
+                 default_priority: int = 0, device: str = None, include_image: bool = True,
+                 unified_db=None, enable_review_queue_notifications: bool = True):
+        """
+        Set up a notifier bound to one Pushover user/application pair
+
+        Args:
+            user_key: Your Pushover user key
+            api_token: Your Pushover application API token
+            enabled: Master switch; when False every send is skipped
+            default_priority: Priority used when a call does not pass one (-2 to 2)
+            device: Device name to target when a call does not pass one (optional)
+            include_image: Whether thumbnails may be attached to notifications (default: True)
+            unified_db: UnifiedDatabase instance used to record notifications (optional)
+            enable_review_queue_notifications: Cached setting controlling review queue
+                notifications (default: True)
+        """
+        # Credentials
+        self.user_key, self.api_token = user_key, api_token
+
+        # Delivery options
+        self.enabled = enabled
+        self.default_priority = default_priority
+        self.device = device
+        self.include_image = include_image
+        self.enable_review_queue_notifications = enable_review_queue_notifications
+
+        # Optional persistence backend
+        self.unified_db = unified_db
+
+        # Per-instance outcome counters
+        self.stats = dict.fromkeys(('sent', 'failed', 'skipped'), 0)
+
+        # Set by the notify_* methods and consumed when the result is recorded
+        self._current_notification_context = None
+
+    def _record_notification(self, title: str, message: str, priority: int, status: str, response_data: dict, image_path: str = None):
+        """Record notification to database and announce it to the web frontend
+
+        Nothing is recorded unless both a database handle and a notification
+        context (set by the notify_* methods) are present. Once a recording
+        attempt has been made the context is always cleared, whether or not the
+        insert succeeded, so it cannot leak into a later notification.
+
+        Args:
+            title: Notification title
+            message: Notification message
+            priority: Priority level
+            status: Status ('sent' or 'failed')
+            response_data: Response from Pushover API
+            image_path: Optional path to thumbnail image
+        """
+        if not self.unified_db:
+            logger.debug("[Pushover] No database connection available for recording notification")
+            return
+
+        if not self._current_notification_context:
+            logger.debug("[Pushover] No notification context available for recording")
+            return
+
+        import json
+        import traceback
+
+        context = self._current_notification_context
+
+        try:
+            # Work on a copy: the context may reference the caller's own metadata
+            # dict, which must not gain an 'image_path' key as a side effect.
+            metadata = dict(context.get('metadata') or {})
+            if image_path:
+                metadata['image_path'] = str(image_path)
+
+            with self.unified_db.get_connection() as conn:
+                cursor = conn.cursor()
+
+                cursor.execute("""
+                    INSERT INTO notifications (
+                        platform, source, content_type, message, title,
+                        priority, download_count, sent_at, status, response_data, metadata
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), ?, ?, ?)
+                """, (
+                    context.get('platform'),
+                    context.get('source'),
+                    context.get('content_type'),
+                    message,
+                    title,
+                    priority,
+                    context.get('download_count', 1),
+                    status,
+                    json.dumps(response_data) if response_data else None,
+                    json.dumps(metadata) if metadata else None
+                ))
+
+                conn.commit()
+                logger.info(f"[Pushover] Recorded notification to database: {title} (status: {status})")
+
+                # Broadcast to frontend for real-time toast notification
+                try:
+                    from web.backend.api import manager
+                    if manager and manager.active_connections:
+                        manager.broadcast_sync({
+                            'type': 'notification_created',
+                            'notification': {
+                                'title': title,
+                                'message': message,
+                                'platform': context.get('platform'),
+                                'source': context.get('source'),
+                                'content_type': context.get('content_type'),
+                                'download_count': context.get('download_count', 1),
+                                'status': status,
+                            }
+                        })
+                except Exception as broadcast_error:
+                    # Best effort only - API may not be running or manager not available
+                    logger.debug(f"[Pushover] Frontend broadcast skipped: {broadcast_error}")
+        except Exception as e:
+            logger.warning(f"[Pushover] Failed to record notification to database: {e}")
+            logger.warning(f"[Pushover] Traceback: {traceback.format_exc()}")
+        finally:
+            # Clear context after the attempt (success or failure) to prevent
+            # stale data on subsequent notifications
+            self._current_notification_context = None
+
+    def _get_platform_display_name(self, platform: str, source: str = None) -> str:
+        """
+        Map a service name onto the label shown to the user
+
+        Args:
+            platform: Service/platform name (fastdl, imginn, toolzu, etc.)
+            source: Source/username (for forums, this is the forum name)
+
+        Returns:
+            The forum name in title case for forums with a source, the mapped
+            name from PLATFORM_NAMES when one exists, otherwise the platform
+            name in title case
+        """
+        key = platform.lower()
+
+        # A forum is identified by its own name rather than the generic label
+        if source and key == 'forums':
+            return source.title()
+
+        if key in self.PLATFORM_NAMES:
+            return self.PLATFORM_NAMES[key]
+        return platform.title()
+
+    def _pluralize(self, word: str, count: int) -> str:
+        """
+        Get the correct plural form of a word
+
+        Resolution order for counts other than 1:
+        1. a custom entry in PLURALS (matched case-insensitively, title-cased
+           when the input starts with an upper-case letter),
+        2. words that already are a PLURALS value or end in "s" are returned
+           unchanged,
+        3. consonant + "y" becomes "ies" (gallery -> galleries),
+        4. otherwise "s" is appended.
+
+        Args:
+            word: Singular word (None/empty is treated as "item")
+            count: Count to determine if plural needed
+
+        Returns:
+            Singular or plural form
+        """
+        # Handle None or empty word
+        if not word:
+            return "items" if count != 1 else "item"
+
+        if count == 1:
+            return word
+
+        word_lower = word.lower()
+
+        # Check if we have a custom plural
+        custom = self.PLURALS.get(word_lower)
+        if custom is not None:
+            return custom.title() if word[0].isupper() else custom
+
+        # Already plural (known plural form, or simply ends with 's'): keep as-is
+        if word_lower in self.PLURALS.values() or word_lower.endswith('s'):
+            return word
+
+        # Consonant + 'y' takes 'ies'; vowel + 'y' (replay, day) just takes 's'
+        if (len(word_lower) > 1 and word_lower.endswith('y')
+                and word_lower[-2].isalpha() and word_lower[-2] not in 'aeiou'):
+            return f"{word[:-1]}ies"
+
+        return f"{word}s"
+
+    def _extract_random_video_frame(self, video_path: str) -> str:
+        """
+        Grab one still image from a video at a randomly chosen position
+
+        The duration is read with ffprobe, a timestamp is picked inside the
+        middle 80% of the video, and ffmpeg writes that frame to a temporary
+        JPEG. The temporary file is removed again whenever extraction fails.
+
+        Args:
+            video_path: Path to the video file
+
+        Returns:
+            Path to extracted frame (temp file) or None if extraction failed
+        """
+        import subprocess
+        import random
+        import tempfile
+
+        try:
+            # Ask ffprobe for the duration, printed as a bare number
+            probe = subprocess.run(
+                [
+                    'ffprobe',
+                    '-v', 'error',
+                    '-show_entries', 'format=duration',
+                    '-of', 'default=noprint_wrappers=1:nokey=1',
+                    video_path,
+                ],
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
+            if probe.returncode != 0:
+                logger.warning(f"[Pushover] ffprobe failed to get video duration: {probe.stderr[:200]}")
+                return None
+
+            duration = float(probe.stdout.strip())
+
+            # Skip first and last 10% to avoid black frames; a degenerate window
+            # (video too short) falls back to the midpoint
+            window_start = duration * 0.1
+            window_end = duration * 0.9
+            timestamp = (
+                duration / 2
+                if window_end <= window_start
+                else random.uniform(window_start, window_end)
+            )
+
+            logger.debug(f"[Pushover] Video duration: {duration:.2f}s, extracting frame at {timestamp:.2f}s")
+
+            # Reserve a temp file name; only the path is needed because ffmpeg
+            # opens the file itself
+            handle, frame_path = tempfile.mkstemp(suffix='.jpg', prefix='pushover_frame_')
+            os.close(handle)
+            keep_frame = False
+
+            try:
+                grab = subprocess.run(
+                    [
+                        'ffmpeg',
+                        '-ss', str(timestamp),  # Seek to timestamp
+                        '-i', video_path,       # Input file
+                        '-vframes', '1',        # Extract 1 frame
+                        '-q:v', '2',            # High quality
+                        '-y',                   # Overwrite output
+                        frame_path,
+                    ],
+                    capture_output=True,
+                    text=True,
+                    timeout=30,
+                )
+                if grab.returncode != 0:
+                    logger.debug(f"[Pushover] ffmpeg failed: {grab.stderr}")
+                    return None
+
+                # Verify the frame was created and is not empty
+                frame = Path(frame_path)
+                if not (frame.exists() and frame.stat().st_size > 0):
+                    logger.debug("[Pushover] Frame extraction produced empty file")
+                    return None
+
+                keep_frame = True
+                return frame_path
+
+            except subprocess.TimeoutExpired:
+                logger.debug("[Pushover] Video frame extraction timed out")
+                return None
+            finally:
+                # Anything other than a verified frame leaves no temp file behind
+                if not keep_frame:
+                    try:
+                        Path(frame_path).unlink(missing_ok=True)
+                    except OSError:
+                        pass
+
+        except Exception as e:
+            logger.debug(f"[Pushover] Error extracting video frame: {e}")
+            return None
+
+    def send_notification(self,
+                         title: str,
+                         message: str,
+                         priority: int = None,
+                         url: str = None,
+                         url_title: str = None,
+                         sound: str = None,
+                         device: str = None,
+                         html: bool = False,
+                         image_path: str = None,
+                         max_retries: int = 3,
+                         retry_delay: int = 5) -> bool:
+        """
+        Send a Pushover notification with automatic retry on transient failures
+
+        Network errors and HTTP 5xx responses are retried with exponential
+        backoff. HTTP 4xx responses, API-level errors and unexpected exceptions
+        fail immediately. Every outcome of an attempted request is handed to
+        _record_notification(). The image attachment, if any, is reopened for
+        each attempt and always closed afterwards, even when the request raises.
+
+        Args:
+            title: Notification title
+            message: Notification message
+            priority: Priority level (-2 to 2); defaults to self.default_priority
+            url: Supplementary URL
+            url_title: Title for the URL
+            sound: Notification sound name
+            device: Specific device to send to; defaults to self.device
+            html: Enable HTML formatting
+            image_path: Path to image file to attach as thumbnail
+            max_retries: Maximum number of attempts (default 3)
+            retry_delay: Initial retry delay in seconds, doubles each retry (default 5)
+
+        Returns:
+            True if notification sent successfully, False otherwise (also when
+            notifications are disabled, credentials are missing, or max_retries
+            is not positive)
+        """
+        import time
+
+        if not self.enabled:
+            logger.debug("[Pushover] Notifications disabled, skipping")
+            self.stats['skipped'] += 1
+            return False
+
+        if not self.user_key or not self.api_token:
+            logger.warning("[Pushover] Missing user_key or api_token")
+            self.stats['failed'] += 1
+            return False
+
+        # Normalize priority
+        actual_priority = priority if priority is not None else self.default_priority
+
+        # Prepare payload
+        payload = {
+            'token': self.api_token,
+            'user': self.user_key,
+            'title': title,
+            'message': message,
+            'priority': actual_priority,
+        }
+
+        # Add optional parameters
+        if url:
+            payload['url'] = url
+        if url_title:
+            payload['url_title'] = url_title
+        if sound:
+            payload['sound'] = sound
+        if device or self.device:
+            payload['device'] = device or self.device
+        if html:
+            payload['html'] = 1
+
+        # Attachable image types and the MIME type sent for each
+        mime_types = {
+            '.jpg': 'image/jpeg',
+            '.jpeg': 'image/jpeg',
+            '.png': 'image/png',
+            '.gif': 'image/gif',
+            '.bmp': 'image/bmp',
+            '.webp': 'image/webp',
+        }
+
+        def _backoff(attempt_index: int, reason: str) -> None:
+            # Sleep retry_delay * 2^attempt before the next attempt
+            wait_time = retry_delay * (2 ** attempt_index)
+            logger.warning(f"[Pushover] {reason}, retrying in {wait_time}s (attempt {attempt_index + 1}/{max_retries})")
+            time.sleep(wait_time)
+
+        # Retry loop with exponential backoff
+        for attempt in range(max_retries):
+            can_retry = attempt < max_retries - 1
+            attachment = None
+            try:
+                # Check if we have an image to attach
+                files = None
+                if image_path:
+                    img_path = Path(image_path)
+                    mime_type = mime_types.get(img_path.suffix.lower())
+
+                    # Only attach if file exists and is an image
+                    if mime_type and img_path.exists():
+                        try:
+                            attachment = open(img_path, 'rb')
+                            files = {'attachment': (img_path.name, attachment, mime_type)}
+                            logger.debug(f"[Pushover] Attaching image: {img_path.name}")
+                        except Exception as e:
+                            logger.warning(f"[Pushover] Failed to attach image {image_path}: {e}")
+
+                try:
+                    response = requests.post(self.API_URL, data=payload, files=files, timeout=30)
+                finally:
+                    # Close the handle even if the request raised, so failed
+                    # attempts do not leak open files
+                    if attachment is not None:
+                        attachment.close()
+
+                if response.status_code == 200:
+                    result = response.json()
+                    if result.get('status') == 1:
+                        request_id = result.get('request', 'unknown')
+                        if attempt > 0:
+                            logger.info(f"[Pushover] Notification sent after {attempt + 1} attempt(s): {title} (request: {request_id})")
+                        else:
+                            logger.info(f"[Pushover] Notification sent: {title} (request: {request_id})")
+                        self.stats['sent'] += 1
+
+                        # Record to database if available and we have context
+                        self._record_notification(title, message, actual_priority, 'sent', result, image_path)
+                        return True
+
+                    # API returned error status - don't retry client errors
+                    logger.error(f"[Pushover] API error: {result}")
+                    self.stats['failed'] += 1
+                    self._record_notification(title, message, actual_priority, 'failed', result, image_path)
+                    return False
+
+                if response.status_code >= 500:
+                    # Server error (5xx) - retry with backoff
+                    if can_retry:
+                        _backoff(attempt, f"HTTP {response.status_code}: {response.text[:100]}")
+                        continue
+
+                    # Max retries exceeded
+                    logger.error(f"[Pushover] HTTP {response.status_code} after {max_retries} attempts: {response.text}")
+                    self.stats['failed'] += 1
+                    self._record_notification(title, message, actual_priority, 'failed', {'error': f"HTTP {response.status_code} after {max_retries} retries"}, image_path)
+                    return False
+
+                # Client error (4xx) - don't retry
+                logger.error(f"[Pushover] HTTP {response.status_code}: {response.text}")
+                self.stats['failed'] += 1
+                self._record_notification(title, message, actual_priority, 'failed', {'error': response.text}, image_path)
+                return False
+
+            except (requests.ConnectionError, requests.Timeout) as e:
+                # Network errors - retry with backoff
+                if can_retry:
+                    _backoff(attempt, f"Network error: {e}")
+                    continue
+
+                # Max retries exceeded
+                logger.error(f"[Pushover] Network error after {max_retries} attempts: {e}")
+                self.stats['failed'] += 1
+                self._record_notification(title, message, actual_priority, 'failed', {'error': f"Network error after {max_retries} retries: {str(e)}"}, image_path)
+                return False
+
+            except Exception as e:
+                # Other exceptions - don't retry
+                logger.error(f"[Pushover] Failed to send notification: {e}")
+                self.stats['failed'] += 1
+                self._record_notification(title, message, actual_priority, 'failed', {'error': str(e)}, image_path)
+                return False
+
+        # Only reached when max_retries is not positive (no attempt was made)
+        return False
+
+    def notify_download(self,
+                       platform: str,
+                       source: str,
+                       content_type: str,
+                       filename: str = None,
+                       search_term: str = None,
+                       count: int = 1,
+                       metadata: Dict[str, Any] = None,
+                       priority: int = None) -> bool:
+        """
+        Send a professional notification for a new download
+
+        Builds an HTML-formatted message, stores the notification context used
+        for database recording, and delegates delivery to send_notification().
+        The stored context metadata is a copy of the caller's metadata with
+        the search term merged in (the caller's dict is never shared or
+        replaced), and datetime values are stored as ISO strings so the record
+        can be serialized to JSON.
+
+        Args:
+            platform: Platform name (instagram, tiktok, forum, etc.)
+            source: Username or source identifier
+            content_type: Type of content (post, story, reel, thread, etc.)
+            filename: Optional filename (currently not used in the message)
+            search_term: Optional search term (for forum searches)
+            count: Number of items downloaded (default 1)
+            metadata: Additional metadata dictionary; 'post_date' (ISO string
+                or datetime) is shown in the message when present
+            priority: Notification priority
+
+        Returns:
+            True if notification sent successfully
+        """
+        metadata = metadata or {}
+
+        # Handle None content_type
+        content_type = content_type or 'item'
+
+        # Get appropriate icons
+        platform_icon = self.PLATFORM_ICONS.get(platform.lower(), '📥')
+        content_icon = self.CONTENT_ICONS.get(content_type.lower(), '📄')
+
+        # Build title with proper grammar
+        if count > 1:
+            plural_type = self._pluralize(content_type, count)
+            title = f"{platform_icon} {count} {plural_type.title()} Downloaded"
+        else:
+            title = f"{platform_icon} New {content_type.title()} Downloaded"
+
+        # Build message
+        message_parts = []
+
+        # Add platform (convert service name to user-friendly platform name)
+        # For forums, use forum name; for Instagram services, use "Instagram"
+        platform_display = self._get_platform_display_name(platform, source)
+        message_parts.append(f"📱 <b>Platform:</b> {platform_display}")
+
+        # Add source/username (skip for forums since source becomes the platform name)
+        if source and platform.lower() != 'forums':
+            message_parts.append(f"{content_icon} <b>Source:</b> {source}")
+
+        # Add search term if available
+        if search_term:
+            message_parts.append(f"🔍 <b>Search:</b> {search_term}")
+
+        # Add post date if available
+        if metadata.get('post_date'):
+            try:
+                if isinstance(metadata['post_date'], str):
+                    post_date = datetime.fromisoformat(metadata['post_date'])
+                else:
+                    post_date = metadata['post_date']
+                date_str = post_date.strftime("%Y-%m-%d %H:%M")
+                message_parts.append(f"📅 <b>Posted:</b> {date_str}")
+            except Exception:
+                # An unparseable post date is simply left out of the message
+                pass
+
+        # Add timestamp
+        now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        message_parts.append(f"⏰ <b>Downloaded:</b> {now}")
+
+        message = "\n".join(message_parts)
+
+        # Metadata stored with the database record: a JSON-safe copy of the
+        # caller's metadata plus the search term, mirroring how
+        # notify_batch_download combines the two
+        record_metadata = {
+            key: (value.isoformat() if isinstance(value, datetime) else value)
+            for key, value in metadata.items()
+        }
+        if search_term:
+            record_metadata['search_term'] = search_term
+
+        # Set context for database recording
+        self._current_notification_context = {
+            'platform': platform,
+            'source': source,
+            'content_type': content_type,
+            'download_count': count,
+            'metadata': record_metadata
+        }
+
+        # Determine sound based on platform or priority
+        sound = None
+        if priority is not None and priority >= self.PRIORITY_HIGH:
+            sound = "pushover"  # Default urgent sound
+
+        return self.send_notification(
+            title=title,
+            message=message,
+            priority=priority,
+            sound=sound,
+            html=True
+        )
+
+    def notify_batch_download(self,
+                             platform: str,
+                             downloads: list,
+                             search_term: str = None,
+                             is_review_queue: bool = False) -> bool:
+        """
+        Send notification for batch downloads
+
+        Args:
+            platform: Platform name
+            downloads: List of download dicts with keys: source, content_type, filename, file_path
+            search_term: Optional search term
+            is_review_queue: True if these are review queue items (no face match)
+
+        Returns:
+            True if notification sent successfully
+        """
+        if not downloads:
+            return False
+
+        # Check if review queue notifications are disabled
+        # Always check current database value for review queue notifications
+        if is_review_queue:
+            if self.unified_db:
+                try:
+                    from modules.settings_manager import SettingsManager
+                    settings_manager = SettingsManager(str(self.unified_db.db_path))
+                    pushover_settings = settings_manager.get('pushover', {})
+                    enable_review_notifications = pushover_settings.get('enable_review_queue_notifications', True)
+                    if not enable_review_notifications:
+                        logger.debug("[Pushover] Skipping review queue notification (disabled in settings)")
+                        return False
+                except Exception as e:
+                    logger.warning(f"[Pushover] Could not check review queue notification setting, using cached value: {e}")
+                    # Fall back to cached value
+                    if not self.enable_review_queue_notifications:
+                        logger.debug("[Pushover] Skipping review queue notification (disabled in cached settings)")
+                        return False
+            else:
+                # No database, use cached value
+                if not self.enable_review_queue_notifications:
+                    logger.debug("[Pushover] Skipping review queue notification (disabled in settings)")
+                    return False
+
+        # Extract source from first download
+        source = None
+        if downloads and downloads[0].get('source'):
+            source = downloads[0]['source']
+
+        # Extract content type (handle None explicitly)
+        content_type = (downloads[0].get('content_type') or 'item') if downloads else 'item'
+
+        # Collect all media file paths for the notification database record
+        image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.heic', '.heif', '.avif', '.tiff', '.tif'}
+        video_extensions = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v', '.flv'}
+        audio_extensions = {'.mp3', '.wav', '.flac', '.aac', '.m4a', '.ogg', '.wma'}
+        all_media_paths = []
+
+        for dl in downloads:
+            file_path = dl.get('file_path')
+            if file_path and Path(file_path).exists():
+                suffix = Path(file_path).suffix.lower()
+                ct = dl.get('content_type', '').lower()
+                if ct == 'audio' or suffix in audio_extensions:
+                    media_type = 'audio'
+                elif ct == 'image' or suffix in image_extensions:
+                    media_type = 'image'
+                elif ct == 'video' or suffix in video_extensions:
+                    media_type = 'video'
+                else:
+                    continue
+                all_media_paths.append({
+                    'file_path': file_path,
+                    'filename': dl.get('filename', Path(file_path).name),
+                    'media_type': media_type
+                })
+
+        # Set context for database recording with all media files
+        metadata = {}
+        if search_term:
+            metadata['search_term'] = search_term
+        if all_media_paths:
+            metadata['media_files'] = all_media_paths  # Store all media files for notifications page
+
+        self._current_notification_context = {
+            'platform': platform,
+            'source': source,
+            'content_type': content_type,
+            'download_count': len(downloads),
+            'metadata': metadata if metadata else None
+        }
+
+        # Use different icon for review queue
+        if is_review_queue:
+            platform_icon = "👁️"  # Eye icon for review
+        else:
+            platform_icon = self.PLATFORM_ICONS.get(platform.lower(), '📥')
+
+        # Group by content type
+        by_type = {}
+        for dl in downloads:
+            content_type = dl.get('content_type') or 'item'  # Handle None explicitly
+            by_type.setdefault(content_type, []).append(dl)
+
+        # Build title with proper grammar
+        total = len(downloads)
+        if is_review_queue:
+            # Review queue notification
+            if len(by_type) == 1:
+                content_type = list(by_type.keys())[0]
+                plural_type = self._pluralize(content_type, total)
+                title = f"{platform_icon} {total} {plural_type.title()} - Review Queue"
+            else:
+                title = f"{platform_icon} {total} Items - Review Queue"
+        else:
+            # Regular download notification
+            if len(by_type) == 1:
+                # Single content type - use specific name
+                content_type = list(by_type.keys())[0]
+                plural_type = self._pluralize(content_type, total)
+                title = f"{platform_icon} {total} {plural_type.title()} Downloaded"
+            else:
+                # Multiple content types - use "Items"
+                title = f"{platform_icon} {total} Items Downloaded"
+
+        # Build message
+        message_parts = []
+
+        # Extract source from first download since they're all from same source
+        source = None
+        if downloads and downloads[0].get('source'):
+            source = downloads[0]['source']
+
+        # Add platform (convert service name to user-friendly platform name)
+        # For forums, use forum name; for Instagram services, use "Instagram"
+        platform_display = self._get_platform_display_name(platform, source)
+        message_parts.append(f"📱 <b>Platform:</b> {platform_display}")
+
+        # Add source/username (skip for forums since source becomes the platform name)
+        if source and platform.lower() != 'forums':
+            # Get content icon for the primary content type
+            primary_content_type = list(by_type.keys())[0] if by_type else 'item'
+            content_icon = self.CONTENT_ICONS.get(primary_content_type.lower(), '📄')
+            message_parts.append(f"{content_icon} <b>Source:</b> {source}")
+
+        if search_term:
+            message_parts.append(f"🔍 <b>Search:</b> {search_term}")
+
+        # Add review queue notice if applicable
+        if is_review_queue:
+            message_parts.append(f"\n⚠️ <b>No face match detected</b> - Items moved to review queue for manual review")
+
+        # Summary by type (only show if multiple types)
+        if len(by_type) > 1:
+            message_parts.append(f"\n<b>Breakdown:</b>")
+            for content_type, items in by_type.items():
+                content_icon = self.CONTENT_ICONS.get(content_type.lower(), '📄')
+                count = len(items)
+                plural_type = self._pluralize(content_type, count)
+                message_parts.append(f"{content_icon} {count} {plural_type}")
+
+        # Add timestamp
+        now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        message_parts.append(f"\n⏰ <b>Downloaded:</b> {now}")
+
+        message = "\n".join(message_parts)
+
+        # Select a random file for thumbnail attachment (if enabled)
+        # Can be an image or video (extract random frame from video)
+        import random
+        image_path = None
+        temp_frame_path = None  # Track temporary frame extractions
+
+        if self.include_image:
+            image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'}
+            video_extensions = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v'}
+
+            # Collect all valid media file paths (images and videos)
+            media_files = []
+            for dl in downloads:
+                file_path = dl.get('file_path')
+                if file_path:
+                    exists = Path(file_path).exists()
+                    if exists:
+                        suffix = Path(file_path).suffix.lower()
+                        if suffix in image_extensions or suffix in video_extensions:
+                            media_files.append(file_path)
+                        else:
+                            logger.debug(f"[Pushover] Skipping file (invalid extension): {Path(file_path).name} ({suffix})")
+                    else:
+                        logger.warning(f"[Pushover] Skipping file (doesn't exist): {file_path}")
+                else:
+                    logger.warning(f"[Pushover] Download entry has no file_path")
+
+            logger.debug(f"[Pushover] Found {len(media_files)} valid media files out of {len(downloads)} downloads")
+
+            # Randomly select one file if available
+            if media_files:
+                selected_file = random.choice(media_files)
+                selected_suffix = Path(selected_file).suffix.lower()
+
+                if selected_suffix in image_extensions:
+                    # It's an image, use directly
+                    image_path = selected_file
+                    logger.debug(f"[Pushover] Selected image thumbnail: {Path(image_path).name}")
+
+                elif selected_suffix in video_extensions:
+                    # It's a video, extract a random frame
+                    logger.info(f"[Pushover] Selected video for thumbnail, extracting random frame: {Path(selected_file).name}")
+                    temp_frame_path = self._extract_random_video_frame(selected_file)
+                    if temp_frame_path:
+                        image_path = temp_frame_path
+                        logger.info(f"[Pushover] Successfully extracted video frame for thumbnail: {Path(temp_frame_path).name}")
+                    else:
+                        logger.warning("[Pushover] Failed to extract frame from video - notification will be sent without thumbnail")
+            else:
+                logger.debug("[Pushover] No media files available for thumbnail attachment")
+        else:
+            logger.debug("[Pushover] Image thumbnails disabled in settings")
+
+        # Send notification with lower priority for review queue
+        priority = -1 if is_review_queue else None  # Low priority for review queue
+        result = self.send_notification(
+            title=title,
+            message=message,
+            html=True,
+            image_path=image_path,
+            priority=priority
+        )
+
+        # Clean up temporary frame file if we created one
+        if temp_frame_path and Path(temp_frame_path).exists():
+            try:
+                Path(temp_frame_path).unlink()
+                logger.debug(f"[Pushover] Cleaned up temp frame: {Path(temp_frame_path).name}")
+            except Exception as e:
+                logger.debug(f"[Pushover] Failed to cleanup temp frame: {e}")
+
+        return result
+
+    def notify_error(self, platform: str, error_message: str, source: str = None) -> bool:
+        """
+        Send a high-priority error notification
+
+        The platform is shown under the name returned by
+        ``_get_platform_display_name``; the message is sent as HTML with the
+        "siren" sound.
+
+        Args:
+            platform: Platform/service name
+            error_message: Error description, placed on the "Error" line
+            source: Optional source/username
+
+        Returns:
+            Whatever ``send_notification`` returns for this message
+        """
+        display_name = self._get_platform_display_name(platform, source)
+
+        lines = [f"<b>Platform:</b> {display_name}"]
+
+        # For forums the source is already used as the platform name, so a
+        # separate "Source" line would only repeat it.
+        show_source = bool(source) and platform.lower() != 'forums'
+        if show_source:
+            lines.append(f"<b>Source:</b> {source}")
+
+        # Leading newline leaves a blank line before the error text.
+        lines.append(f"\n<b>Error:</b> {error_message}")
+
+        return self.send_notification(
+            title=f"⚠️ {display_name} Download Error",
+            message="\n".join(lines),
+            priority=self.PRIORITY_HIGH,
+            sound="siren",
+            html=True
+        )
+
+    def get_stats(self) -> Dict[str, int]:
+        """
+        Get notification statistics
+
+        Returns:
+            A shallow copy of ``self.stats``, so changing the returned dict
+            does not alter the notifier's own counters
+        """
+        snapshot = self.stats.copy()
+        return snapshot
+
+    def reset_stats(self):
+        """
+        Reset statistics
+
+        Rebinds ``self.stats`` to a fresh dict with the ``sent``, ``failed``
+        and ``skipped`` counters all at zero.
+        """
+        self.stats = dict.fromkeys(('sent', 'failed', 'skipped'), 0)
+
+
+def create_notifier_from_config(config: Dict, unified_db=None) -> Optional[PushoverNotifier]:
+    """
+    Create a PushoverNotifier from configuration dictionary
+
+    Only the ``pushover`` section of ``config`` is read. A section that is
+    missing, empty, or explicitly null is treated as "notifications
+    disabled".
+
+    Args:
+        config: Configuration dict with pushover settings
+        unified_db: UnifiedDatabase instance for recording notifications (optional)
+
+    Returns:
+        PushoverNotifier instance or None if disabled/invalid
+    """
+    # ``or {}`` also covers a section that is present but null; a plain
+    # ``.get('pushover', {})`` would return None there and the following
+    # ``.get`` calls would raise AttributeError.
+    pushover_config = config.get('pushover') or {}
+
+    if not pushover_config.get('enabled', False):
+        logger.info("[Pushover] Notifications disabled in config")
+        return None
+
+    user_key = pushover_config.get('user_key')
+    api_token = pushover_config.get('api_token')
+
+    # Both credentials are required; without them no notifier is built.
+    if not user_key or not api_token:
+        logger.warning("[Pushover] Missing user_key or api_token in config")
+        return None
+
+    # Optional settings fall back to: priority 0, thumbnails on,
+    # review-queue notifications on, no specific device.
+    return PushoverNotifier(
+        user_key=user_key,
+        api_token=api_token,
+        enabled=True,
+        default_priority=pushover_config.get('priority', 0),
+        device=pushover_config.get('device'),
+        include_image=pushover_config.get('include_image', True),
+        unified_db=unified_db,
+        enable_review_queue_notifications=pushover_config.get('enable_review_queue_notifications', True)
+    )
+
+
+if __name__ == "__main__":
+    # Manual smoke test, executed only when this module is run as a script
+    print("Testing Pushover Notifier...")
+
+    # Placeholder credentials - replace with your own and set enabled=True
+    # to actually exercise the notifier
+    notifier = PushoverNotifier(user_key="YOUR_USER_KEY", api_token="YOUR_API_TOKEN", enabled=False)
+
+    # Sample single-download notification
+    sample_download = {
+        'platform': "instagram",
+        'source': "evalongoria",
+        'content_type': "story",
+        'filename': "evalongoria_story_20251018.mp4",
+        'metadata': {'post_date': datetime.now()},
+    }
+    notifier.notify_download(**sample_download)
+
+    stats = notifier.get_stats()
+    print(f"Stats: {stats}")
--- a/modules/reddit_community_monitor.py
+++ b/modules/reddit_community_monitor.py
--- a/modules/scheduler.py
+++ b/modules/scheduler.py
--- a/modules/scraper_event_emitter.py
+++ b/modules/scraper_event_emitter.py
@@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+"""
+Thread-safe WebSocket event emitter for scraper monitoring
+
+Provides real-time events for the scraping monitor page:
+- Scraper sessions starting/completing
+- File downloads and movements
+- Progress updates
+"""
+
+from datetime import datetime
+from typing import Optional, Dict, Any
+
+
+class ScraperEventEmitter:
+    """
+    Emits WebSocket events for real-time scraper monitoring
+
+    Every ``emit_*`` method builds an event dict and passes it to
+    ``websocket_manager.broadcast_sync``. When ``app_state`` exposes an
+    ``active_scraper_sessions`` mapping, the start / progress / completed
+    events also add, update and remove the matching session entry in it.
+    """
+
+    def __init__(self, websocket_manager=None, app_state=None):
+        """
+        Initialize event emitter
+
+        Args:
+            websocket_manager: Object providing ``broadcast_sync(message)``;
+                when falsy, events are built but never sent (optional)
+            app_state: Object that may carry an ``active_scraper_sessions``
+                mapping; when falsy or lacking that attribute, no session
+                bookkeeping is done (optional)
+        """
+        self.websocket_manager = websocket_manager
+        self.app_state = app_state
+
+    @staticmethod
+    def _timestamp() -> str:
+        """Return the current local time as an ISO-8601 string"""
+        return datetime.now().isoformat()
+
+    def _tracks_sessions(self) -> bool:
+        """Return True when app_state is set and has active_scraper_sessions"""
+        state = self.app_state
+        return bool(state) and hasattr(state, 'active_scraper_sessions')
+
+    def emit_scraper_started(self, session_id: str, platform: str, account: str,
+                            content_type: str, estimated_count: int = 0, accounts_list: list = None):
+        """
+        Emit a 'scraper_started' event and register the session
+
+        Args:
+            session_id: Unique session identifier
+            platform: Platform name (instagram, snapchat, etc.)
+            account: Account/username being scraped (or comma-separated list)
+            content_type: Type of content (stories, posts, etc.)
+            estimated_count: Estimated number of items to download
+            accounts_list: Optional list of all accounts to be processed;
+                only added to the event when non-empty
+        """
+        payload = {
+            'session_id': session_id,
+            'platform': platform,
+            'account': account,
+            'content_type': content_type,
+            'estimated_count': estimated_count,
+            'timestamp': self._timestamp()
+        }
+        if accounts_list:
+            payload['accounts_list'] = accounts_list
+
+        # Keep a record in app_state for API retrieval. The layout is meant
+        # to match the scheduler's session structure (defined elsewhere).
+        if self._tracks_sessions():
+            initial_status = 'Starting...'
+            self.app_state.active_scraper_sessions[session_id] = {
+                'session_id': session_id,
+                'platform': platform,
+                'account': account,
+                'content_type': content_type,
+                'start_time': self._timestamp(),
+                'status': initial_status,
+                'detailed_status': initial_status,
+                # An unknown (0/None) estimate is stored as a total of 100
+                'progress': {'current': 0, 'total': estimated_count or 100},
+                'stats': {'media': 0, 'review': 0, 'failed': 0}
+            }
+
+        self._broadcast({'type': 'scraper_started', 'data': payload})
+
+    def emit_scraper_progress(self, session_id: str, status: str,
+                             current: int, total: int, current_account: str = None,
+                             completed_accounts: list = None):
+        """
+        Emit a 'scraper_progress' event and update the stored session
+
+        Args:
+            session_id: Session identifier
+            status: Status message (e.g., "Downloading stories...")
+            current: Current item count
+            total: Total item count
+            current_account: Currently active account/forum name (optional)
+            completed_accounts: List of completed accounts (optional)
+        """
+        payload = {
+            'session_id': session_id,
+            'status': status,
+            'progress_current': current,
+            'progress_total': total,
+            'timestamp': self._timestamp()
+        }
+        # Optional fields are only included when they carry a value
+        if current_account:
+            payload['current_account'] = current_account
+        if completed_accounts:
+            payload['completed_accounts'] = completed_accounts
+
+        self._record_progress(session_id, status, current, total,
+                              current_account, completed_accounts)
+
+        self._broadcast({'type': 'scraper_progress', 'data': payload})
+
+    def _record_progress(self, session_id, status, current, total,
+                         current_account, completed_accounts):
+        """Apply a progress update to the stored session, if one is tracked"""
+        if not self._tracks_sessions():
+            return
+        sessions = self.app_state.active_scraper_sessions
+        if session_id not in sessions:
+            return
+
+        session = sessions[session_id]
+        session['status'] = status
+        session['detailed_status'] = status
+        # The stored account follows whichever account is currently active
+        if current_account:
+            session['account'] = current_account
+        # Nested progress structure, same shape as written on session start
+        session['progress'] = {'current': current, 'total': total}
+        if completed_accounts:
+            session['completed_accounts'] = completed_accounts
+
+    def emit_scraper_completed(self, session_id: str, stats: Dict[str, int]):
+        """
+        Emit a 'scraper_completed' event and drop the stored session
+
+        Args:
+            session_id: Session identifier
+            stats: Statistics dict with keys: total_downloaded, moved, review, duplicates, failed
+        """
+        # Unknown session ids are ignored rather than raising
+        if self._tracks_sessions():
+            self.app_state.active_scraper_sessions.pop(session_id, None)
+
+        payload = {
+            'session_id': session_id,
+            'stats': stats,
+            'timestamp': self._timestamp()
+        }
+        self._broadcast({'type': 'scraper_completed', 'data': payload})
+
+    def emit_file_moved(self, session_id: str, platform: str, account: str,
+                       filename: str, media_type: str, destination_type: str,
+                       destination_path: str, thumbnail_url: str = None,
+                       face_match: Dict[str, Any] = None):
+        """
+        Emit a 'file_moved' event for a file placed at its destination
+
+        Args:
+            session_id: Session identifier
+            platform: Platform name
+            account: Account/username
+            filename: File name
+            media_type: 'image' or 'video'
+            destination_type: 'media', 'review', or 'recycle'
+            destination_path: Full path to destination file
+            thumbnail_url: URL to thumbnail (optional)
+            face_match: Face recognition result dict (optional); a missing
+                or empty value is sent as ``{'matched': False}``
+        """
+        payload = {
+            'session_id': session_id,
+            'platform': platform,
+            'account': account,
+            'filename': filename,
+            'media_type': media_type,
+            'destination_type': destination_type,
+            'destination_path': destination_path,
+            'thumbnail_url': thumbnail_url,
+            'face_match': face_match or {'matched': False},
+            'timestamp': self._timestamp()
+        }
+        self._broadcast({'type': 'file_moved', 'data': payload})
+
+    def _broadcast(self, message: dict):
+        """
+        Hand an event to the WebSocket manager, if one is configured
+
+        Args:
+            message: Event message dict
+        """
+        manager = self.websocket_manager
+        if not manager:
+            return
+        # broadcast_sync is the manager's entry point intended for emission
+        # from background threads (implementation not visible here)
+        manager.broadcast_sync(message)
--- a/modules/scraper_gallery_bridge.py
+++ b/modules/scraper_gallery_bridge.py
@@ -0,0 +1,652 @@
+"""
+Scraper Gallery Bridge
+
+Maps scraper accounts (Instagram, TikTok, Snapchat) to private gallery persons.
+After each download session, auto-imports new media as gallery posts.
+"""
+
+import hashlib
+import logging
+import mimetypes
+import sqlite3
+import subprocess
+import tempfile
+import uuid
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+# Module-level logger named after this module
+logger = logging.getLogger(__name__)
+
+# Key file that get_crypto() passes to load_key_from_file to build the crypto
+# helper for background (unattended) imports
+SCRAPER_BRIDGE_KEY_FILE = '/opt/immich/private/.scraper_bridge_key'
+
+# Map scraper module names → platform
+# (several Instagram scrapers and both Snapchat scrapers collapse onto one
+# platform name each)
+SCRAPER_TO_PLATFORM = {
+    'fastdl': 'instagram',
+    'imginn': 'instagram',
+    'imginn_api': 'instagram',
+    'instagram_client': 'instagram',
+    'toolzu': 'instagram',
+    'instagram': 'instagram',
+    'instagram_unified': 'instagram',
+    'tiktok': 'tiktok',
+    'snapchat': 'snapchat',
+    'snapchat_client': 'snapchat',
+}
+
+# Hex color stored with each platform's gallery tag when the tag is created
+# (_ensure_platform_tag falls back to '#888888' for unlisted platforms)
+PLATFORM_COLORS = {
+    'instagram': '#E1306C',
+    'tiktok': '#00f2ea',
+    'snapchat': '#FFFC00',
+}
+
+# Human-readable platform names, used as the gallery tag name and in the
+# description of imported posts
+PLATFORM_LABELS = {
+    'instagram': 'Instagram',
+    'tiktok': 'TikTok',
+    'snapchat': 'Snapchat',
+}
+
+
+def get_crypto():
+    """Load crypto from key file for background access (works when gallery is locked).
+
+    Returns:
+        The object produced by ``load_key_from_file`` for
+        ``SCRAPER_BRIDGE_KEY_FILE``, or ``None`` (after a debug log entry)
+        when no crypto could be loaded.
+    """
+    from modules.private_gallery_crypto import load_key_from_file
+
+    key_crypto = load_key_from_file(SCRAPER_BRIDGE_KEY_FILE)
+    if key_crypto is not None:
+        return key_crypto
+
+    logger.debug("Scraper bridge crypto unavailable - key file missing or invalid")
+    return None
+
+
+def get_available_accounts(platform: str, config: dict, db) -> List[Dict[str, Any]]:
+    """
+    Aggregate usernames from all scraper configs + paid_content_creators for a platform.
+
+    Usernames are stripped and lower-cased before de-duplication. Only config
+    sections with ``enabled`` set contribute. Both database lookups are
+    best-effort: a failing query is logged at debug level and skipped.
+
+    Args:
+        platform: 'instagram', 'tiktok' or 'snapchat'; any other value only
+            yields accounts from the paid_content_creators table
+        config: Application config dict holding the per-scraper sections
+        db: Object whose ``db_path`` attribute is the SQLite database path
+
+    Returns:
+        List sorted by username of dicts with ``username``, ``sources``
+        (sorted list of contributing sources) and ``is_mapped`` (True when
+        the username is in private_media_scraper_accounts for the platform)
+    """
+    accounts: Dict[str, set] = {}  # normalized username -> set of sources
+
+    def add_account(raw_username, source):
+        """Record ``source`` for a username; blank or None names are skipped."""
+        username = (raw_username or '').strip().lower()
+        if username:
+            accounts.setdefault(username, set()).add(source)
+
+    def add_account_entries(cfg, source):
+        """Add cfg['accounts'][].username, falling back to cfg['usernames']."""
+        entries = cfg.get('accounts', [])
+        if not entries and 'usernames' in cfg:
+            entries = [{'username': u} for u in cfg['usernames']]
+        for entry in entries:
+            add_account(entry.get('username', ''), source)
+
+    def fetch_usernames(query):
+        """Run a single-column query bound to ``platform``; always close the connection."""
+        conn = sqlite3.connect(db.db_path, timeout=10)
+        try:
+            return [row[0] for row in conn.execute(query, (platform,))]
+        finally:
+            # Closed even when the query raises (e.g. table missing)
+            conn.close()
+
+    if platform == 'instagram':
+        # instagram.accounts[].username
+        ig_cfg = config.get('instagram', {})
+        if ig_cfg.get('enabled', False):
+            add_account_entries(ig_cfg, 'instagram')
+
+        # Collect usernames + phrase_search usernames from each scraper
+        for scraper_id in ('fastdl', 'imginn', 'imginn_api', 'instagram_client', 'toolzu'):
+            scraper_cfg = config.get(scraper_id, {})
+            if not scraper_cfg.get('enabled', False):
+                continue
+            for u in scraper_cfg.get('usernames', []):
+                add_account(u, scraper_id)
+            # phrase_search usernames are also downloadable accounts
+            for u in scraper_cfg.get('phrase_search', {}).get('usernames', []):
+                add_account(u, scraper_id)
+
+    elif platform == 'tiktok':
+        tt_cfg = config.get('tiktok', {})
+        if tt_cfg.get('enabled', False):
+            add_account_entries(tt_cfg, 'tiktok')
+
+    elif platform == 'snapchat':
+        # snapchat.usernames and snapchat_client.usernames
+        for scraper_id in ('snapchat', 'snapchat_client'):
+            sc_cfg = config.get(scraper_id, {})
+            if sc_cfg.get('enabled', False):
+                for u in sc_cfg.get('usernames', []):
+                    add_account(u, scraper_id)
+
+    # Add from paid_content_creators table
+    try:
+        for name in fetch_usernames(
+            'SELECT username FROM paid_content_creators WHERE platform = ? AND enabled = 1'
+        ):
+            add_account(name, 'paid_content')
+    except Exception as e:
+        logger.debug(f"Could not query paid_content_creators: {e}")
+
+    # Check which are already mapped
+    mapped_usernames = set()
+    try:
+        mapped_usernames = {
+            name.lower()
+            for name in fetch_usernames(
+                'SELECT username FROM private_media_scraper_accounts WHERE platform = ?'
+            )
+            if name
+        }
+    except Exception as e:
+        logger.debug(f"Could not query private_media_scraper_accounts: {e}")
+
+    return [
+        {
+            'username': username,
+            'sources': sorted(sources),
+            'is_mapped': username in mapped_usernames,
+        }
+        for username, sources in sorted(accounts.items())
+    ]
+
+
+def _ensure_platform_tag(platform: str, db, crypto) -> int:
+    """Find or create a tag for the platform in private_gallery_tags.
+
+    Tag names are stored encrypted, so every row is decrypted and compared
+    case-insensitively against the platform's label. When nothing matches,
+    a new tag is inserted and committed.
+
+    Args:
+        platform: Platform key used to look up the label and color
+        db: Object whose ``db_path`` attribute is the SQLite database path
+        crypto: Object providing ``decrypt_field`` and ``encrypt_field``
+
+    Returns:
+        ID of the existing or newly created tag row
+    """
+    conn = sqlite3.connect(db.db_path, timeout=10)
+    conn.row_factory = sqlite3.Row
+    try:
+        cur = conn.cursor()
+        cur.execute("SELECT id, encrypted_name FROM private_gallery_tags")
+
+        # Unlisted platforms fall back to their title-cased key
+        tag_label = PLATFORM_LABELS.get(platform, platform.title())
+        wanted = tag_label.lower()
+
+        for tag_row in cur.fetchall():
+            try:
+                decrypted = crypto.decrypt_field(tag_row['encrypted_name'])
+                is_match = decrypted.lower() == wanted
+            except Exception:
+                # Rows that cannot be decrypted are simply not candidates
+                continue
+            if is_match:
+                return tag_row['id']
+
+        # Create the tag
+        new_name = crypto.encrypt_field(tag_label)
+        tag_color = PLATFORM_COLORS.get(platform, '#888888')
+        cur.execute('''
+            INSERT INTO private_gallery_tags (encrypted_name, color)
+            VALUES (?, ?)
+        ''', (new_name, tag_color))
+        conn.commit()
+        new_tag_id = cur.lastrowid
+        logger.info(f"Created '{tag_label}' tag with ID {new_tag_id}")
+        return new_tag_id
+    finally:
+        conn.close()
+
+
+def _get_file_info(file_path: Path) -> Dict[str, Any]:
+    """Get file type, mime type, and dimensions.
+
+    The file type is decided from the extension alone. Dimensions come from
+    Pillow for images and from ``ffprobe`` for videos; both lookups are
+    best-effort, so a failure is logged at debug level and leaves the zero
+    defaults in place.
+
+    Args:
+        file_path: Path of the media file to inspect
+
+    Returns:
+        Dict with ``file_type`` ('image', 'video' or 'other'), ``mime_type``
+        ('application/octet-stream' when it cannot be guessed), ``width``,
+        ``height`` and ``duration`` (whole seconds; 0 when unknown)
+    """
+    ext = file_path.suffix.lower().lstrip('.')
+    mime_type, _ = mimetypes.guess_type(str(file_path))
+    if not mime_type:
+        mime_type = 'application/octet-stream'
+
+    image_exts = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff', 'heic', 'heif', 'avif'}
+    video_exts = {'mp4', 'mov', 'avi', 'mkv', 'webm', 'm4v', 'wmv', 'flv'}
+
+    if ext in image_exts:
+        file_type = 'image'
+    elif ext in video_exts:
+        file_type = 'video'
+    else:
+        file_type = 'other'
+
+    width, height, duration = 0, 0, 0
+
+    if file_type == 'image':
+        try:
+            from PIL import Image
+            with Image.open(file_path) as img:
+                width, height = img.size
+        except Exception as e:
+            logger.debug(f"Could not read image dimensions for {file_path.name}: {e}")
+    elif file_type == 'video':
+        try:
+            result = subprocess.run(
+                ['ffprobe', '-v', 'quiet', '-print_format', 'json',
+                 '-show_streams', '-show_format', str(file_path)],
+                capture_output=True, text=True, timeout=15
+            )
+            if result.returncode == 0:
+                import json
+                probe = json.loads(result.stdout)
+                for stream in probe.get('streams', []):
+                    if stream.get('codec_type') != 'video':
+                        continue
+                    width = int(stream.get('width', 0))
+                    height = int(stream.get('height', 0))
+                    # Some containers (e.g. Matroska/WebM) report no
+                    # per-stream duration, so fall back to the
+                    # container-level value from -show_format.
+                    candidates = (
+                        stream.get('duration'),
+                        (probe.get('format') or {}).get('duration'),
+                    )
+                    for raw_duration in candidates:
+                        try:
+                            duration = int(float(raw_duration))
+                        except (TypeError, ValueError):
+                            continue
+                        break
+                    # Only the first video stream is considered
+                    break
+        except Exception as e:
+            logger.debug(f"Could not probe video {file_path.name}: {e}")
+
+    return {
+        'file_type': file_type,
+        'mime_type': mime_type,
+        'width': width,
+        'height': height,
+        'duration': duration,
+    }
+
+
+def _compute_perceptual_hash(file_path: Path) -> Optional[str]:
+    """Calculate perceptual hash for an image or video file.
+
+    Images are hashed directly; for videos the frame at the midpoint of the
+    reported frame count is hashed instead. The hash is a 16x16 difference
+    hash (``imagehash.dhash``).
+
+    Args:
+        file_path: Path of the media file; its extension selects the mode
+
+    Returns:
+        The hash as a string, or ``None`` when ``imagehash``/Pillow (or
+        ``cv2`` for videos) is not installed, the extension is not a known
+        image/video type, or anything fails while reading the file
+    """
+    try:
+        import imagehash
+        from PIL import Image
+    except ImportError:
+        return None
+
+    suffix = file_path.suffix.lower().lstrip('.')
+    is_video = suffix in {'mp4', 'mov', 'avi', 'mkv', 'webm', 'm4v', 'wmv', 'flv'}
+    is_image = suffix in {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff', 'heic', 'heif', 'avif'}
+    if not (is_video or is_image):
+        return None
+
+    frame_image = None
+    try:
+        if is_video:
+            try:
+                import cv2
+            except ImportError:
+                return None
+            capture = cv2.VideoCapture(str(file_path))
+            if not capture.isOpened():
+                return None
+            # Seek to the middle frame before grabbing one
+            frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
+            capture.set(cv2.CAP_PROP_POS_FRAMES, int(frame_count * 0.5))
+            grabbed, frame = capture.read()
+            capture.release()
+            if not grabbed or frame is None:
+                return None
+            # OpenCV delivers BGR; Pillow expects RGB
+            frame_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        else:
+            frame_image = Image.open(file_path)
+
+        digest = imagehash.dhash(frame_image, hash_size=16)
+        return str(digest)
+    except Exception:
+        return None
+    finally:
+        if frame_image:
+            try:
+                frame_image.close()
+            except Exception:
+                pass
+
+
+def _generate_thumbnail(file_path: Path, output_path: Path, file_type: str) -> bool:
+    """Generate a thumbnail for an image or video.
+
+    Images are EXIF-rotated, shrunk to fit within 400x400 and written as
+    JPEG. For videos a single frame at the 1-second mark is extracted with
+    ``ffmpeg`` and scaled to 400 pixels wide. The parent directory of
+    ``output_path`` is created if needed.
+
+    Args:
+        file_path: Source media file
+        output_path: Where the thumbnail is written
+        file_type: 'image' or 'video'; any other value produces nothing
+
+    Returns:
+        True when a thumbnail was written, False otherwise (including any
+        error, which is logged at debug level)
+    """
+    try:
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        if file_type == 'image':
+            from PIL import Image, ImageOps
+            with Image.open(file_path) as source_img:
+                thumb = ImageOps.exif_transpose(source_img)
+                thumb.thumbnail((400, 400))
+                # JPEG cannot store alpha, palette, 16-bit or float data;
+                # converting only RGBA/P left modes such as LA or I;16 to
+                # fail on save, so normalize everything except RGB/L.
+                if thumb.mode not in ('RGB', 'L'):
+                    thumb = thumb.convert('RGB')
+                thumb.save(output_path, 'JPEG', quality=85)
+            return True
+
+        elif file_type == 'video':
+            result = subprocess.run([
+                'ffmpeg', '-y', '-i', str(file_path),
+                '-ss', '00:00:01', '-vframes', '1',
+                '-vf', 'scale=400:-1:force_original_aspect_ratio=decrease',
+                str(output_path)
+            ], capture_output=True, timeout=30)
+            # ffmpeg can exit 0 without writing a frame, so check the file too
+            return result.returncode == 0 and output_path.exists()
+    except Exception as e:
+        logger.debug(f"Thumbnail generation failed for {file_path.name}: {e}")
+    return False
+
+
+def import_new_media(platform: str, username: str, person_id: int,
+                     last_imported_at: Optional[str], db, crypto,
+                     last_imported_file_id: int = 0) -> int:
+    """
+    Import new media files from file_inventory into the private gallery.
+    Returns count of imported files.
+    """
+    conn = sqlite3.connect(db.db_path, timeout=30)
+    conn.row_factory = sqlite3.Row
+    try:
+        cursor = conn.cursor()
+
+        # Use id-based filtering (reliable, monotonically increasing with insertion order).
+        # Falls back to created_date only for legacy accounts without last_imported_file_id.
+        if last_imported_file_id and last_imported_file_id > 0:
+            cursor.execute('''
+                SELECT id, file_path, filename, created_date FROM file_inventory
+                WHERE platform = ? AND source = ? AND id > ?
+                AND location IN ('final', 'review')
+                ORDER BY id ASC
+            ''', (platform, username, last_imported_file_id))
+        elif last_imported_at:
+            cursor.execute('''
+                SELECT id, file_path, filename, created_date FROM file_inventory
+                WHERE platform = ? AND source = ? AND created_date > ?
+                AND location IN ('final', 'review')
+                ORDER BY id ASC
+            ''', (platform, username, last_imported_at))
+        else:
+            # First run: only import files from the last 1 hour
+            from datetime import timedelta
+            cutoff = (datetime.now() - timedelta(hours=1)).isoformat()
+            cursor.execute('''
+                SELECT id, file_path, filename, created_date FROM file_inventory
+                WHERE platform = ? AND source = ? AND created_date > ?
+                AND location IN ('final', 'review')
+                ORDER BY id ASC
+            ''', (platform, username, cutoff))
+
+        files = cursor.fetchall()
+    finally:
+        conn.close()
+
+    if not files:
+        return 0
+
+    # Filter to existing files, track max id for updating last_imported_file_id
+    valid_files = []
+    max_file_id = last_imported_file_id or 0
+    for f in files:
+        fp = Path(f['file_path'])
+        file_id = f['id']
+        if file_id > max_file_id:
+            max_file_id = file_id
+        if fp.exists() and fp.stat().st_size > 0:
+            valid_files.append({'path': fp, 'created_date': f['created_date'], 'id': file_id})
+
+    if not valid_files:
+        return 0
+
+    # Get storage path
+    conn = sqlite3.connect(db.db_path, timeout=10)
+    conn.row_factory = sqlite3.Row
+    try:
+        cursor = conn.cursor()
+        cursor.execute("SELECT value FROM private_media_config WHERE key = 'storage_path'")
+        row = cursor.fetchone()
+        storage_path = Path(row['value']) if row else Path('/opt/immich/private')
+    finally:
+        conn.close()
+
+    data_path = storage_path / 'data'
+    thumbs_path = storage_path / 'thumbs'
+    data_path.mkdir(parents=True, exist_ok=True)
+    thumbs_path.mkdir(parents=True, exist_ok=True)
+
+    # Get/create platform tag
+    tag_id = _ensure_platform_tag(platform, db, crypto)
+
+    # Create a post for this batch
+    now_iso = datetime.now().isoformat()
+    encrypted_desc = crypto.encrypt_field(f"{PLATFORM_LABELS.get(platform, platform)} - @{username}")
+    encrypted_date = crypto.encrypt_field(now_iso)
+
+    conn = sqlite3.connect(db.db_path, timeout=10)
+    conn.row_factory = sqlite3.Row
+    try:
+        cursor = conn.cursor()
+        cursor.execute('''
+            INSERT INTO private_media_posts (person_id, encrypted_description, encrypted_media_date, created_at, updated_at)
+            VALUES (?, ?, ?, ?, ?)
+        ''', (person_id, encrypted_desc, encrypted_date, now_iso, now_iso))
+        conn.commit()
+        post_id = cursor.lastrowid
+    finally:
+        conn.close()
+
+    media_count = 0
+    latest_date = last_imported_at
+
+    for file_info_entry in valid_files:
+        file_path = file_info_entry['path']
+        created_date = file_info_entry['created_date']
+        # Normalize to string for consistent comparison (PostgreSQL returns datetime objects)
+        if hasattr(created_date, 'isoformat'):
+            created_date = created_date.isoformat()
+        try:
+            # Calculate file hash
+            sha256 = hashlib.sha256()
+            with open(file_path, 'rb') as f:
+                for chunk in iter(lambda: f.read(65536), b''):
+                    sha256.update(chunk)
+            file_hash = sha256.hexdigest()
+
+            # Check for duplicates (scoped by person)
+            conn = sqlite3.connect(db.db_path, timeout=10)
+            conn.row_factory = sqlite3.Row
+            try:
+                cursor = conn.cursor()
+                cursor.execute(
+                    'SELECT id FROM private_media WHERE file_hash = ? AND person_id = ?',
+                    (file_hash, person_id)
+                )
+                if cursor.fetchone():
+                    logger.debug(f"Duplicate file skipped: {file_path.name}")
+                    if created_date and (not latest_date or created_date > latest_date):
+                        latest_date = created_date
+                    continue
+            finally:
+                conn.close()
+
+            # Get file info
+            finfo = _get_file_info(file_path)
+            file_size = file_path.stat().st_size
+
+            # Compute perceptual hash
+            perceptual_hash = _compute_perceptual_hash(file_path)
+
+            # Generate storage ID
+            storage_id = str(uuid.uuid4())
+
+            # Generate thumbnail
+            temp_thumb = Path(tempfile.gettempdir()) / f"pg_thumb_{storage_id}.jpg"
+            _generate_thumbnail(file_path, temp_thumb, finfo['file_type'])
+
+            # Encrypt the file
+            encrypted_file = data_path / f"{storage_id}.enc"
+            if not crypto.encrypt_file(file_path, encrypted_file):
+                logger.error(f"Encryption failed for {file_path.name}")
+                continue
+
+            # Encrypt thumbnail
+            if temp_thumb.exists():
+                encrypted_thumb = thumbs_path / f"{storage_id}.enc"
+                crypto.encrypt_file(temp_thumb, encrypted_thumb)
+                try:
+                    temp_thumb.unlink()
+                except Exception:
+                    pass
+
+            # Insert media record
+            encrypted_filename = crypto.encrypt_field(file_path.name)
+            encrypted_source = crypto.encrypt_field(f"@{username}")
+
+            conn = sqlite3.connect(db.db_path, timeout=10)
+            conn.row_factory = sqlite3.Row
+            try:
+                cursor = conn.cursor()
+                cursor.execute('''
+                    INSERT INTO private_media (
+                        post_id, storage_id, encrypted_filename, encrypted_description,
+                        file_hash, file_size, file_type, mime_type,
+                        width, height, duration, person_id,
+                        encrypted_media_date, source_type, encrypted_source_path,
+                        perceptual_hash, created_at
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                ''', (
+                    post_id,
+                    storage_id,
+                    encrypted_filename,
+                    None,
+                    file_hash,
+                    file_size,
+                    finfo['file_type'],
+                    finfo['mime_type'],
+                    finfo['width'],
+                    finfo['height'],
+                    finfo['duration'],
+                    person_id,
+                    encrypted_date,
+                    platform,
+                    encrypted_source,
+                    perceptual_hash,
+                    now_iso,
+                ))
+                conn.commit()
+            finally:
+                conn.close()
+
+            media_count += 1
+            if created_date and (not latest_date or created_date > latest_date):
+                latest_date = created_date
+
+        except Exception as e:
+            logger.error(f"Failed to import {file_path.name}: {e}")
+
+    # Apply platform tag to the post if we imported media
+    if media_count > 0:
+        conn = sqlite3.connect(db.db_path, timeout=10)
+        try:
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT OR IGNORE INTO private_media_post_tags (post_id, tag_id)
+                VALUES (?, ?)
+            ''', (post_id, tag_id))
+            conn.commit()
+        finally:
+            conn.close()
+
+        # Update the mapping row with both timestamp and file id markers
+        conn = sqlite3.connect(db.db_path, timeout=10)
+        try:
+            cursor = conn.cursor()
+            cursor.execute('''
+                UPDATE private_media_scraper_accounts
+                SET last_imported_at = ?,
+                    last_imported_file_id = ?,
+                    total_media_imported = total_media_imported + ?,
+                    updated_at = ?
+                WHERE platform = ? AND username = ? AND person_id = ?
+            ''', (latest_date or now_iso, max_file_id, media_count, now_iso, platform, username, person_id))
+            conn.commit()
+        finally:
+            conn.close()
+
+        logger.info(f"Imported {media_count} files from {platform}/@{username} to gallery (last_file_id={max_file_id})")
+    else:
+        # No media imported - still update the file id marker so we don't re-check these files
+        if max_file_id > (last_imported_file_id or 0):
+            conn = sqlite3.connect(db.db_path, timeout=10)
+            try:
+                cursor = conn.cursor()
+                cursor.execute('''
+                    UPDATE private_media_scraper_accounts
+                    SET last_imported_file_id = ?
+                    WHERE platform = ? AND username = ? AND person_id = ?
+                ''', (max_file_id, platform, username, person_id))
+                conn.commit()
+            finally:
+                conn.close()
+
+        # Delete the empty post
+        conn = sqlite3.connect(db.db_path, timeout=10)
+        try:
+            cursor = conn.cursor()
+            cursor.execute("DELETE FROM private_media_posts WHERE id = ?", (post_id,))
+            conn.commit()
+        finally:
+            conn.close()
+
+    return media_count
+
+
+def on_download_complete(task_id: str, download_count: int, db, crypto) -> int:
+    """
+    Scheduler hook invoked once a download task has finished.
+
+    The task id has the form ``<scraper_module>:<target>``; only the scraper
+    module part is used. It is translated to a platform through
+    SCRAPER_TO_PLATFORM and every mapped account of that platform is checked,
+    because a single task may have downloaded for many users:
+    - batch tasks (fastdl:all, imginn_api:all)
+    - per-user tasks that also download phrase_search users
+    - simple per-user tasks (toolzu:evalongoria)
+
+    Args:
+        task_id: Scheduler task identifier, e.g. "fastdl:all"
+        download_count: Not used by this function
+        db: Database object handed through to the importer
+        crypto: Crypto object handed through to the importer
+
+    Returns:
+        Number of files imported; 0 when the task id is empty, has no ':'
+        separator, or its scraper module has no platform mapping
+    """
+    if not task_id:
+        return 0
+
+    module_name, separator, _target = task_id.partition(':')
+    if not separator:
+        return 0
+
+    platform = SCRAPER_TO_PLATFORM.get(module_name)
+    if platform:
+        # Checking all accounts is intentional: accounts without new files are
+        # filtered out by the importer's id-based check.
+        return _import_all_mapped_accounts(platform, db, crypto)
+    return 0
+
+
+def _import_all_mapped_accounts(platform: str, db, crypto) -> int:
+    """
+    Import new media for every enabled account mapped to ``platform``.
+
+    Reads the enabled rows of private_media_scraper_accounts for the platform
+    and calls import_new_media() once per row. A failure for one account is
+    logged and does not stop the remaining accounts.
+
+    Args:
+        platform: Platform name to match against the mapping table
+        db: Database object; its ``db_path`` is opened with sqlite3
+        crypto: Crypto object handed through to import_new_media()
+
+    Returns:
+        Sum of the per-account import counts (0 when nothing is mapped)
+    """
+    connection = sqlite3.connect(db.db_path, timeout=10)
+    connection.row_factory = sqlite3.Row
+    try:
+        lookup = connection.cursor()
+        lookup.execute('''
+            SELECT id, username, person_id, last_imported_at, last_imported_file_id
+            FROM private_media_scraper_accounts
+            WHERE platform = ? AND enabled = 1
+        ''', (platform,))
+        accounts = lookup.fetchall()
+    finally:
+        connection.close()
+
+    imported_total = 0
+    for account in accounts:
+        try:
+            imported_total += import_new_media(
+                platform, account['username'], account['person_id'],
+                account['last_imported_at'], db, crypto,
+                # A NULL marker means nothing was imported yet
+                last_imported_file_id=account['last_imported_file_id'] or 0
+            )
+        except Exception as e:
+            logger.error(f"Gallery bridge batch import error for {platform}/@{account['username']}: {e}")
+
+    if imported_total > 0:
+        logger.info(f"Batch import for {platform}: {imported_total} files across {len(accounts)} accounts")
+
+    return imported_total
--- a/modules/semantic_search.py
+++ b/modules/semantic_search.py
@@ -0,0 +1,728 @@
+#!/usr/bin/env python3
+"""
+Semantic Search Module using CLIP
+Provides image/video similarity search and natural language search capabilities
+"""
+
+import os
+import struct
+import numpy as np
+from typing import Dict, List, Optional, Tuple, Any
+from pathlib import Path
+from PIL import Image
+import threading
+import queue
+from datetime import datetime
+from modules.universal_logger import get_logger
+
+logger = get_logger('SemanticSearch')
+
+# Global model instance (lazy loaded)
+_clip_model = None
+_clip_model_name = None
+_model_lock = threading.Lock()
+
+
+def get_configured_model_name() -> str:
+    """
+    Resolve the CLIP model name stored under the 'semantic_search' setting.
+
+    Returns:
+        The 'model' entry of the 'semantic_search' settings dict, or
+        'clip-ViT-B-32' when the entry is missing, the setting is not a dict,
+        or the settings cannot be read (the failure is logged)
+    """
+    fallback = 'clip-ViT-B-32'
+    try:
+        from modules.settings_manager import SettingsManager
+        # Settings are read from database/media_downloader.db one level above this package
+        database_file = Path(__file__).parent.parent / 'database' / 'media_downloader.db'
+        settings = SettingsManager(str(database_file)).get('semantic_search', {})
+        if not isinstance(settings, dict):
+            return fallback
+        model = settings.get('model', fallback)
+        logger.info(f"Configured CLIP model: {model}")
+        return model
+    except Exception as e:
+        logger.error(f"Failed to get configured model: {e}")
+        return fallback
+
+
+def get_clip_model(model_name: str = None):
+    """
+    Get or load the process-wide CLIP model (thread-safe singleton).
+
+    The cached model is reused as long as the requested name matches the name
+    it was loaded under; a different name drops the cached model and loads the
+    requested one.
+
+    Args:
+        model_name: Model to return; None means the configured model
+            (see get_configured_model_name())
+
+    Returns:
+        The loaded SentenceTransformer instance for ``model_name``
+
+    Raises:
+        Exception: Whatever importing or constructing the model raises; the
+            error is logged and re-raised
+    """
+    global _clip_model, _clip_model_name
+
+    if model_name is None:
+        model_name = get_configured_model_name()
+
+    # The "has the model changed?" check and the load happen under ONE lock
+    # acquisition. Doing them in separate critical sections allowed another
+    # thread to load a different model in between, so the caller could be
+    # handed a model that does not match the name it asked for.
+    with _model_lock:
+        if _clip_model is not None and _clip_model_name != model_name:
+            logger.info(f"Model changed from {_clip_model_name} to {model_name}, reloading...")
+            # Drop the old model before loading so two models are never held at once
+            _clip_model = None
+            _clip_model_name = None
+
+        if _clip_model is None:
+            logger.info(f"Loading CLIP model ({model_name})...")
+            try:
+                from sentence_transformers import SentenceTransformer
+                _clip_model = SentenceTransformer(model_name)
+                _clip_model_name = model_name
+                logger.info(f"CLIP model {model_name} loaded successfully")
+            except Exception as e:
+                logger.error(f"Failed to load CLIP model: {e}")
+                raise
+
+        return _clip_model
+
+
+def embedding_to_bytes(embedding: np.ndarray) -> bytes:
+    """Serialize an embedding as raw float32 bytes for database storage"""
+    as_float32 = embedding.astype(np.float32)
+    return as_float32.tobytes()
+
+
+def bytes_to_embedding(data: bytes) -> np.ndarray:
+    """Decode raw float32 bytes read from the database into a 1-D numpy array"""
+    decoded = np.frombuffer(data, dtype=np.float32)
+    return decoded
+
+
+def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
+    """
+    Calculate cosine similarity between two embeddings.
+
+    Args:
+        a: First embedding vector
+        b: Second embedding vector (same length as ``a``)
+
+    Returns:
+        dot(a, b) / (|a| * |b|) as a Python float; 0.0 when either vector has
+        zero (or non-finite) norm, where the ratio is undefined
+    """
+    denominator = np.linalg.norm(a) * np.linalg.norm(b)
+    # A zero-norm vector would otherwise divide by zero and yield NaN plus a
+    # RuntimeWarning; treat "no direction" as "no similarity".
+    if denominator == 0 or not np.isfinite(denominator):
+        return 0.0
+    return float(np.dot(a, b) / denominator)
+
+
+class SemanticSearch:
+    """Semantic search engine using CLIP embeddings"""
+
+    SUPPORTED_IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'}
+    SUPPORTED_VIDEO_EXTENSIONS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v'}
+
+    def __init__(self, unified_db):
+        """
+        Set up the search engine; the CLIP model itself is loaded on first use
+
+        Args:
+            unified_db: UnifiedDatabase instance used for all embedding queries
+        """
+        # Resolved lazily through the `model` property
+        self._model = None
+        self.logger = get_logger('SemanticSearch')
+        self.db = unified_db
+
+    @property
+    def model(self):
+        """CLIP model for this instance, fetched via get_clip_model() on first access and then cached"""
+        cached = self._model
+        if cached is None:
+            cached = self._model = get_clip_model()
+        return cached
+
+    def get_image_embedding(self, image_path: str) -> Optional[np.ndarray]:
+        """
+        Encode an image file into a CLIP embedding
+
+        Args:
+            image_path: Path to the image file
+
+        Returns:
+            Embedding vector, or None when the image cannot be opened or
+            encoded (the failure is logged at debug level)
+        """
+        try:
+            with Image.open(image_path) as opened:
+                # Normalise every other mode (palette, RGBA, greyscale, ...) to RGB before encoding
+                rgb_image = opened if opened.mode == 'RGB' else opened.convert('RGB')
+                return self.model.encode(rgb_image, convert_to_numpy=True)
+        except Exception as e:
+            self.logger.debug(f"Failed to get embedding for {image_path}: {e}")
+            return None
+
+    def get_video_frame_embedding(self, video_path: str, frame_position: float = 0.1) -> Optional[np.ndarray]:
+        """
+        Encode one frame of a video into a CLIP embedding
+
+        Args:
+            video_path: Path to the video file
+            frame_position: Position in video (0-1) to extract frame from
+
+        Returns:
+            Embedding vector, or None when no frame could be extracted or
+            encoding failed
+        """
+        # cv2 is tried first; ffmpeg is the fallback for codecs cv2 can't handle (e.g. AV1)
+        frame = None
+        for extract in (self._extract_frame_cv2, self._extract_frame_ffmpeg):
+            frame = extract(video_path, frame_position)
+            if frame is not None:
+                break
+
+        if frame is None:
+            return None
+
+        try:
+            return self.model.encode(frame, convert_to_numpy=True)
+        except Exception as e:
+            self.logger.debug(f"Failed to encode video frame for {video_path}: {e}")
+            return None
+        finally:
+            # Release the frame's memory whether or not encoding succeeded
+            try:
+                frame.close()
+            except Exception:
+                pass
+
+    def _extract_frame_cv2(self, video_path: str, frame_position: float) -> Optional[Image.Image]:
+        """
+        Extract a single frame using OpenCV
+
+        Args:
+            video_path: Path to the video file
+            frame_position: Position in video (0-1) to extract frame from
+
+        Returns:
+            The frame as an RGB PIL image, or None when cv2 is unavailable, the
+            video cannot be opened, it reports no frames, or the read fails
+        """
+        try:
+            import cv2
+
+            cap = cv2.VideoCapture(video_path)
+            try:
+                if not cap.isOpened():
+                    return None
+
+                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+                if total_frames <= 0:
+                    return None
+
+                # Clamp to an existing frame index: frame_position == 1.0 would
+                # otherwise seek one past the last frame and the read would fail
+                target_frame = min(max(int(total_frames * frame_position), 0), total_frames - 1)
+                cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame)
+
+                ret, frame = cap.read()
+            finally:
+                # Always release the capture, including on early returns and errors
+                cap.release()
+
+            if not ret:
+                return None
+
+            # OpenCV delivers BGR; PIL expects RGB
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            return Image.fromarray(frame_rgb)
+
+        except Exception as e:
+            self.logger.debug(f"cv2 frame extraction failed for {video_path}: {e}")
+            return None
+
+    def _extract_frame_ffmpeg(self, video_path: str, frame_position: float) -> Optional[Image.Image]:
+        """
+        Extract a single frame using ffmpeg (fallback for codecs cv2 can't handle)
+
+        Args:
+            video_path: Path to the video file
+            frame_position: Position in video (0-1) to extract frame from
+
+        Returns:
+            The frame as an RGB PIL image, or None when ffprobe/ffmpeg are
+            missing or fail, the duration cannot be parsed, or the extracted
+            frame cannot be decoded
+        """
+        tmp_path = None
+        try:
+            import subprocess
+            import tempfile
+
+            # Get video duration
+            probe_cmd = [
+                'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
+                '-of', 'default=noprint_wrappers=1:nokey=1', video_path
+            ]
+            result = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=10)
+            if result.returncode != 0:
+                return None
+
+            duration = float(result.stdout.strip())
+            seek_time = duration * frame_position
+
+            # Reserve a temp file name for ffmpeg to write the frame into
+            with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp:
+                tmp_path = tmp.name
+
+            extract_cmd = [
+                'ffmpeg', '-y', '-ss', str(seek_time), '-i', video_path,
+                '-vframes', '1', '-q:v', '2', tmp_path
+            ]
+            result = subprocess.run(extract_cmd, capture_output=True, timeout=30)
+
+            if result.returncode != 0 or not os.path.exists(tmp_path):
+                return None
+
+            # convert() loads the pixel data and returns an independent image,
+            # so the file handle can be closed and the temp file removed
+            with Image.open(tmp_path) as decoded:
+                return decoded.convert('RGB')
+
+        except Exception as e:
+            self.logger.debug(f"ffmpeg frame extraction failed for {video_path}: {e}")
+            return None
+        finally:
+            # Remove the temp file on every path; previously it was left behind
+            # when ffmpeg failed or the image could not be decoded
+            if tmp_path is not None:
+                try:
+                    os.unlink(tmp_path)
+                except OSError:
+                    pass  # Best effort cleanup of temp file
+
+    def get_text_embedding(self, text: str) -> Optional[np.ndarray]:
+        """
+        Encode a text query into a CLIP embedding
+
+        Args:
+            text: Text query
+
+        Returns:
+            Embedding vector, or None when encoding fails (the failure is logged)
+        """
+        try:
+            return self.model.encode(text, convert_to_numpy=True)
+        except Exception as e:
+            self.logger.error(f"Failed to get text embedding: {e}")
+            return None
+
+    def store_embedding(self, file_id: int, embedding: np.ndarray) -> bool:
+        """
+        Store embedding in database, replacing any existing one for the file
+
+        Args:
+            file_id: File inventory ID
+            embedding: Embedding vector
+
+        Returns:
+            True when the row was written, False when serialization or the
+            database write failed (the failure is logged)
+        """
+        try:
+            embedding_bytes = embedding_to_bytes(embedding)
+
+            # Record the model that actually produced the vector instead of
+            # assuming the default: the model is configurable, and vectors
+            # from different models are not comparable. Fall back to the
+            # configured name when no model has been loaded in this process.
+            model_name = _clip_model_name or get_configured_model_name()
+
+            with self.db.get_connection(for_write=True) as conn:
+                cursor = conn.cursor()
+                cursor.execute('''
+                    INSERT OR REPLACE INTO content_embeddings
+                    (file_id, embedding, embedding_model, embedding_version, created_date)
+                    VALUES (?, ?, ?, 1, CURRENT_TIMESTAMP)
+                ''', (file_id, embedding_bytes, model_name))
+
+            return True
+
+        except Exception as e:
+            self.logger.error(f"Failed to store embedding for file {file_id}: {e}")
+            return False
+
+    def get_embedding(self, file_id: int) -> Optional[np.ndarray]:
+        """
+        Load the stored embedding of a file
+
+        Args:
+            file_id: File inventory ID
+
+        Returns:
+            Embedding vector, or None when no (non-empty) embedding is stored
+            or the lookup fails (the failure is logged)
+        """
+        try:
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute('''
+                    SELECT embedding FROM content_embeddings WHERE file_id = ?
+                ''', (file_id,))
+
+                record = cursor.fetchone()
+                blob = record['embedding'] if record else None
+                if blob:
+                    return bytes_to_embedding(blob)
+
+            return None
+
+        except Exception as e:
+            self.logger.error(f"Failed to get embedding for file {file_id}: {e}")
+            return None
+
+    def delete_embedding(self, file_id: int) -> bool:
+        """
+        Remove the stored embedding of a file
+
+        Args:
+            file_id: File inventory ID
+
+        Returns:
+            True if a row was deleted, False if none existed or the delete
+            failed (the failure is logged)
+        """
+        try:
+            with self.db.get_connection(for_write=True) as conn:
+                cursor = conn.cursor()
+                cursor.execute('DELETE FROM content_embeddings WHERE file_id = ?', (file_id,))
+                if cursor.rowcount <= 0:
+                    return False
+                self.logger.debug(f"Deleted embedding for file_id {file_id}")
+                return True
+        except Exception as e:
+            self.logger.error(f"Failed to delete embedding for file {file_id}: {e}")
+            return False
+
+    def delete_embedding_by_path(self, file_path: str) -> bool:
+        """
+        Remove the stored embedding of the file registered under a path
+
+        Args:
+            file_path: File path as stored in file_inventory
+
+        Returns:
+            True if a row was deleted, False if the path is unknown, it had no
+            embedding, or the delete failed (the failure is logged)
+        """
+        try:
+            with self.db.get_connection(for_write=True) as conn:
+                cursor = conn.cursor()
+                # Resolve the path to its inventory id first
+                cursor.execute('SELECT id FROM file_inventory WHERE file_path = ?', (file_path,))
+                inventory_row = cursor.fetchone()
+                if not inventory_row:
+                    return False
+
+                cursor.execute('DELETE FROM content_embeddings WHERE file_id = ?', (inventory_row['id'],))
+                if cursor.rowcount <= 0:
+                    return False
+                self.logger.debug(f"Deleted embedding for {file_path}")
+                return True
+        except Exception as e:
+            self.logger.error(f"Failed to delete embedding for {file_path}: {e}")
+            return False
+
+    def generate_embedding_for_file(self, file_id: int, file_path: str, content_type: str = None) -> bool:
+        """
+        Generate and store the embedding of one file
+
+        The file is treated as image or video according to ``content_type``
+        when given, otherwise according to its extension.
+
+        Args:
+            file_id: File inventory ID
+            file_path: Path to the file
+            content_type: Optional content type ('image' or 'video')
+
+        Returns:
+            True if an embedding was generated and stored; False when the file
+            is missing, is neither image nor video, or embedding/storing failed
+        """
+        try:
+            if not os.path.exists(file_path):
+                self.logger.debug(f"File not found for embedding: {file_path}")
+                return False
+
+            suffix = Path(file_path).suffix.lower()
+
+            if content_type:
+                # An explicit content type wins over the extension
+                lowered = content_type.lower()
+                is_image, is_video = 'image' in lowered, 'video' in lowered
+            else:
+                is_image = suffix in self.SUPPORTED_IMAGE_EXTENSIONS
+                is_video = suffix in self.SUPPORTED_VIDEO_EXTENSIONS
+
+            if is_image:
+                embedding = self.get_image_embedding(file_path)
+            elif is_video:
+                embedding = self.get_video_frame_embedding(file_path)
+            else:
+                return False
+
+            if embedding is None or not self.store_embedding(file_id, embedding):
+                return False
+
+            self.logger.debug(f"Generated embedding for file_id {file_id}: {Path(file_path).name}")
+            return True
+
+        except Exception as e:
+            self.logger.error(f"Failed to generate embedding for file {file_id}: {e}")
+            return False
+
+    def get_embedding_stats(self) -> Dict:
+        """
+        Report embedding coverage for files in the 'final' location
+
+        Returns:
+            Dict with 'total_embeddings', 'total_files', 'missing_embeddings'
+            and 'coverage_percent' (rounded to 2 decimals, 0 when there are no
+            files); an empty dict when the queries fail
+        """
+        try:
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+
+                # Embeddings whose file is still in 'final'
+                # (files moved to recycle bin or review are not counted)
+                cursor.execute('''
+                    SELECT COUNT(*) FROM content_embeddings ce
+                    JOIN file_inventory fi ON ce.file_id = fi.id
+                    WHERE fi.location = 'final'
+                ''')
+                embedded_count = cursor.fetchone()[0]
+
+                # All files in 'final'
+                cursor.execute("SELECT COUNT(*) FROM file_inventory WHERE location = 'final'")
+                file_count = cursor.fetchone()[0]
+
+                # Files in 'final' that have no embedding row
+                cursor.execute('''
+                    SELECT COUNT(*) FROM file_inventory fi
+                    WHERE fi.location = 'final'
+                    AND NOT EXISTS (SELECT 1 FROM content_embeddings ce WHERE ce.file_id = fi.id)
+                ''')
+                missing_count = cursor.fetchone()[0]
+
+                if file_count > 0:
+                    coverage = embedded_count / file_count * 100
+                else:
+                    coverage = 0
+
+                return {
+                    'total_embeddings': embedded_count,
+                    'total_files': file_count,
+                    'missing_embeddings': missing_count,
+                    'coverage_percent': round(coverage, 2)
+                }
+
+        except Exception as e:
+            self.logger.error(f"Failed to get embedding stats: {e}")
+            return {}
+
+    def generate_embeddings_batch(self, limit: int = 100, platform: str = None,
+                                   progress_callback=None) -> Dict:
+        """
+        Generate embeddings for 'final' files that do not have one yet
+
+        Args:
+            limit: Maximum files to process
+            platform: Filter by platform
+            progress_callback: Optional callback(processed, total, current_file),
+                called once per file before it is handled
+
+        Returns:
+            Dict with 'processed', 'success', 'errors' and 'skipped' counts.
+            Missing files and unsupported types count as skipped; a failed
+            embedding or store counts as an error. If the batch aborts on an
+            unexpected exception, the counts gathered so far are returned.
+        """
+        results = {'processed': 0, 'success': 0, 'errors': 0, 'skipped': 0}
+
+        try:
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+
+                # Candidates: files in 'final' with no embedding row
+                query = '''
+                    SELECT fi.id, fi.file_path, fi.content_type, fi.filename
+                    FROM file_inventory fi
+                    WHERE fi.location = 'final'
+                    AND NOT EXISTS (SELECT 1 FROM content_embeddings ce WHERE ce.file_id = fi.id)
+                '''
+                params = []
+
+                if platform:
+                    query += ' AND fi.platform = ?'
+                    params.append(platform)
+
+                query += ' LIMIT ?'
+                params.append(limit)
+
+                cursor.execute(query, params)
+                files = cursor.fetchall()
+
+            total = len(files)
+            self.logger.info(f"Processing {total} files for embedding generation")
+
+            for position, record in enumerate(files, start=1):
+                file_id = record['id']
+                file_path = record['file_path']
+                mime = record['content_type'] or ''
+                display_name = record['filename'] or ''
+
+                results['processed'] += 1
+
+                if progress_callback:
+                    progress_callback(position, total, display_name)
+
+                # Inventory rows can outlive the file on disk
+                if not os.path.exists(file_path):
+                    results['skipped'] += 1
+                    continue
+
+                # Either the extension or the content type may identify the kind
+                suffix = Path(file_path).suffix.lower()
+                if suffix in self.SUPPORTED_IMAGE_EXTENSIONS or 'image' in mime.lower():
+                    embed = self.get_image_embedding
+                elif suffix in self.SUPPORTED_VIDEO_EXTENSIONS or 'video' in mime.lower():
+                    embed = self.get_video_frame_embedding
+                else:
+                    results['skipped'] += 1
+                    continue
+
+                embedding = embed(file_path)
+                stored = embedding is not None and self.store_embedding(file_id, embedding)
+                results['success' if stored else 'errors'] += 1
+
+            self.logger.info(f"Embedding generation complete: {results}")
+            return results
+
+        except Exception as e:
+            self.logger.error(f"Failed to generate embeddings batch: {e}")
+            return results
+
+    def search_by_text(self, query: str, limit: int = 50, platform: str = None,
+                       source: str = None, threshold: float = 0.2) -> List[Dict]:
+        """
+        Find images/videos matching a natural language query
+
+        Args:
+            query: Natural language search query
+            limit: Maximum results
+            platform: Filter by platform
+            source: Filter by source
+            threshold: Minimum similarity score (0-1)
+
+        Returns:
+            List of files with similarity scores; empty when the query cannot
+            be embedded or the search fails
+        """
+        try:
+            text_vector = self.get_text_embedding(query)
+            if text_vector is not None:
+                return self._search_by_embedding(text_vector, limit, platform, source, threshold)
+            return []
+
+        except Exception as e:
+            self.logger.error(f"Text search failed: {e}")
+            return []
+
+    def search_by_image(self, image_path: str, limit: int = 50, platform: str = None,
+                        source: str = None, threshold: float = 0.5) -> List[Dict]:
+        """
+        Find files similar to a query image
+
+        Args:
+            image_path: Path to query image
+            limit: Maximum results
+            platform: Filter by platform
+            source: Filter by source
+            threshold: Minimum similarity score (0-1)
+
+        Returns:
+            List of similar files with scores; empty when the image cannot be
+            embedded or the search fails
+        """
+        try:
+            image_vector = self.get_image_embedding(image_path)
+            if image_vector is not None:
+                return self._search_by_embedding(image_vector, limit, platform, source, threshold)
+            return []
+
+        except Exception as e:
+            self.logger.error(f"Image search failed: {e}")
+            return []
+
+    def search_by_file_id(self, file_id: int, limit: int = 50, platform: str = None,
+                          source: str = None, threshold: float = 0.5) -> List[Dict]:
+        """
+        Find similar files to a file already in the database
+
+        Uses the stored embedding when there is one; otherwise the file is
+        looked up in file_inventory and embedded on the fly (the result of
+        that fallback is not stored).
+
+        Args:
+            file_id: File inventory ID
+            limit: Maximum results
+            platform: Filter by platform
+            source: Filter by source
+            threshold: Minimum similarity score (0-1)
+
+        Returns:
+            List of similar files with scores, excluding the query file itself;
+            empty when no embedding can be obtained or the search fails
+        """
+        try:
+            # Get existing embedding
+            query_embedding = self.get_embedding(file_id)
+
+            if query_embedding is None:
+                # Try to generate it: resolve the path first, then embed after
+                # the connection is released so inference does not hold it open
+                file_path = None
+                with self.db.get_connection() as conn:
+                    cursor = conn.cursor()
+                    cursor.execute('SELECT file_path FROM file_inventory WHERE id = ?', (file_id,))
+                    row = cursor.fetchone()
+                    if row:
+                        file_path = row['file_path']
+
+                if file_path:
+                    # Videos need a frame extracted; the image embedder cannot open them
+                    if Path(file_path).suffix.lower() in self.SUPPORTED_VIDEO_EXTENSIONS:
+                        query_embedding = self.get_video_frame_embedding(file_path)
+                    else:
+                        query_embedding = self.get_image_embedding(file_path)
+
+            if query_embedding is None:
+                return []
+
+            # Ask for one extra hit because the query file normally matches itself
+            results = self._search_by_embedding(query_embedding, limit + 1, platform, source, threshold)
+
+            # Remove the query file itself from results
+            return [r for r in results if r['id'] != file_id][:limit]
+
+        except Exception as e:
+            self.logger.error(f"Similar file search failed: {e}")
+            return []
+
+    def _search_by_embedding(self, query_embedding: np.ndarray, limit: int,
+                              platform: str = None, source: str = None,
+                              threshold: float = 0.2) -> List[Dict]:
+        """
+        Internal search using embedding vector
+
+        Loads every stored embedding of files in the 'final' location (subject
+        to the optional filters) and scores each against the query with cosine
+        similarity.
+
+        Args:
+            query_embedding: Query embedding vector
+            limit: Maximum results
+            platform: Filter by platform
+            source: Filter by source
+            threshold: Minimum similarity score
+
+        Returns:
+            List of files with similarity scores (rounded to 4 decimals),
+            sorted by score descending; empty when the search fails
+        """
+        try:
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+
+                # Build query to get all embeddings (with optional filters)
+                query = '''
+                    SELECT ce.file_id, ce.embedding, fi.file_path, fi.filename,
+                           fi.platform, fi.source, fi.content_type, fi.file_size
+                    FROM content_embeddings ce
+                    JOIN file_inventory fi ON fi.id = ce.file_id
+                    WHERE fi.location = 'final'
+                '''
+                params = []
+
+                if platform:
+                    query += ' AND fi.platform = ?'
+                    params.append(platform)
+                if source:
+                    query += ' AND fi.source = ?'
+                    params.append(source)
+
+                cursor.execute(query, params)
+
+                query_size = np.size(query_embedding)
+                incompatible = 0
+                results = []
+                for row in cursor.fetchall():
+                    blob = row['embedding']
+                    if not blob:
+                        # Same guard as get_embedding(): ignore empty/NULL blobs
+                        continue
+
+                    embedding = bytes_to_embedding(blob)
+                    if embedding.size != query_size:
+                        # Vector produced by a model with another dimensionality;
+                        # scoring it would raise and abort the whole search
+                        incompatible += 1
+                        continue
+
+                    similarity = cosine_similarity(query_embedding, embedding)
+
+                    if similarity >= threshold:
+                        results.append({
+                            'id': row['file_id'],
+                            'file_path': row['file_path'],
+                            'filename': row['filename'],
+                            'platform': row['platform'],
+                            'source': row['source'],
+                            'content_type': row['content_type'],
+                            'file_size': row['file_size'],
+                            'similarity': round(similarity, 4)
+                        })
+
+                if incompatible:
+                    self.logger.debug(
+                        f"Skipped {incompatible} embeddings with a dimension other than {query_size}"
+                    )
+
+                # Sort by similarity descending
+                results.sort(key=lambda x: x['similarity'], reverse=True)
+
+                return results[:limit]
+
+        except Exception as e:
+            self.logger.error(f"Embedding search failed: {e}")
+            return []
+
+
+# Global instance (lazy initialization)
+_semantic_search = None
+
+
+def reset_clip_model():
+    """Drop the cached CLIP model so the next get_clip_model() call loads it afresh"""
+    global _clip_model, _clip_model_name
+    with _model_lock:
+        _clip_model = _clip_model_name = None
+        logger.info("CLIP model cache cleared, will reload on next use")
+
+
+def get_semantic_search(unified_db=None, force_reload=False):
+    """Return the shared SemanticSearch instance, creating it when needed
+
+    Args:
+        unified_db: Database instance for a newly created instance; when None a
+            UnifiedDatabase is constructed. Ignored if an instance already
+            exists and force_reload is False.
+        force_reload: If True, recreate the instance and clear the cached CLIP
+            model (useful when model config changes)
+    """
+    global _semantic_search
+    if _semantic_search is not None and not force_reload:
+        return _semantic_search
+
+    if force_reload:
+        # The cached model must go too, otherwise the new instance would reuse it
+        reset_clip_model()
+    if unified_db is None:
+        from modules.unified_database import UnifiedDatabase
+        unified_db = UnifiedDatabase()
+    _semantic_search = SemanticSearch(unified_db)
+    return _semantic_search
--- a/modules/service_health_monitor.py
+++ b/modules/service_health_monitor.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python3
+"""
+Service Health Monitor - Tracks service failures and sends alerts
+Only active during scheduler mode for unattended operation monitoring
+"""
+
+import json
+from pathlib import Path
+from datetime import datetime, timedelta
+from typing import Dict, Optional
+from modules.universal_logger import get_logger
+
+
+class ServiceHealthMonitor:
+    """Monitor service health and send alerts when services get stuck"""
+
+    def __init__(self,
+                 state_file: str = "/opt/media-downloader/database/service_health.json",
+                 config: dict = None,
+                 error_monitoring_config: dict = None,
+                 pushover_notifier = None,
+                 scheduler_mode: bool = False):
+        """
+        Initialize health monitor
+
+        Args:
+            state_file: Path to JSON file storing health state
+            config: Configuration dict from settings.json
+            error_monitoring_config: Error monitoring settings (for push alert delay)
+            pushover_notifier: Instance of PushoverNotifier for alerts
+            scheduler_mode: Only monitor when True (scheduler mode)
+        """
+        self.state_file = Path(state_file)
+        self.state_file.parent.mkdir(parents=True, exist_ok=True)
+        self.pushover = pushover_notifier
+        self.scheduler_mode = scheduler_mode
+        self.error_monitoring_config = error_monitoring_config or {}
+
+        # Default configuration
+        self.config = {
+            'enabled': True,
+            'notification_cooldown_hours': 24,
+            'min_consecutive_failures': 2,  # Number of consecutive run failures before alerting
+            'services': {
+                'fastdl': {'monitor': True, 'notify': True},
+                'imginn': {'monitor': True, 'notify': True},
+                'snapchat': {'monitor': True, 'notify': True},
+                'toolzu': {'monitor': True, 'notify': True},
+                'tiktok': {'monitor': True, 'notify': True},
+                'forums': {'monitor': True, 'notify': True}
+            },
+            'pushover': {
+                'enabled': True,
+                'priority': 0,
+                'sound': 'pushover'
+            }
+        }
+
+        # Merge user config
+        if config:
+            self.config.update(config)
+
+        # Load or initialize state
+        self.state = self._load_state()
+
+        # Setup logging
+        self.logger = get_logger('ServiceHealthMonitor')
+
+    def _load_state(self) -> Dict:
+        """Load health state from file"""
+        if self.state_file.exists():
+            try:
+                with open(self.state_file, 'r') as f:
+                    return json.load(f)
+            except Exception as e:
+                self.logger.error(f"Failed to load health state: {e}")
+
+        # Initialize empty state
+        return {'service_health': {}}
+
+    def _save_state(self):
+        """Save health state to file"""
+        try:
+            with open(self.state_file, 'w') as f:
+                json.dump(self.state, f, indent=2, default=str)
+        except Exception as e:
+            self.logger.error(f"Failed to save health state: {e}")
+
+    def _get_service_state(self, service: str) -> Dict:
+        """Get state for a service, initialize if doesn't exist"""
+        if service not in self.state['service_health']:
+            self.state['service_health'][service] = {
+                'status': 'healthy',
+                'consecutive_failures': 0,
+                'last_success': None,
+                'last_failure': None,
+                'last_notification_sent': None,
+                'failure_type': None,
+                'total_failures': 0,
+                'total_successes': 0
+            }
+        return self.state['service_health'][service]
+
+    def record_success(self, service: str):
+        """
+        Record successful operation for a service
+
+        Args:
+            service: Service name (fastdl, imginn, snapchat, etc.)
+        """
+        # Only monitor in scheduler mode
+        if not self.scheduler_mode:
+            return
+
+        # Check if service is monitored
+        if not self._is_monitored(service):
+            return
+
+        state = self._get_service_state(service)
+        now = datetime.now()
+
+        # Was service previously stuck? Send recovery notification
+        was_stuck = state['status'] == 'stuck'
+
+        # Update state
+        state['status'] = 'healthy'
+        state['consecutive_failures'] = 0
+        state['last_success'] = now.isoformat()
+        state['failure_type'] = None
+        state['total_successes'] += 1
+
+        self._save_state()
+
+        # Send recovery notification if service was stuck
+        if was_stuck and self._should_notify(service):
+            self._send_recovery_notification(service, now)
+
+    def record_failure(self, service: str, reason: str = 'unknown'):
+        """
+        Record failure for a service
+
+        Args:
+            service: Service name (fastdl, imginn, snapchat, etc.)
+            reason: Reason for failure (cloudflare, rate_limit, timeout, etc.)
+        """
+        # Only monitor in scheduler mode
+        if not self.scheduler_mode:
+            return
+
+        # Check if service is monitored
+        if not self._is_monitored(service):
+            return
+
+        state = self._get_service_state(service)
+        now = datetime.now()
+
+        # Update state - increment consecutive failures
+        state['consecutive_failures'] += 1
+        state['last_failure'] = now.isoformat()
+        state['failure_type'] = reason
+        state['total_failures'] += 1
+
+        # Check if service should be marked as stuck based on consecutive run failures
+        min_failures = self.config.get('min_consecutive_failures', 2)
+        if state['consecutive_failures'] >= min_failures:
+            state['status'] = 'stuck'
+
+            # Send notification if cooldown period has passed
+            if self._should_notify(service) and self._notification_cooldown_expired(service):
+                self._send_alert_notification(service, reason, now)
+                state['last_notification_sent'] = now.isoformat()
+
+        self._save_state()
+
+    def _is_monitored(self, service: str) -> bool:
+        """Check if service should be monitored"""
+        if not self.config.get('enabled', True):
+            return False
+
+        service_config = self.config.get('services', {}).get(service, {})
+        return service_config.get('monitor', True)
+
+    def _should_notify(self, service: str) -> bool:
+        """Check if notifications are enabled for this service"""
+        if not self.pushover:
+            return False
+
+        if not self.config.get('pushover', {}).get('enabled', True):
+            return False
+
+        service_config = self.config.get('services', {}).get(service, {})
+        return service_config.get('notify', True)
+
+    def _notification_cooldown_expired(self, service: str) -> bool:
+        """Check if notification cooldown period has expired"""
+        state = self._get_service_state(service)
+        last_sent = state.get('last_notification_sent')
+
+        if not last_sent:
+            return True  # Never sent, can send now
+
+        try:
+            last_sent_time = datetime.fromisoformat(last_sent)
+            # Use push_alert_delay_hours from error_monitoring config if available,
+            # otherwise fall back to notification_cooldown_hours or default 24
+            cooldown_hours = self.error_monitoring_config.get('push_alert_delay_hours',
+                                self.config.get('notification_cooldown_hours', 24))
+            cooldown_period = timedelta(hours=cooldown_hours)
+
+            return datetime.now() - last_sent_time > cooldown_period
+        except (ValueError, TypeError):
+            return True  # Error parsing date, allow notification
+
+    def _send_alert_notification(self, service: str, reason: str, now: datetime):
+        """Send Pushover alert notification"""
+        state = self._get_service_state(service)
+
+        # Calculate time since last success
+        time_stuck = "Unknown"
+        if state['last_success']:
+            try:
+                last_success = datetime.fromisoformat(state['last_success'])
+                delta = now - last_success
+                hours = int(delta.total_seconds() / 3600)
+                if hours < 1:
+                    time_stuck = f"{int(delta.total_seconds() / 60)} minutes ago"
+                elif hours < 48:
+                    time_stuck = f"{hours} hours ago"
+                else:
+                    days = int(hours / 24)
+                    time_stuck = f"{days} days ago"
+            except (ValueError, TypeError):
+                pass
+
+        # Format service name
+        service_name = service.replace('_', ' ').title()
+
+        # Format reason
+        reason_map = {
+            'cloudflare': 'Cloudflare Challenge',
+            'cloudflare_challenge': 'Cloudflare Challenge',
+            'rate_limit': 'Rate Limited (429)',
+            'forbidden': 'Access Forbidden (403)',
+            'timeout': 'Connection Timeout',
+            'authentication': 'Authentication Required',
+            'captcha': 'CAPTCHA Challenge',
+            'blocked': 'IP Blocked',
+            'unknown': 'Unknown Error'
+        }
+        reason_text = reason_map.get(reason.lower(), reason)
+
+        # Build message
+        title = f"⚠️ Service Alert: {service_name}"
+        message = f"""Status: Stuck/Blocked
+Issue: {reason_text}
+Failed Since: {now.strftime('%b %d, %I:%M %p')} ({state['consecutive_failures']} consecutive failures)
+
+Last successful download: {time_stuck if state['last_success'] else 'Never'}
+
+Action may be required.
+"""
+
+        # Send notification
+        try:
+            priority = self.config.get('pushover', {}).get('priority', 0)
+            sound = self.config.get('pushover', {}).get('sound', 'pushover')
+
+            self.pushover.send_notification(
+                title=title,
+                message=message,
+                priority=priority,
+                sound=sound
+            )
+
+            self.logger.info(f"Sent alert notification for {service}: {reason}")
+        except Exception as e:
+            self.logger.error(f"Failed to send alert notification: {e}")
+
+    def _send_recovery_notification(self, service: str, now: datetime):
+        """Send recovery notification (optional)"""
+        # Recovery notifications are optional - can be disabled
+        if not self.config.get('send_recovery_notifications', False):
+            return
+
+        state = self._get_service_state(service)
+        service_name = service.replace('_', ' ').title()
+
+        title = f"✅ Service Recovered: {service_name}"
+        message = f"""Status: Healthy
+Service is working again.
+
+Recovered at: {now.strftime('%b %d, %I:%M %p')}
+"""
+
+        try:
+            self.pushover.send_notification(
+                title=title,
+                message=message,
+                priority=-1,  # Low priority for recovery
+                sound='magic'
+            )
+
+            self.logger.info(f"Sent recovery notification for {service}")
+        except Exception as e:
+            self.logger.error(f"Failed to send recovery notification: {e}")
+
+    def get_service_status(self, service: str) -> Dict:
+        """Get current status for a service"""
+        return self._get_service_state(service).copy()
+
+    def get_all_status(self) -> Dict:
+        """Get status for all services"""
+        return self.state['service_health'].copy()
+
+    def reset_service(self, service: str):
+        """Reset state for a service"""
+        if service in self.state['service_health']:
+            del self.state['service_health'][service]
+            self._save_state()
--- a/modules/settings_manager.py
+++ b/modules/settings_manager.py
@@ -0,0 +1,257 @@
+#!/usr/bin/env python3
+"""
+Settings Manager for Media Downloader
+Handles settings storage in database with JSON file compatibility
+"""
+
+import json
+import sqlite3
+from pathlib import Path
+from datetime import datetime
+from typing import Dict, List, Optional, Any, Union, Tuple
+from contextlib import contextmanager
+import threading
+from modules.universal_logger import get_logger
+
+logger = get_logger('SettingsManager')
+
+
+class SettingsManager:
+    """Manage application settings in database (thread-safe)"""
+
+    def __init__(self, db_path: str):
+        """
+        Initialize settings manager
+
+        Args:
+            db_path: Path to SQLite database
+        """
+        self.db_path = db_path
+        self._write_lock = threading.RLock()  # Reentrant lock for write operations
+        self._create_tables()
+
+    @contextmanager
+    def _get_connection(self, for_write: bool = False):
+        """Get database connection (thread-safe)"""
+        conn = sqlite3.connect(self.db_path, timeout=30.0, check_same_thread=False)
+        conn.row_factory = sqlite3.Row
+        try:
+            if for_write:
+                with self._write_lock:
+                    yield conn
+            else:
+                yield conn
+        finally:
+            conn.close()
+
+    def _create_tables(self):
+        """Create settings table if it doesn't exist"""
+        with self._get_connection(for_write=True) as conn:
+            cursor = conn.cursor()
+
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS settings (
+                    key TEXT PRIMARY KEY,
+                    value TEXT NOT NULL,
+                    value_type TEXT NOT NULL,
+                    category TEXT,
+                    description TEXT,
+                    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    updated_by TEXT DEFAULT 'system'
+                )
+            ''')
+
+            # Create index for category lookups
+            cursor.execute('''
+                CREATE INDEX IF NOT EXISTS idx_settings_category
+                ON settings(category)
+            ''')
+
+            conn.commit()
+            logger.info("Settings tables initialized")
+
+    def get(self, key: str, default: Any = None) -> Any:
+        """
+        Get a setting value
+
+        Args:
+            key: Setting key (supports dot notation, e.g., 'instagram.enabled')
+            default: Default value if not found
+
+        Returns:
+            Setting value or default
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('SELECT value, value_type FROM settings WHERE key = ?', (key,))
+            row = cursor.fetchone()
+
+            if not row:
+                return default
+
+            value, value_type = row['value'], row['value_type']
+            return self._deserialize_value(value, value_type)
+
+    def set(self, key: str, value: Any, category: str = None,
+            description: str = None, updated_by: str = 'system'):
+        """
+        Set a setting value
+
+        Args:
+            key: Setting key
+            value: Setting value (will be serialized to JSON if needed)
+            category: Optional category
+            description: Optional description
+            updated_by: Who updated the setting
+        """
+        value_str, value_type = self._serialize_value(value)
+
+        with self._get_connection(for_write=True) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT OR REPLACE INTO settings
+                (key, value, value_type, category, description, updated_at, updated_by)
+                VALUES (?, ?, ?, ?, ?, ?, ?)
+            ''', (key, value_str, value_type, category, description,
+                  datetime.now().isoformat(), updated_by))
+            conn.commit()
+            logger.debug(f"Setting updated: {key} = {value_str[:100]}")
+
+    def get_category(self, category: str) -> Dict[str, Any]:
+        """
+        Get all settings in a category
+
+        Args:
+            category: Category name
+
+        Returns:
+            Dictionary of settings
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                SELECT key, value, value_type
+                FROM settings
+                WHERE category = ?
+            ''', (category,))
+
+            result = {}
+            for row in cursor.fetchall():
+                key = row['key']
+                value = self._deserialize_value(row['value'], row['value_type'])
+                result[key] = value
+
+            return result
+
+    def get_all(self) -> Dict[str, Any]:
+        """
+        Get all settings as a nested dictionary
+
+        Returns:
+            Nested dictionary of all settings
+        """
+        with self._get_connection() as conn:
+            cursor = conn.cursor()
+            cursor.execute('SELECT key, value, value_type FROM settings')
+
+            result = {}
+            for row in cursor.fetchall():
+                key = row['key']
+                value = self._deserialize_value(row['value'], row['value_type'])
+
+                # Support nested keys like 'instagram.enabled'
+                self._set_nested(result, key, value)
+
+            return result
+
+    def delete(self, key: str):
+        """Delete a setting"""
+        with self._get_connection(for_write=True) as conn:
+            cursor = conn.cursor()
+            cursor.execute('DELETE FROM settings WHERE key = ?', (key,))
+            conn.commit()
+            logger.debug(f"Setting deleted: {key}")
+
+    def migrate_from_json(self, json_path: str):
+        """
+        Migrate settings from JSON file to database
+
+        Args:
+            json_path: Path to settings.json file
+        """
+        json_file = Path(json_path)
+        if not json_file.exists():
+            logger.warning(f"JSON file not found: {json_path}")
+            return
+
+        with open(json_file, 'r') as f:
+            settings = json.load(f)
+
+        # Flatten and store settings
+        self._migrate_dict(settings, prefix='', category='root')
+        logger.info(f"Settings migrated from {json_path}")
+
+    def _migrate_dict(self, data: Dict, prefix: str = '', category: str = None):
+        """Recursively migrate nested dictionary"""
+        for key, value in data.items():
+            full_key = f"{prefix}.{key}" if prefix else key
+
+            if isinstance(value, dict):
+                # Store the entire dict as a value
+                self.set(full_key, value, category=category or key)
+            else:
+                # Store primitive value
+                self.set(full_key, value, category=category or prefix.split('.')[0])
+
+    def export_to_json(self, json_path: str):
+        """
+        Export settings to JSON file
+
+        Args:
+            json_path: Path to save settings.json
+        """
+        settings = self.get_all()
+
+        with open(json_path, 'w') as f:
+            json.dump(settings, f, indent=2)
+
+        logger.info(f"Settings exported to {json_path}")
+
+    def _serialize_value(self, value: Any) -> Tuple[str, str]:
+        """
+        Serialize value to string and determine type
+
+        Returns:
+            Tuple of (value_string, value_type)
+        """
+        if isinstance(value, bool):
+            return (json.dumps(value), 'boolean')
+        elif isinstance(value, int):
+            return (json.dumps(value), 'number')
+        elif isinstance(value, float):
+            return (json.dumps(value), 'number')
+        elif isinstance(value, str):
+            return (value, 'string')
+        elif isinstance(value, (dict, list)):
+            return (json.dumps(value), 'object' if isinstance(value, dict) else 'array')
+        else:
+            return (json.dumps(value), 'object')
+
+    def _deserialize_value(self, value_str: str, value_type: str) -> Any:
+        """Deserialize value from string"""
+        if value_type == 'string':
+            return value_str
+        else:
+            return json.loads(value_str)
+
+    def _set_nested(self, data: Dict, key: str, value: Any):
+        """Set value in nested dictionary using dot notation"""
+        parts = key.split('.')
+        current = data
+
+        for part in parts[:-1]:
+            if part not in current:
+                current[part] = {}
+            current = current[part]
+
+        current[parts[-1]] = value
--- a/modules/snapchat_client_module.py
+++ b/modules/snapchat_client_module.py
@@ -0,0 +1,871 @@
+#!/usr/bin/env python3
+"""
+Snapchat Client Module - Direct HTTP-based Snapchat downloader using curl_cffi.
+
+Replaces Playwright-based scraping with direct HTTP requests. Snapchat embeds
+all page data in <script id="__NEXT_DATA__"> JSON tags, so no JavaScript
+execution is needed. Uses story.snapchat.com which may not require Cloudflare.
+
+Follows the same pattern as instagram_client_module.py.
+"""
+
+import os
+import json
+import re
+import subprocess
+import time
+import random
+import platform
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Optional, Dict, List, Set
+
+from modules.base_module import LoggingMixin
+from modules.snapchat_scraper import SnapMedia, SnapCollection
+
+
+class SnapchatClientDownloader(LoggingMixin):
+    """Snapchat downloader using direct HTTP via curl_cffi (no Playwright)"""
+
+    def __init__(self,
+                 show_progress: bool = True,
+                 use_database: bool = True,
+                 log_callback=None,
+                 unified_db=None):
+        """Initialize the Snapchat Client downloader.
+
+        Args:
+            show_progress: Whether to show download progress
+            use_database: Whether to use database for dedup
+            log_callback: Optional logging callback
+            unified_db: UnifiedDatabase instance
+        """
+        self._init_logger('SnapchatClient', log_callback, default_module='Download')
+
+        self.scraper_id = 'snapchat_client'
+        self.show_progress = show_progress
+        self.use_database = use_database
+        self.download_count = 0
+        self.downloaded_files: Set[str] = set()
+        self.pending_downloads = []
+
+        # Session (lazy-initialized)
+        self._session = None
+
+        # Database
+        if unified_db and use_database:
+            from modules.unified_database import SnapchatDatabaseAdapter
+            self.db = SnapchatDatabaseAdapter(unified_db)
+            self.unified_db = unified_db
+        else:
+            self.db = None
+            self.unified_db = None
+            self.use_database = False
+
+        # Activity status manager
+        try:
+            from modules.activity_status import get_activity_manager
+            self.activity_manager = get_activity_manager(unified_db)
+        except ImportError:
+            self.activity_manager = None
+
+        # Cookie data from DB
+        self.cookies = []
+        self.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
+
+    def _get_session(self):
+        """Get or create a curl_cffi session with browser TLS fingerprinting."""
+        if self._session is None:
+            from curl_cffi.requests import Session
+            # Try multiple browser versions for curl_cffi compatibility
+            for _browser in ("chrome131", "chrome136", "chrome"):
+                try:
+                    self._session = Session(impersonate=_browser)
+                    break
+                except Exception:
+                    continue
+            else:
+                self._session = Session()
+            self._session.headers.update({
+                'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+                'accept-language': 'en-US,en;q=0.9',
+                'cache-control': 'no-cache',
+            })
+            # Load cookies from database
+            self._load_cookies()
+        return self._session
+
+    def _load_cookies(self):
+        """Load cookies from database for authenticated requests."""
+        if not self.unified_db:
+            return
+
+        # Try snapchat_client cookies first, fall back to snapchat
+        for scraper_id in ['snapchat_client', 'snapchat']:
+            try:
+                cookies = self.unified_db.get_scraper_cookies(scraper_id)
+                if cookies:
+                    self.log(f"Loaded {len(cookies)} cookies from '{scraper_id}' scraper", "debug")
+                    self.cookies = cookies
+                    for cookie in cookies:
+                        name = cookie.get('name', '')
+                        value = cookie.get('value', '')
+                        domain = cookie.get('domain', '.snapchat.com')
+                        if name and value and self._session:
+                            self._session.cookies.set(name, value, domain=domain)
+
+                    # Check if we have a stored user-agent (important for cf_clearance match)
+                    try:
+                        import json as _json
+                        with self.unified_db.get_connection() as conn:
+                            cursor = conn.cursor()
+                            cursor.execute(
+                                "SELECT user_agent FROM scrapers WHERE id = ?",
+                                (scraper_id,)
+                            )
+                            row = cursor.fetchone()
+                            if row and row[0]:
+                                self.user_agent = row[0]
+                                if self._session:
+                                    self._session.headers['User-Agent'] = self.user_agent
+                    except Exception:
+                        pass
+
+                    return
+            except Exception as e:
+                self.log(f"Error loading cookies from '{scraper_id}': {e}", "debug")
+
+    def _fetch_page(self, url: str) -> Optional[str]:
+        """Fetch a page via HTTP and return the HTML content.
+
+        Tries story.snapchat.com first (no Cloudflare), falls back to www.snapchat.com.
+        """
+        session = self._get_session()
+
+        # If URL uses www.snapchat.com, try story.snapchat.com first
+        story_url = url.replace('www.snapchat.com', 'story.snapchat.com')
+        www_url = url.replace('story.snapchat.com', 'www.snapchat.com')
+
+        # Try story.snapchat.com first (likely no Cloudflare)
+        for attempt_url in [story_url, www_url]:
+            try:
+                resp = session.get(attempt_url, timeout=30)
+                if resp.status_code == 200 and '__NEXT_DATA__' in resp.text:
+                    return resp.text
+                elif resp.status_code == 403:
+                    self.log(f"403 Forbidden from {attempt_url.split('/@')[0]}", "debug")
+                    continue
+                elif resp.status_code != 200:
+                    self.log(f"HTTP {resp.status_code} from {attempt_url.split('/@')[0]}", "debug")
+                    continue
+            except Exception as e:
+                self.log(f"Error fetching {attempt_url.split('/@')[0]}: {e}", "debug")
+                continue
+
+        return None
+
+    def _extract_next_data(self, html: str) -> Optional[Dict]:
+        """Extract __NEXT_DATA__ JSON from HTML page."""
+        match = re.search(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', html, re.DOTALL)
+        if not match:
+            return None
+        try:
+            return json.loads(match.group(1))
+        except json.JSONDecodeError as e:
+            self.log(f"Failed to parse __NEXT_DATA__ JSON: {e}", "error")
+            return None
+
+    def get_profile_content(self, username: str) -> Dict[str, List]:
+        """Get all spotlight URLs, highlight URLs, and inline story/highlight data from a profile.
+
+        Parses __NEXT_DATA__ JSON to extract:
+        - spotlights: list of spotlight URL strings
+        - highlights: list of highlight URL strings
+        - story_collection: SnapCollection from story.snapList (recent stories), or None
+        - highlight_collections: list of SnapCollection from curatedHighlights (inline data)
+
+        The inline data avoids needing separate HTTP requests for stories and highlights.
+        """
+        result = {'spotlights': [], 'highlights': [], 'story_collection': None, 'highlight_collections': []}
+
+        url = f"https://story.snapchat.com/@{username}"
+        self.log(f"Fetching profile for @{username}", "info")
+
+        html = self._fetch_page(url)
+        if not html:
+            self.log(f"Failed to fetch profile page for @{username}", "warning")
+            return result
+
+        # Extract spotlight URLs via regex (still needed — spotlight metadata requires per-URL fetch)
+        spotlight_pattern = rf'/@{re.escape(username)}/spotlight/([A-Za-z0-9_-]+)'
+        spotlight_ids = list(set(re.findall(spotlight_pattern, html)))
+        result['spotlights'] = [
+            f"https://story.snapchat.com/@{username}/spotlight/{sid}"
+            for sid in spotlight_ids
+        ]
+        self.log(f"Found {len(result['spotlights'])} spotlights", "info")
+
+        # Parse __NEXT_DATA__ for stories and highlights (much more reliable than regex)
+        data = self._extract_next_data(html)
+        if not data:
+            # Fall back to regex for highlights
+            highlight_pattern = rf'/@{re.escape(username)}/highlight/([A-Za-z0-9-]+)'
+            highlight_ids = list(set(re.findall(highlight_pattern, html)))
+            result['highlights'] = [
+                f"https://story.snapchat.com/@{username}/highlight/{hid}"
+                for hid in highlight_ids
+            ]
+            self.log(f"Found {len(result['highlights'])} highlights (regex fallback)", "info")
+            return result
+
+        props = (data.get('props') or {}).get('pageProps') or {}
+
+        # Extract story snapList (recent stories — not available via individual URLs)
+        story = props.get('story') or {}
+        story_snaps = story.get('snapList') or []
+        if story_snaps:
+            story_id = story.get('storyId') or {}
+            if isinstance(story_id, dict):
+                story_id = story_id.get('value', 'story')
+            story_collection = SnapCollection(
+                collection_id=story_id or 'story',
+                collection_type='story',
+                title=story.get('storyTitle', '') or 'Stories',
+                username=username,
+                url=url
+            )
+            for snap_data in story_snaps:
+                snap = self._parse_snap_data(snap_data)
+                if snap:
+                    story_collection.snaps.append(snap)
+            if story_collection.snaps:
+                result['story_collection'] = story_collection
+                self.log(f"Found {len(story_collection.snaps)} story snaps", "info")
+
+        # Extract curatedHighlights inline (avoids per-highlight HTTP requests)
+        curated_highlights = props.get('curatedHighlights') or []
+        for highlight in curated_highlights:
+            highlight_id = highlight.get('highlightId') or {}
+            if isinstance(highlight_id, dict):
+                highlight_id = highlight_id.get('value', '')
+
+            title = highlight.get('storyTitle') or {}
+            if isinstance(title, dict):
+                title = title.get('value', '')
+
+            collection = SnapCollection(
+                collection_id=highlight_id,
+                collection_type='highlight',
+                title=title or 'Untitled Highlight',
+                username=username,
+                url=f"https://story.snapchat.com/@{username}/highlight/{highlight_id}"
+            )
+            for snap_data in highlight.get('snapList') or []:
+                snap = self._parse_snap_data(snap_data)
+                if snap:
+                    collection.snaps.append(snap)
+            if collection.snaps:
+                result['highlight_collections'].append(collection)
+
+        self.log(f"Found {len(result['highlight_collections'])} highlights (inline)", "info")
+
+        return result
+
+    def _parse_snap_data(self, snap_data: Dict) -> Optional[SnapMedia]:
+        """Convert one __NEXT_DATA__ snapList entry into a SnapMedia object.
+
+        Returns None when the entry carries no 'mediaUrl'. The media ID is the
+        URL segment following '/d/' (cut at the first '.'), falling back to the
+        entry's snapId value; a missing or '0' timestamp becomes the current time.
+        """
+        urls = snap_data.get('snapUrls') or {}
+        media_url = urls.get('mediaUrl', '')
+        if not media_url:
+            return None
+
+        fallback_id = (snap_data.get('snapId') or {}).get('value', '')
+        url_id = media_url.split('/d/')[1].split('.')[0] if '/d/' in media_url else ''
+
+        raw_ts = (snap_data.get('timestampInSec') or {}).get('value', '0')
+        if raw_ts and raw_ts != '0':
+            taken_at = datetime.fromtimestamp(int(raw_ts))
+        else:
+            taken_at = datetime.now()
+
+        kind = 'video' if snap_data.get('snapMediaType') == 1 else 'image'
+        preview = urls.get('mediaPreviewUrl') or {}
+        latitude, longitude = snap_data.get('lat'), snap_data.get('lng')
+
+        return SnapMedia(
+            media_id=url_id or fallback_id,
+            media_type=kind,
+            media_url=media_url,
+            timestamp=taken_at,
+            index=snap_data.get('snapIndex', 0),
+            thumbnail_url=preview.get('value', ''),
+            lat=float(latitude) if latitude else None,
+            lng=float(longitude) if longitude else None
+        )
+
+    def get_spotlight_metadata(self, url: str) -> Optional[SnapCollection]:
+        """Extract full metadata from a spotlight URL via __NEXT_DATA__.
+
+        Only the first entry of ``spotlightFeed.spotlightStories`` is read.
+        Each snap is built by ``_parse_snap_data`` -- so entries without a media
+        URL are skipped, a missing or zero timestamp falls back to the current
+        time instead of the 1970 epoch, and lat/lng are carried over when
+        present -- and is then enriched with the story-level video metadata
+        (duration, description, view count, dimensions).
+
+        Args:
+            url: Spotlight page URL.
+
+        Returns:
+            A 'spotlight' SnapCollection, or None when the page cannot be
+            fetched, contains no __NEXT_DATA__ payload, or lists no
+            spotlight stories.
+        """
+        html = self._fetch_page(url)
+        if not html:
+            return None
+
+        data = self._extract_next_data(html)
+        if not data:
+            return None
+
+        props = (data.get('props') or {}).get('pageProps') or {}
+        feed = props.get('spotlightFeed') or {}
+        stories = feed.get('spotlightStories') or []
+        if not stories:
+            return None
+
+        story_data = stories[0]
+        story = story_data.get('story') or {}
+        metadata = (story_data.get('metadata') or {}).get('videoMetadata') or {}
+        creator = (metadata.get('creator') or {}).get('personCreator') or {}
+
+        # `or` supplies the fallback for keys that are present but null as well
+        # as for missing keys, so int() is never handed None.
+        description = metadata.get('description') or ''
+        duration_ms = int(metadata.get('durationMs') or 0)
+        view_count = int(metadata.get('viewCount') or 0)
+        width = int(metadata.get('width') or 540)
+        height = int(metadata.get('height') or 960)
+
+        collection = SnapCollection(
+            collection_id=(story.get('storyId') or {}).get('value', ''),
+            collection_type='spotlight',
+            title=description,
+            username=creator.get('username', ''),
+            url=url
+        )
+
+        for snap_data in story.get('snapList') or []:
+            snap = self._parse_snap_data(snap_data)
+            if not snap:
+                continue
+            # The video metadata is per story, so every snap gets the same values.
+            snap.duration_ms = duration_ms
+            snap.description = description
+            snap.view_count = view_count
+            snap.width = width
+            snap.height = height
+            collection.snaps.append(snap)
+
+        return collection
+
+    def get_highlight_metadata(self, url: str) -> Optional[SnapCollection]:
+        """Extract full metadata from a highlight URL via __NEXT_DATA__.
+
+        Snaps are built by ``_parse_snap_data``, the same parser used for
+        highlights embedded in the profile page. As a result entries without a
+        media URL are skipped, the media ID falls back to the snapId value
+        when the URL has no '/d/' segment, and a missing or zero timestamp
+        falls back to the current time instead of the 1970 epoch.
+
+        Args:
+            url: Highlight page URL; the username is taken from its '@name'
+                segment (empty string when there is none).
+
+        Returns:
+            A 'highlight' SnapCollection, or None when the page cannot be
+            fetched, contains no __NEXT_DATA__ payload, or has no 'highlight'
+            entry in its page props.
+        """
+        html = self._fetch_page(url)
+        if not html:
+            return None
+
+        data = self._extract_next_data(html)
+        if not data:
+            return None
+
+        props = (data.get('props') or {}).get('pageProps') or {}
+        highlight = props.get('highlight') or {}
+        if not highlight:
+            return None
+
+        # highlightId / storyTitle are unwrapped from {'value': ...} when they
+        # arrive as dicts and used as-is otherwise.
+        highlight_id = highlight.get('highlightId') or {}
+        if isinstance(highlight_id, dict):
+            highlight_id = highlight_id.get('value', '')
+
+        title = highlight.get('storyTitle') or {}
+        if isinstance(title, dict):
+            title = title.get('value', '')
+
+        username_match = re.search(r'@([^/]+)', url)
+
+        collection = SnapCollection(
+            collection_id=highlight_id,
+            collection_type='highlight',
+            title=title or 'Untitled Highlight',
+            username=username_match.group(1) if username_match else '',
+            url=url
+        )
+
+        for snap_data in highlight.get('snapList') or []:
+            snap = self._parse_snap_data(snap_data)
+            if snap:
+                collection.snaps.append(snap)
+
+        return collection
+
+    def _download_media_file(self, snap: SnapMedia, output_path: str) -> bool:
+        """Download a single media file via curl_cffi.
+
+        Fetches ``snap.media_url`` (with '&amp;' turned back into '&') through
+        the session returned by ``_get_session()``, writes the response body to
+        ``output_path`` and then applies ``_set_metadata`` to the new file.
+
+        Args:
+            snap: Snap whose ``media_url`` is downloaded.
+            output_path: Destination file path; missing parent directories
+                are created.
+
+        Returns:
+            True when the file was written. False on a non-200 status, an
+            empty body, or any exception (logged, never raised).
+        """
+        try:
+            url = snap.media_url.replace('&amp;', '&')
+            resp = self._get_session().get(url, timeout=60)
+
+            if resp.status_code != 200:
+                self.log(f"Download failed: HTTP {resp.status_code}", "debug")
+                return False
+
+            payload = resp.content
+            if not payload:
+                self.log("Download failed: empty response body", "debug")
+                return False
+
+            # Path.parent is '.' for a bare filename, so this also works when
+            # output_path has no directory part (os.makedirs('') would raise).
+            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+            with open(output_path, 'wb') as f:
+                f.write(payload)
+            self._set_metadata(output_path, snap)
+            return True
+
+        except Exception as e:
+            self.log(f"Error downloading media: {e}", "error")
+            return False
+
+    def _set_metadata(self, file_path: str, snap: SnapMedia, description: str = None):
+        """Set EXIF metadata and file timestamp.
+
+        Best effort: failures are logged at debug level and never raised. The
+        exiftool step and the filesystem-mtime step are guarded separately, so
+        a missing, timed-out or failing exiftool does not prevent the file's
+        modification time from being set.
+
+        Args:
+            file_path: Path of the media file; its extension decides whether
+                image tags, video tags, or only FileModifyDate are written.
+            snap: Snap supplying the timestamp, description, view count and
+                (for images) latitude/longitude.
+            description: Optional text used in preference to
+                ``snap.description``.
+        """
+        try:
+            date_str = snap.timestamp.strftime('%Y:%m:%d %H:%M:%S')
+            desc = description or snap.description or ""
+            if snap.view_count:
+                desc += f" [Views: {snap.view_count}]"
+            desc = desc.strip()
+
+            ext = os.path.splitext(file_path)[1].lower()
+            is_video = ext in ('.mp4', '.mov', '.avi', '.webm')
+            is_image = ext in ('.jpg', '.jpeg', '.png', '.webp')
+
+            exif_args = [
+                'exiftool', '-overwrite_original', '-ignoreMinorErrors',
+                f'-FileModifyDate={date_str}',
+            ]
+
+            if is_image:
+                exif_args.extend([
+                    f'-DateTimeOriginal={date_str}',
+                    f'-CreateDate={date_str}',
+                    f'-ModifyDate={date_str}',
+                    f'-MetadataDate={date_str}',
+                ])
+                if desc:
+                    exif_args.extend([
+                        f'-ImageDescription={desc}',
+                        f'-XPComment={desc}',
+                        f'-UserComment={desc}',
+                    ])
+                # GPS tags are written only when both coordinates are truthy
+                if snap.lat and snap.lng:
+                    lat_ref = 'N' if snap.lat >= 0 else 'S'
+                    lng_ref = 'E' if snap.lng >= 0 else 'W'
+                    exif_args.extend([
+                        f'-GPSLatitude={abs(snap.lat)}',
+                        f'-GPSLatitudeRef={lat_ref}',
+                        f'-GPSLongitude={abs(snap.lng)}',
+                        f'-GPSLongitudeRef={lng_ref}',
+                    ])
+
+            elif is_video:
+                exif_args.extend([
+                    f'-CreateDate={date_str}',
+                    f'-ModifyDate={date_str}',
+                    f'-MediaCreateDate={date_str}',
+                    f'-MediaModifyDate={date_str}',
+                    f'-TrackCreateDate={date_str}',
+                    f'-TrackModifyDate={date_str}',
+                ])
+                if desc:
+                    exif_args.extend([
+                        f'-Description={desc}',
+                        f'-Comment={desc}',
+                    ])
+
+            exif_args.append(file_path)
+            # Argument list (no shell); exiftool's exit status is not inspected.
+            subprocess.run(exif_args, capture_output=True, timeout=30)
+
+        except Exception as e:
+            self.log(f"Warning: Could not set metadata for {file_path}: {e}", "debug")
+
+        # Set filesystem modification time. Deliberately outside the block
+        # above so it still runs when the exiftool step raised.
+        try:
+            ts = snap.timestamp.timestamp()
+            os.utime(file_path, (ts, ts))
+        except Exception as e:
+            self.log(f"Warning: Could not set file time for {file_path}: {e}", "debug")
+
+    def _generate_filename(self, username: str, snap: SnapMedia, ext: str) -> str:
+        """Build '<username>_<YYYYMMDD>_<HHMMSS>_<media_id>.<ext>' for a snap."""
+        return f"{username}_{snap.timestamp:%Y%m%d_%H%M%S}_{snap.media_id}.{ext}"
+
+    def _get_processed_posts(self, username: str) -> Set[str]:
+        """Get set of media IDs that have been processed.
+
+        IDs are collected from every 'snapchat' row of the downloads table
+        whose source is ``username``, from two places: the ``media_id`` key of
+        the JSON metadata column, and the filename, which _generate_filename()
+        writes as ``<username>_<YYYYMMDD>_<HHMMSS>_<media_id>.<ext>``.
+
+        Args:
+            username: Account whose download records are read.
+
+        Returns:
+            Set of media IDs; empty when no database is configured or the
+            query fails (the error is logged at debug level).
+        """
+        processed = set()
+        if not self.db:
+            return processed
+
+        # Anchor on the date/time stamp instead of counting '_' separators: a
+        # username containing '_' would otherwise shift the fields and yield
+        # 'HHMMSS_<media_id>' rather than the media ID.
+        stamped_name = re.compile(r'_\d{8}_\d{6}_([^.]*)')
+
+        try:
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute('''
+                    SELECT filename, metadata FROM downloads
+                    WHERE platform = 'snapchat'
+                    AND source = ?
+                ''', (username,))
+
+                for filename, metadata_str in cursor.fetchall():
+                    if filename:
+                        match = stamped_name.search(filename)
+                        if match:
+                            processed.add(match.group(1))
+                        else:
+                            # Names without a date/time stamp: keep the older
+                            # rule (everything after the third '_', up to '.').
+                            parts = filename.split('_')
+                            if len(parts) >= 4:
+                                processed.add('_'.join(parts[3:]).split('.')[0])
+
+                    if metadata_str:
+                        try:
+                            metadata = json.loads(metadata_str)
+                            if 'media_id' in metadata:
+                                processed.add(metadata['media_id'])
+                        except (json.JSONDecodeError, TypeError, KeyError):
+                            # Unparseable or unexpected metadata: the row simply
+                            # contributes no ID from this column.
+                            pass
+
+        except Exception as e:
+            self.log(f"Error loading processed posts: {e}", "debug")
+
+        return processed
+
+    def _record_download(self, username: str, url: str, filename: str,
+                         post_date=None, metadata: dict = None, file_path: str = None,
+                         deferred: bool = False):
+        """Persist a download record, or queue it when recording is deferred.
+
+        With ``deferred`` set, the record is appended to
+        ``self.pending_downloads`` (``post_date`` converted with isoformat()
+        when it offers that method) and True is returned. Otherwise the record
+        is passed to ``self.db.mark_downloaded``; nothing happens when no
+        database is set, failures are only logged, and None is returned.
+        """
+        if not deferred:
+            if not self.db:
+                return
+
+            try:
+                self.db.mark_downloaded(
+                    username=username,
+                    url=url,
+                    filename=filename,
+                    post_date=post_date,
+                    metadata=metadata,
+                    file_path=file_path
+                )
+            except Exception as e:
+                self.log(f"Failed to record download: {e}", "debug")
+            return
+
+        if hasattr(post_date, 'isoformat'):
+            stored_date = post_date.isoformat()
+        else:
+            stored_date = post_date
+
+        entry = {
+            'username': username,
+            'url': url,
+            'filename': filename,
+            'post_date': stored_date,
+            'file_path': file_path,
+            'metadata': metadata
+        }
+        self.pending_downloads.append(entry)
+        return True
+
+    def get_pending_downloads(self) -> list:
+        """Return the records queued for deferred recording.
+
+        The list object itself is returned, not a copy.
+        """
+        queued = self.pending_downloads
+        return queued
+
+    def clear_pending_downloads(self):
+        """Reset the deferred-recording queue by binding a fresh empty list.
+
+        A list previously obtained from get_pending_downloads() is left intact.
+        """
+        self.pending_downloads = list()
+
+    def download(self, username: str, content_type: str = "all", days_back: int = 14,
+                 max_downloads: int = 50, output_dir: str = None,
+                 spotlight_dir: str = None, stories_dir: str = None,
+                 stitch_highlights: bool = True, defer_database: bool = False,
+                 phrase_config: dict = None) -> int:
+        """Download content from a user - compatible with media-downloader interface.
+
+        The profile is listed with get_profile_content(); each spotlight and
+        highlight URL is then fetched individually, with randomized pauses
+        between requests. Snaps older than ``days_back`` days, or whose media
+        ID is already known (database or current run), are skipped. Errors are
+        logged rather than raised.
+
+        Args:
+            username: Snapchat username
+            content_type: "spotlight", "stories", "highlights", or "all"
+            days_back: How many days back to download (filters by post date)
+            max_downloads: Maximum spotlight / highlight URLs to process per
+                content type
+            output_dir: Default output directory (used if specific dirs not set)
+            spotlight_dir: Output directory for spotlights
+            stories_dir: Output directory for stories/highlights
+            stitch_highlights: Ignored (kept for backwards compatibility)
+            defer_database: If True, defer database recording
+            phrase_config: Not used (for interface compatibility)
+
+        Returns:
+            Number of files downloaded
+        """
+        self.defer_database = defer_database
+        self.downloaded_files.clear()
+
+        # Output directories: type-specific dir, else output_dir, else built-in default
+        spotlight_output = Path(
+            spotlight_dir or output_dir
+            or f"/opt/media-downloader/downloads/snapchat_client/spotlight/{username}"
+        )
+        stories_output = Path(
+            stories_dir or output_dir
+            or f"/opt/media-downloader/downloads/snapchat_client/stories/{username}"
+        )
+        spotlight_output.mkdir(parents=True, exist_ok=True)
+        stories_output.mkdir(parents=True, exist_ok=True)
+
+        # Update activity status
+        if self.activity_manager:
+            self.activity_manager.update_status("Checking Snapchat")
+
+        # Get processed posts (shared with snapchat module - both use platform='snapchat')
+        processed = self._get_processed_posts(username)
+        self.log(f"Loaded {len(processed)} processed posts from database", "debug")
+
+        cutoff_date = datetime.now() - timedelta(days=days_back)
+        downloaded_count = 0
+
+        # Crash recovery checkpoint
+        from modules.task_checkpoint import TaskCheckpoint
+        checkpoint = TaskCheckpoint(f'snapchat_client:{username}', 'scraping')
+
+        try:
+            # Get profile content via HTTP
+            content = self.get_profile_content(username)
+
+            want_spotlights = content_type in ['spotlight', 'all'] and bool(content['spotlights'])
+            want_highlights = content_type in ['stories', 'highlights', 'all'] and bool(content['highlights'])
+            spotlight_items = content['spotlights'][:max_downloads] if want_spotlights else []
+            highlight_items = content['highlights'][:max_downloads] if want_highlights else []
+
+            # Count total items for checkpoint
+            checkpoint.start(total_items=len(spotlight_items) + len(highlight_items))
+            if checkpoint.is_recovering():
+                self.log(f"Snapchat Client @{username}: recovering — skipping already-processed URLs", "info")
+
+            # NOTE(review): in both loops below the `continue` statements inside
+            # the try block bypass checkpoint.mark_completed(), so skipped URLs
+            # are not recorded as completed while URLs that raised are. Kept
+            # as-is; confirm against TaskCheckpoint whether this is intended.
+
+            # Download spotlights
+            if want_spotlights:
+                self.log(f"Processing {len(spotlight_items)} spotlights...", "info")
+
+                if self.activity_manager:
+                    self.activity_manager.update_status(
+                        "Downloading spotlights",
+                        progress_current=0,
+                        progress_total=len(spotlight_items)
+                    )
+
+                for spot_idx, url in enumerate(spotlight_items):
+                    if self.activity_manager:
+                        self.activity_manager.update_status(
+                            "Downloading spotlights",
+                            progress_current=spot_idx + 1,
+                            progress_total=len(spotlight_items)
+                        )
+
+                    if checkpoint.is_completed(url):
+                        continue
+
+                    checkpoint.set_current(url)
+
+                    try:
+                        # Rate limit between page fetches
+                        if spot_idx > 0:
+                            time.sleep(random.uniform(1.5, 2.5))
+
+                        spotlight = self.get_spotlight_metadata(url)
+                        if not spotlight or not spotlight.snaps:
+                            continue
+
+                        # Only the first snap of a spotlight is downloaded
+                        snap = spotlight.snaps[0]
+
+                        # Check date filter
+                        if snap.timestamp < cutoff_date:
+                            self.log(f"Spotlight {snap.media_id} is older than {days_back} days, skipping", "debug")
+                            continue
+
+                        # Check if already processed
+                        if snap.media_id in processed or snap.media_id in self.downloaded_files:
+                            self.log(f"Spotlight {snap.media_id} already processed, skipping", "debug")
+                            continue
+
+                        # Download
+                        ext = 'mp4' if snap.media_type == 'video' else 'jpg'
+                        filename = self._generate_filename(username, snap, ext)
+                        output_path = str(spotlight_output / filename)
+
+                        # Rate limit between CDN downloads
+                        time.sleep(random.uniform(0.3, 0.5))
+
+                        if self._download_media_file(snap, output_path):
+                            self.downloaded_files.add(snap.media_id)
+                            downloaded_count += 1
+                            self.log(f"Downloaded spotlight: {filename}", "info")
+
+                            self._record_download(
+                                username=username,
+                                url=url,
+                                filename=filename,
+                                post_date=snap.timestamp,
+                                metadata={
+                                    'media_id': snap.media_id,
+                                    'description': snap.description,
+                                    'view_count': snap.view_count,
+                                    'content_type': 'spotlight'
+                                },
+                                file_path=output_path,
+                                deferred=defer_database
+                            )
+
+                    except Exception as e:
+                        self.log(f"Error processing spotlight: {e}", "error")
+
+                    checkpoint.mark_completed(url)
+
+            # Rate limit between content types (both flags can only be set
+            # together when content_type is 'all')
+            if want_spotlights and want_highlights:
+                time.sleep(random.uniform(2, 3))
+
+            # Download highlights (stories)
+            if want_highlights:
+                self.log(f"Processing {len(highlight_items)} highlights...", "info")
+
+                if self.activity_manager:
+                    self.activity_manager.update_status(
+                        "Downloading highlights",
+                        progress_current=0,
+                        progress_total=len(highlight_items)
+                    )
+
+                for hi_idx, url in enumerate(highlight_items):
+                    if self.activity_manager:
+                        self.activity_manager.update_status(
+                            "Downloading highlights",
+                            progress_current=hi_idx + 1,
+                            progress_total=len(highlight_items)
+                        )
+
+                    if checkpoint.is_completed(url):
+                        continue
+
+                    checkpoint.set_current(url)
+
+                    try:
+                        # Rate limit between page fetches
+                        if hi_idx > 0:
+                            time.sleep(random.uniform(1.5, 2.5))
+
+                        highlight = self.get_highlight_metadata(url)
+                        if not highlight or not highlight.snaps:
+                            continue
+
+                        # Check if any snap is within date range
+                        newest_snap = max(highlight.snaps, key=lambda s: s.timestamp)
+                        if newest_snap.timestamp < cutoff_date:
+                            self.log(f"Highlight {highlight.collection_id} is older than {days_back} days, skipping", "debug")
+                            continue
+
+                        # Check if already processed
+                        if highlight.collection_id in processed or highlight.collection_id in self.downloaded_files:
+                            self.log(f"Highlight {highlight.collection_id} already processed, skipping", "debug")
+                            continue
+
+                        # Images first, then videos; every snap is filtered and
+                        # saved individually by the helper.
+                        images = [s for s in highlight.snaps if s.media_type == 'image']
+                        videos = [s for s in highlight.snaps if s.media_type == 'video']
+                        for snap in images + videos:
+                            if self._download_highlight_snap(username, snap, highlight, stories_output,
+                                                             processed, cutoff_date, defer_database):
+                                downloaded_count += 1
+
+                    except Exception as e:
+                        self.log(f"Error processing highlight: {e}", "error")
+
+                    checkpoint.mark_completed(url)
+
+        except Exception as e:
+            self.log(f"Error during download: {e}", "error")
+
+        # Not in a `finally`: finish() is reached after normal completion or a
+        # logged Exception, but not when a BaseException aborts the run.
+        checkpoint.finish()
+        self.log(f"Downloaded {downloaded_count} files for @{username}", "info")
+        return downloaded_count
+
+    def _download_highlight_snap(self, username: str, snap: SnapMedia, highlight: SnapCollection,
+                                 output_dir: Path, processed: Set[str], cutoff_date: datetime,
+                                 deferred: bool) -> bool:
+        """Download one snap of a highlight; return True when a file was saved.
+
+        Snaps older than ``cutoff_date`` or whose media ID is in ``processed``
+        or ``self.downloaded_files`` are skipped without any request. On
+        success the media ID is remembered for this run and the download is
+        recorded with content type 'highlight_video' or 'highlight_image'.
+        """
+        if snap.timestamp < cutoff_date:
+            return False
+        if snap.media_id in processed or snap.media_id in self.downloaded_files:
+            return False
+
+        # Rate limit between CDN downloads
+        time.sleep(random.uniform(0.3, 0.5))
+
+        kind = 'video' if snap.media_type == 'video' else 'image'
+        filename = self._generate_filename(username, snap, 'mp4' if kind == 'video' else 'jpg')
+        output_path = str(output_dir / filename)
+
+        # _download_media_file() already applies _set_metadata() to the saved
+        # file, so no second metadata pass is needed here.
+        if not self._download_media_file(snap, output_path):
+            return False
+
+        self.downloaded_files.add(snap.media_id)
+        self.log(f"Downloaded {kind}: {filename}", "info")
+
+        self._record_download(
+            username=username,
+            url=highlight.url,
+            filename=filename,
+            post_date=snap.timestamp,
+            metadata={
+                'media_id': snap.media_id,
+                'highlight_id': highlight.collection_id,
+                'content_type': f'highlight_{kind}'
+            },
+            file_path=output_path,
+            deferred=deferred
+        )
+        return True
--- a/modules/snapchat_scraper.py
+++ b/modules/snapchat_scraper.py
@@ -0,0 +1,985 @@
+#!/usr/bin/env python3
+"""
+Snapchat Direct Scraper Module - Scrapes directly from Snapchat.com
+
+Uses Playwright to scrape profiles and extract:
+- Spotlight videos (540x960)
+- Stories/Highlights (480x852, stitched into single videos)
+
+Full metadata extraction including timestamps, media IDs, descriptions.
+Follows the same interface as the original snapchat_module.py
+"""
+
+import os
+import json
+import re
+import tempfile
+import subprocess
+import shutil
+import platform
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Optional, Dict, List, Any, Set
+from dataclasses import dataclass, field
+
+# Set environment for Playwright
+os.environ.setdefault('PLAYWRIGHT_BROWSERS_PATH', '/root/.cache/ms-playwright')
+
+from modules.base_module import LoggingMixin
+from modules.cloudflare_handler import (
+    get_playwright_context_options,
+    get_playwright_stealth_scripts,
+    get_flaresolverr_user_agent
+)
+
+
+@dataclass
+class SnapMedia:
+    """Represents a single snap media item
+
+    One instance is created per entry of a story's ``snapList`` by the
+    scraper's spotlight/highlight metadata extractors.
+    """
+    # Identifier used for de-duplication and embedded in generated filenames
+    media_id: str
+    media_type: str  # 'video' or 'image'
+    # URL the media file itself is downloaded from
+    media_url: str
+    # Post time; the extractors build it with datetime.fromtimestamp()/now(),
+    # so it carries no tzinfo
+    timestamp: datetime
+    # Position within the collection (taken from 'snapIndex')
+    index: int = 0
+    # Preview image URL (taken from 'mediaPreviewUrl'); empty when absent
+    thumbnail_url: str = ""
+    # Duration in milliseconds; only the spotlight extractor fills this in
+    duration_ms: int = 0
+    # Caption text; _set_metadata writes it into the file's description tags
+    description: str = ""
+    # View counter; only the spotlight extractor fills this in
+    view_count: int = 0
+    # Pixel dimensions; the spotlight extractor defaults these to 540x960
+    width: int = 0
+    height: int = 0
+    # Coordinates set by the highlight extractor; None when missing or falsy
+    lat: Optional[float] = None
+    lng: Optional[float] = None
+
+
+@dataclass
+class SnapCollection:
+    """Represents a spotlight or highlight collection
+
+    Groups the SnapMedia items extracted from one spotlight or highlight page.
+    """
+    # Story/highlight identifier read from the page data ('' when absent)
+    collection_id: str
+    collection_type: str  # 'spotlight' or 'highlight'
+    # Spotlight description or highlight story title
+    title: str = ""
+    # Owner: creator username for spotlights, parsed from the URL for highlights
+    username: str = ""
+    # Media items in the order they appear in the page's snapList
+    snaps: List[SnapMedia] = field(default_factory=list)
+    # Page URL the collection was extracted from
+    url: str = ""
+
+
+class SnapchatDirectScraper(LoggingMixin):
+    """
+    Scrapes Snapchat profiles directly for media content.
+
+    Follows the same interface as SnapchatDownloader for compatibility
+    with the media-downloader system.
+    """
+
+    def __init__(self,
+                 headless: bool = True,
+                 show_progress: bool = True,
+                 use_database: bool = True,
+                 log_callback=None,
+                 unified_db=None):
+        """Initialize scraper compatible with media-downloader system
+
+        Args:
+            headless: Passed to the Chromium launch call.
+            show_progress: Stored as-is on the instance.
+            use_database: Record downloads through the database adapter.
+                Forced to False when no ``unified_db`` is supplied.
+            log_callback: Forwarded to the logging mixin's ``_init_logger``.
+            unified_db: Optional database handle used for the download
+                adapter, stored cookies and the proxy configuration.
+        """
+        self.headless = headless
+        self.show_progress = show_progress
+        self.use_database = use_database
+        self.unified_db = unified_db
+        self.scraper_id = 'snapchat_direct'
+        self.download_count = 0
+        self.downloaded_files: Set[str] = set()
+        self.pending_downloads = []
+
+        # Initialize logging via mixin
+        self._init_logger('SnapchatDirect', log_callback, default_module='Download')
+
+        # User-Agent to match FlareSolverr (dynamically fetched for consistency)
+        self.user_agent = get_flaresolverr_user_agent()
+
+        # Browser state
+        self._playwright = None
+        self.browser = None
+        self.context = None
+
+        # Database adapter
+        if unified_db and use_database:
+            from modules.unified_database import SnapchatDatabaseAdapter
+            self.db = SnapchatDatabaseAdapter(unified_db)
+        else:
+            self.db = None
+            self.use_database = False
+
+        # Activity status manager
+        try:
+            from modules.activity_status import get_activity_manager
+            self.activity_manager = get_activity_manager(unified_db)
+        except ImportError:
+            self.activity_manager = None
+
+        # Load cookies from database
+        self.cookies = self._load_cookies_from_db()
+
+        # Load proxy configuration from database
+        self.proxy_url = None
+        if unified_db:
+            try:
+                scraper_config = unified_db.get_scraper('snapchat')
+                if scraper_config and scraper_config.get('proxy_enabled') and scraper_config.get('proxy_url'):
+                    self.proxy_url = scraper_config['proxy_url']
+                    # The URL may embed user:pass@ credentials; mask them so
+                    # secrets never end up in the log output.
+                    safe_proxy = re.sub(r'://[^/]*@', '://***@', str(self.proxy_url))
+                    self.log(f"Using proxy: {safe_proxy}", "info")
+            except Exception as e:
+                self.log(f"Could not load proxy config: {e}", "debug")
+
+    def _load_cookies_from_db(self) -> List[Dict]:
+        """Return stored cookies, falling back to the built-in defaults.
+
+        Lookup order: cookies saved under this scraper's own id, then cookies
+        saved under the original 'snapchat' scraper, then the default list.
+        A failing lookup is logged and the next source is tried.
+        """
+        db = self.unified_db
+        if db:
+            # (scraper id, success message builder, failure prefix, failure level)
+            lookups = (
+                (self.scraper_id,
+                 lambda found: f"Loaded {len(found)} cookies from database",
+                 "Error loading cookies from database", "warning"),
+                ('snapchat',
+                 lambda found: "Using cookies from 'snapchat' scraper",
+                 "Error loading cookies from snapchat scraper", "debug"),
+            )
+            for source_id, describe, failure_prefix, failure_level in lookups:
+                try:
+                    stored = db.get_scraper_cookies(source_id)
+                    if stored:
+                        self.log(describe(stored), "debug")
+                        return stored
+                except Exception as exc:
+                    self.log(f"{failure_prefix}: {exc}", failure_level)
+
+        return self._get_default_cookies()
+
+    def _get_default_cookies(self) -> List[Dict]:
+        """Build the fallback cookie list used when nothing is stored.
+
+        It contains a single 'sc-cookies-accepted' cookie for www.snapchat.com.
+        """
+        consent_cookie = {
+            "name": "sc-cookies-accepted",
+            "value": "true",
+            "domain": "www.snapchat.com",
+            "path": "/",
+        }
+        return [consent_cookie]
+
+    def _save_cookies_to_db(self, cookies: List[Dict], user_agent: str = None):
+        """Save cookies to database
+
+        Does nothing when no database handle is configured. Failures are
+        logged as warnings and never raised.
+
+        Args:
+            cookies: List of cookie dictionaries
+            user_agent: User agent to associate with cookies (important for cf_clearance).
+                       If not provided, uses self.user_agent as fallback.
+        """
+        if not self.unified_db:
+            return
+
+        try:
+            # Use provided user_agent or fall back to self.user_agent
+            ua = user_agent or self.user_agent
+            self.unified_db.save_scraper_cookies(
+                self.scraper_id,
+                cookies,
+                user_agent=ua,
+                merge=True
+            )
+            # ua may still be None here; slicing None would raise and turn a
+            # successful save into a misleading "Error saving cookies" warning.
+            ua_preview = (ua or "")[:50]
+            self.log(f"Saved {len(cookies)} cookies to database (UA: {ua_preview}...)", "debug")
+        except Exception as e:
+            self.log(f"Error saving cookies to database: {e}", "warning")
+
+    def _parse_proxy_url(self, proxy_url: str) -> Optional[Dict]:
+        """
+        Parse proxy URL into Playwright proxy config.
+        Supports: protocol://user:pass@host:port or protocol://host:port
+        (http, https, socks, socks4, socks5), with an optional trailing slash.
+
+        Args:
+            proxy_url: Proxy URL string to parse.
+
+        Returns:
+            Dict with 'server' and, when credentials are present, 'username'
+            and 'password'; None when the URL is not in a supported format
+            (a warning is logged in that case).
+        """
+        try:
+            # Match: protocol://[user:pass@]host:port[/]
+            # The password group is greedy up to the LAST '@', so passwords
+            # that themselves contain '@' stay out of the host part.
+            match = re.match(
+                r'^(https?|socks[45]?)://(?:([^:/]+):(.+)@)?([^:@/]+):(\d+)/?$',
+                proxy_url
+            )
+            if match:
+                protocol, username, password, host, port = match.groups()
+                config = {'server': f'{protocol}://{host}:{port}'}
+                if username and password:
+                    config['username'] = username
+                    config['password'] = password
+                return config
+            # Do not echo the URL itself: it may contain credentials.
+            self.log(
+                "Proxy URL is not in a supported format "
+                "(expected protocol://[user:pass@]host:port)",
+                "warning"
+            )
+        except Exception as e:
+            self.log(f"Failed to parse proxy URL: {e}", "warning")
+        return None
+
+    def __enter__(self):
+        """Enter the ``with`` block; the scraper itself is the managed object."""
+        scraper = self
+        return scraper
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Leave the ``with`` block: close the browser, never swallow errors."""
+        self._close_browser()
+        # Returning False lets any exception raised inside the block propagate.
+        suppress_exception = False
+        return suppress_exception
+
+    def _start_browser(self):
+        """Start Playwright browser
+
+        Launches Chromium, creates a browser context from the fingerprint
+        options (plus stored user agent and proxy when available), installs
+        the stealth init script and loads ``self.cookies`` into the context.
+        Does nothing when a browser and a context already exist.
+
+        Raises:
+            Exception: Whatever was raised during startup. Anything that was
+                partially started is closed before the error is re-raised.
+        """
+        if self.browser is not None and self.context is not None:
+            return
+
+        # A previous attempt may have failed after the browser launched but
+        # before the context existed. Returning early in that state would
+        # leave self.context as None forever, so discard leftovers first
+        # (this is a no-op when nothing was started).
+        self._close_browser()
+
+        os.environ['DISPLAY'] = ':100'
+
+        from playwright.sync_api import sync_playwright
+        try:
+            self._playwright = sync_playwright().start()
+            self.browser = self._playwright.chromium.launch(
+                headless=self.headless,
+                args=['--no-sandbox', '--disable-dev-shm-usage', '--disable-gpu']
+            )
+
+            # Build context options - use dynamic fingerprinting from FlareSolverr
+            context_options = get_playwright_context_options()
+
+            # IMPORTANT: If cookies have a stored user_agent, use THAT user_agent
+            # Cloudflare cf_clearance cookies are fingerprinted to the browser that solved the challenge
+            try:
+                stored_user_agent = None
+                if self.unified_db:
+                    stored_user_agent = self.unified_db.get_scraper_cookies_user_agent(self.scraper_id)
+                if stored_user_agent:
+                    self.log(f"Using stored cookie user_agent: {stored_user_agent[:50]}...", "debug", module="Browser")
+                    context_options['user_agent'] = stored_user_agent
+                else:
+                    self.log(f"Using fingerprint: Chrome {context_options.get('extra_http_headers', {}).get('Sec-Ch-Ua', 'unknown')[:30]}...", "debug", module="Browser")
+            except Exception as e:
+                self.log(f"Error getting stored user_agent, using default: {e}", "debug", module="Browser")
+
+            # Add proxy if configured
+            if self.proxy_url:
+                proxy_config = self._parse_proxy_url(self.proxy_url)
+                if proxy_config:
+                    context_options['proxy'] = proxy_config
+                    self.log(f"Browser using proxy: {proxy_config.get('server')}", "info", module="Browser")
+
+            self.context = self.browser.new_context(**context_options)
+
+            # Add anti-detection scripts to all pages in this context
+            self.context.add_init_script(get_playwright_stealth_scripts())
+
+            # Add cookies
+            if self.cookies:
+                # Clean cookies for Playwright and convert expiry->expires
+                cleaned = []
+                for c in self.cookies:
+                    clean = {k: v for k, v in c.items() if k not in ['partitionKey', '_crHasCrossSiteAncestor']}
+                    # FlareSolverr uses 'expiry' but Playwright uses 'expires'
+                    if 'expiry' in clean and 'expires' not in clean:
+                        clean['expires'] = clean.pop('expiry')
+                    cleaned.append(clean)
+
+                # CRITICAL: Clear existing cookies first to ensure new cf_clearance takes effect
+                try:
+                    self.context.clear_cookies()
+                except Exception as e:
+                    # Best effort only, but leave a trace instead of hiding it
+                    self.log(f"Could not clear existing cookies: {e}", "debug", module="Browser")
+
+                self.context.add_cookies(cleaned)
+        except Exception:
+            # Do not keep a half-initialized browser/context/driver around
+            self._close_browser()
+            raise
+
+        self.log("Browser started", "info", module="Browser")
+
+    def _close_browser(self):
+        """Tear down context, browser and Playwright driver, in that order.
+
+        Each resource that is set is closed/stopped; an error during teardown
+        is logged at debug level, and the attribute is reset to None either way.
+        """
+        # (attribute holding the resource, method that releases it, log text)
+        teardown_plan = (
+            ('context', 'close', "Error closing browser context"),
+            ('browser', 'close', "Error closing browser"),
+            ('_playwright', 'stop', "Error stopping playwright"),
+        )
+        for attr_name, release_method, failure_text in teardown_plan:
+            resource = getattr(self, attr_name)
+            if not resource:
+                continue
+            try:
+                getattr(resource, release_method)()
+            except Exception as exc:
+                self.log(f"{failure_text}: {exc}", "debug")
+            setattr(self, attr_name, None)
+
+    def _get_next_data(self, page) -> Optional[Dict]:
+        """Parse the JSON held in the page's ``script#__NEXT_DATA__`` element.
+
+        Returns the decoded object, or None when the element is absent or
+        anything fails while reading/decoding it (the error is logged at debug).
+        """
+        try:
+            script_tag = page.locator('script#__NEXT_DATA__').first
+            if not script_tag.count() > 0:
+                return None
+            raw_json = script_tag.inner_text()
+            return json.loads(raw_json)
+        except Exception as exc:
+            self.log(f"Error extracting __NEXT_DATA__: {exc}", "debug")
+            return None
+
+    def _set_metadata(self, file_path: str, snap: SnapMedia, description: str = None):
+        """Set EXIF metadata and file timestamp
+
+        Runs ``exiftool`` to write date (and, when available, description and
+        GPS) tags, then sets the file's access/modification time to the snap
+        timestamp. Both steps are best effort: failures are logged at debug
+        level and never raised, and a failing exiftool step does not prevent
+        the file time from being set.
+
+        Args:
+            file_path: Path of the downloaded media file.
+            snap: Snap whose timestamp, description, view count and
+                coordinates are written.
+            description: Optional text that takes precedence over
+                ``snap.description``.
+        """
+        try:
+            date_str = snap.timestamp.strftime('%Y:%m:%d %H:%M:%S')
+            desc = description or snap.description or ""
+            if snap.view_count:
+                desc += f" [Views: {snap.view_count}]"
+            desc = desc.strip()
+
+            ext = os.path.splitext(file_path)[1].lower()
+            is_video = ext in ['.mp4', '.mov', '.avi', '.webm']
+            is_image = ext in ['.jpg', '.jpeg', '.png', '.webp']
+
+            exif_args = [
+                'exiftool', '-overwrite_original', '-ignoreMinorErrors',
+                f'-FileModifyDate={date_str}',
+            ]
+
+            if is_image:
+                exif_args.extend([
+                    f'-DateTimeOriginal={date_str}',
+                    f'-CreateDate={date_str}',
+                    f'-ModifyDate={date_str}',
+                    f'-MetadataDate={date_str}',
+                ])
+                if desc:
+                    exif_args.extend([
+                        f'-ImageDescription={desc}',
+                        f'-XPComment={desc}',
+                        f'-UserComment={desc}',
+                    ])
+                # Falsy coordinates (None or 0) are skipped.
+                # NOTE(review): presumably 0 means "not set" in the source
+                # data - confirm before treating 0.0 as a real coordinate.
+                if snap.lat and snap.lng:
+                    lat_ref = 'N' if snap.lat >= 0 else 'S'
+                    lng_ref = 'E' if snap.lng >= 0 else 'W'
+                    exif_args.extend([
+                        f'-GPSLatitude={abs(snap.lat)}',
+                        f'-GPSLatitudeRef={lat_ref}',
+                        f'-GPSLongitude={abs(snap.lng)}',
+                        f'-GPSLongitudeRef={lng_ref}',
+                    ])
+
+            elif is_video:
+                exif_args.extend([
+                    f'-CreateDate={date_str}',
+                    f'-ModifyDate={date_str}',
+                    f'-MediaCreateDate={date_str}',
+                    f'-MediaModifyDate={date_str}',
+                    f'-TrackCreateDate={date_str}',
+                    f'-TrackModifyDate={date_str}',
+                ])
+                if desc:
+                    exif_args.extend([
+                        f'-Description={desc}',
+                        f'-Comment={desc}',
+                    ])
+
+            exif_args.append(file_path)
+            completed = subprocess.run(exif_args, capture_output=True, timeout=30)
+            if completed.returncode != 0:
+                self.log(f"exiftool exited with status {completed.returncode} for {file_path}", "debug")
+
+        except Exception as e:
+            self.log(f"Warning: Could not set metadata for {file_path}: {e}", "debug")
+
+        # Set filesystem modification time. Kept in its own try block so a
+        # missing or timed-out exiftool cannot prevent it from running.
+        try:
+            ts = snap.timestamp.timestamp()
+            os.utime(file_path, (ts, ts))
+        except Exception as e:
+            self.log(f"Warning: Could not set file time for {file_path}: {e}", "debug")
+
+    def get_profile_content(self, username: str) -> Dict[str, List[str]]:
+        """Get all spotlight and highlight URLs from a profile
+
+        Opens the profile page, collects spotlight links from its HTML, then
+        clicks the "Stories" tab (when present) and collects highlight links.
+
+        Args:
+            username: Profile name as it appears after ``@`` in the URL.
+
+        Returns:
+            Dict with 'spotlights' and 'highlights' URL lists, de-duplicated
+            and in the order the links appear in the page. Both lists are
+            empty/partial when an error occurs (the error is logged).
+        """
+        import time
+
+        if not self.browser:
+            self._start_browser()
+
+        page = self.context.new_page()
+        result = {'spotlights': [], 'highlights': []}
+
+        # The username is matched literally, so any regex metacharacter in
+        # it (for example '.') must be escaped.
+        name_pattern = re.escape(username)
+
+        try:
+            url = f"https://www.snapchat.com/@{username}"
+            self.log(f"Navigating to profile @{username}", "info")
+            page.goto(url, wait_until='networkidle', timeout=30000)
+            time.sleep(2)
+
+            content = page.content()
+
+            # Extract spotlight URLs
+            spotlight_pattern = rf'/@{name_pattern}/spotlight/([A-Za-z0-9_-]+)'
+            # dict.fromkeys de-duplicates while keeping page order, so the
+            # result (and any later truncation by the caller) is deterministic.
+            spotlight_ids = list(dict.fromkeys(re.findall(spotlight_pattern, content)))
+            result['spotlights'] = [
+                f"https://www.snapchat.com/@{username}/spotlight/{sid}"
+                for sid in spotlight_ids
+            ]
+            self.log(f"Found {len(result['spotlights'])} spotlights", "info")
+
+            # Click Stories tab to get highlights
+            stories_tab = page.locator('[role="tab"]:has-text("Stories")').first
+            if stories_tab.count() > 0:
+                stories_tab.click()
+                time.sleep(2)
+
+                content = page.content()
+                highlight_pattern = rf'/@{name_pattern}/highlight/([A-Za-z0-9-]+)'
+                highlight_ids = list(dict.fromkeys(re.findall(highlight_pattern, content)))
+                result['highlights'] = [
+                    f"https://www.snapchat.com/@{username}/highlight/{hid}"
+                    for hid in highlight_ids
+                ]
+                self.log(f"Found {len(result['highlights'])} highlights", "info")
+
+        except Exception as e:
+            self.log(f"Error getting profile content: {e}", "error")
+        finally:
+            page.close()
+
+        return result
+
+    def get_spotlight_metadata(self, url: str) -> Optional[SnapCollection]:
+        """Extract full metadata from a spotlight URL
+
+        Args:
+            url: Spotlight page URL to open in the browser context.
+
+        Returns:
+            A SnapCollection of type 'spotlight' built from the first entry
+            of the page's spotlight feed, or None when the page data is
+            missing, the feed is empty, or an error occurs (logged).
+        """
+        import time
+
+        if not self.browser:
+            self._start_browser()
+
+        page = self.context.new_page()
+
+        try:
+            page.goto(url, wait_until='domcontentloaded', timeout=60000)
+            time.sleep(2)
+
+            data = self._get_next_data(page)
+            if not data:
+                return None
+
+            props = (data.get('props') or {}).get('pageProps') or {}
+            feed = props.get('spotlightFeed') or {}
+            stories = feed.get('spotlightStories') or []
+
+            if not stories:
+                return None
+
+            story_data = stories[0]
+            story = story_data.get('story') or {}
+            metadata = (story_data.get('metadata') or {}).get('videoMetadata') or {}
+
+            story_id = (story.get('storyId') or {}).get('value', '')
+            creator = (metadata.get('creator') or {}).get('personCreator') or {}
+            username = creator.get('username', '')
+
+            collection = SnapCollection(
+                collection_id=story_id,
+                collection_type='spotlight',
+                title=metadata.get('description') or '',
+                username=username,
+                url=url
+            )
+
+            for snap_data in story.get('snapList') or []:
+                snap_id = (snap_data.get('snapId') or {}).get('value', '')
+                snap_urls = snap_data.get('snapUrls') or {}
+                media_url = snap_urls.get('mediaUrl', '')
+
+                media_id = ''
+                if '/d/' in media_url:
+                    media_id = media_url.split('/d/')[1].split('.')[0]
+
+                # A missing or zero timestamp falls back to "now" rather than
+                # to the Unix epoch (1970).
+                ts_raw = (snap_data.get('timestampInSec') or {}).get('value')
+                ts_seconds = int(ts_raw) if ts_raw else 0
+                timestamp = datetime.fromtimestamp(ts_seconds) if ts_seconds else datetime.now()
+
+                # "x or default" also covers keys that are present but null,
+                # which would otherwise make int() raise and drop the collection.
+                snap = SnapMedia(
+                    media_id=media_id or snap_id,
+                    media_type='video' if snap_data.get('snapMediaType') == 1 else 'image',
+                    media_url=media_url,
+                    timestamp=timestamp,
+                    index=snap_data.get('snapIndex', 0),
+                    thumbnail_url=(snap_urls.get('mediaPreviewUrl') or {}).get('value', ''),
+                    duration_ms=int(metadata.get('durationMs') or 0),
+                    description=metadata.get('description') or '',
+                    view_count=int(metadata.get('viewCount') or 0),
+                    width=int(metadata.get('width') or 540),
+                    height=int(metadata.get('height') or 960)
+                )
+                collection.snaps.append(snap)
+
+            return collection
+
+        except Exception as e:
+            self.log(f"Error getting spotlight metadata: {e}", "error")
+            return None
+        finally:
+            page.close()
+
+    def get_highlight_metadata(self, url: str) -> Optional[SnapCollection]:
+        """Extract full metadata from a highlight URL
+
+        Args:
+            url: Highlight page URL to open in the browser context. The
+                username is taken from its ``@name`` segment.
+
+        Returns:
+            A SnapCollection of type 'highlight' with one SnapMedia per
+            snapList entry, or None when the page data or highlight object is
+            missing or an error occurs (logged).
+        """
+        import time
+
+        if not self.browser:
+            self._start_browser()
+
+        page = self.context.new_page()
+
+        try:
+            page.goto(url, wait_until='domcontentloaded', timeout=60000)
+            time.sleep(2)
+
+            data = self._get_next_data(page)
+            if not data:
+                return None
+
+            props = (data.get('props') or {}).get('pageProps') or {}
+            highlight = props.get('highlight') or {}
+
+            if not highlight:
+                return None
+
+            # The id and title may arrive as {'value': ...} wrappers or as
+            # plain values; both shapes are accepted.
+            highlight_id = highlight.get('highlightId') or {}
+            if isinstance(highlight_id, dict):
+                highlight_id = highlight_id.get('value', '')
+
+            username_match = re.search(r'@([^/]+)', url)
+            username = username_match.group(1) if username_match else ''
+
+            title = highlight.get('storyTitle') or {}
+            if isinstance(title, dict):
+                title = title.get('value', '')
+
+            collection = SnapCollection(
+                collection_id=highlight_id,
+                collection_type='highlight',
+                title=title or 'Untitled Highlight',
+                username=username,
+                url=url
+            )
+
+            for snap_data in highlight.get('snapList') or []:
+                snap_urls = snap_data.get('snapUrls') or {}
+                media_url = snap_urls.get('mediaUrl', '')
+
+                media_id = ''
+                if '/d/' in media_url:
+                    media_id = media_url.split('/d/')[1].split('.')[0]
+                if not media_id:
+                    # Same fallback the spotlight extractor uses: an empty id
+                    # would make every such snap collide in filenames and in
+                    # the de-duplication set.
+                    media_id = (snap_data.get('snapId') or {}).get('value', '')
+
+                # A missing or zero timestamp falls back to "now" rather than
+                # to the Unix epoch (1970).
+                ts_raw = (snap_data.get('timestampInSec') or {}).get('value')
+                ts_seconds = int(ts_raw) if ts_raw else 0
+                timestamp = datetime.fromtimestamp(ts_seconds) if ts_seconds else datetime.now()
+
+                lat = snap_data.get('lat')
+                lng = snap_data.get('lng')
+
+                snap = SnapMedia(
+                    media_id=media_id,
+                    media_type='video' if snap_data.get('snapMediaType') == 1 else 'image',
+                    media_url=media_url,
+                    timestamp=timestamp,
+                    index=snap_data.get('snapIndex', 0),
+                    thumbnail_url=(snap_urls.get('mediaPreviewUrl') or {}).get('value', ''),
+                    # Falsy coordinates (None, 0, '') are stored as None
+                    lat=float(lat) if lat else None,
+                    lng=float(lng) if lng else None
+                )
+                collection.snaps.append(snap)
+
+            return collection
+
+        except Exception as e:
+            self.log(f"Error getting highlight metadata: {e}", "error")
+            return None
+        finally:
+            page.close()
+
+    def _download_media_file(self, snap: SnapMedia, output_path: str) -> bool:
+        """Download a single media file
+
+        Fetches ``snap.media_url`` with ``curl`` into ``output_path`` and, on
+        success, applies metadata via ``_set_metadata``.
+
+        Args:
+            snap: Snap whose ``media_url`` is downloaded.
+            output_path: Destination file path.
+
+        Returns:
+            True when curl succeeded and a non-empty file exists at
+            ``output_path``; False otherwise (errors are logged, not raised).
+        """
+        try:
+            # The URL comes from page data and may carry HTML-escaped '&'
+            url = snap.media_url.replace('&amp;', '&')
+            if not url:
+                return False
+
+            # --fail makes curl exit non-zero on HTTP errors instead of saving
+            # the server's error page as if it were the media file.
+            result = subprocess.run([
+                'curl', '-sL', '--fail', '-o', output_path,
+                '-H', 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+                url
+            ], capture_output=True, timeout=60)
+
+            if result.returncode != 0:
+                self.log(f"curl exited with status {result.returncode} while downloading media", "debug")
+                return False
+
+            if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
+                self._set_metadata(output_path, snap)
+                return True
+            return False
+
+        except Exception as e:
+            self.log(f"Error downloading media: {e}", "error")
+            return False
+
+    def _generate_filename(self, username: str, snap: SnapMedia, ext: str) -> str:
+        """Build ``{username}_{YYYYMMDD}_{HHMMSS}_{media_id}.{ext}`` (FastDL format)"""
+        stamp = snap.timestamp.strftime('%Y%m%d_%H%M%S')
+        name_parts = (username, stamp, snap.media_id)
+        return "{}_{}_{}.{}".format(*name_parts, ext)
+
+    def _record_download(self, username: str, url: str, filename: str,
+                         post_date=None, metadata: dict = None, file_path: str = None,
+                         deferred: bool = False):
+        """Record a download in the database
+
+        Args:
+            username: Account the media belongs to.
+            url: Source URL of the spotlight/highlight.
+            filename: Name of the saved file.
+            post_date: Post time; converted with ``isoformat()`` when queued.
+            metadata: Extra fields stored with the record.
+            file_path: Full path of the saved file.
+            deferred: When True, queue the record in ``pending_downloads``
+                instead of writing it to the database.
+
+        Returns:
+            True when the record was queued or written; False when no
+            database adapter is configured or the write failed.
+        """
+        if deferred:
+            self.pending_downloads.append({
+                'username': username,
+                'url': url,
+                'filename': filename,
+                'post_date': post_date.isoformat() if hasattr(post_date, 'isoformat') else post_date,
+                'file_path': file_path,
+                'metadata': metadata
+            })
+            return True
+
+        if not self.db:
+            return False
+
+        try:
+            self.db.mark_downloaded(
+                username=username,
+                url=url,
+                filename=filename,
+                post_date=post_date,
+                metadata=metadata,
+                file_path=file_path
+            )
+        except Exception as e:
+            self.log(f"Failed to record download: {e}", "debug")
+            return False
+        return True
+
+    def get_pending_downloads(self):
+        """Return a shallow copy of the deferred download records."""
+        snapshot = list(self.pending_downloads)
+        return snapshot
+
+    def clear_pending_downloads(self):
+        """Drop all deferred download records.
+
+        The attribute is rebound to a new empty list; a list object handed
+        out earlier is left untouched.
+        """
+        self.pending_downloads = list()
+
+    def _get_processed_posts(self, username: str) -> Set[str]:
+        """Get set of media IDs that have been processed
+
+        Reads this user's 'snapchat' rows from the downloads table and
+        collects media ids from two places: the filename and the
+        ``media_id`` key of the JSON metadata column.
+
+        Args:
+            username: Value matched against the ``source`` column.
+
+        Returns:
+            Set of media ids; empty when no database adapter is configured
+            or the query fails (the failure is logged at debug level).
+        """
+        processed = set()
+        if not self.db:
+            return processed
+
+        # Filenames look like "{username}_{YYYYMMDD}_{HHMMSS}_{media_id}.{ext}".
+        # Anchoring on the date/time pair keeps usernames that contain '_'
+        # from shifting the media id position.
+        stamped_name = re.compile(r'_\d{8}_\d{6}_(.+)$')
+
+        try:
+            with self.db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute('''
+                    SELECT filename, metadata FROM downloads
+                    WHERE platform = 'snapchat'
+                    AND source = ?
+                ''', (username,))
+
+                for row in cursor.fetchall():
+                    filename, metadata_str = row
+                    if filename:
+                        stamped = stamped_name.search(filename)
+                        if stamped:
+                            processed.add(stamped.group(1).split('.')[0])
+                        else:
+                            # Names without the date/time pair: keep the
+                            # positional parsing used before.
+                            parts = filename.split('_')
+                            if len(parts) >= 4:
+                                media_id = '_'.join(parts[3:]).split('.')[0]
+                                processed.add(media_id)
+
+                    if metadata_str:
+                        try:
+                            metadata = json.loads(metadata_str)
+                            if 'media_id' in metadata:
+                                processed.add(metadata['media_id'])
+                        except (json.JSONDecodeError, TypeError, KeyError):
+                            pass  # Invalid metadata, skip
+
+        except Exception as e:
+            self.log(f"Error loading processed posts: {e}", "debug")
+
+        return processed
+
+    def download(self, username: str, content_type: str = "all", days_back: int = 14,
+                 max_downloads: int = 50, output_dir: str = None,
+                 spotlight_dir: str = None, stories_dir: str = None,
+                 stitch_highlights: bool = True, defer_database: bool = False,
+                 phrase_config: dict = None):
+        """
+        Download content from a user - compatible with media-downloader interface
+
+        Args:
+            username: Snapchat username
+            content_type: "spotlight", "stories", "highlights", or "all"
+            days_back: How many days back to download (filters by post date)
+            max_downloads: Maximum items to download per content type
+            output_dir: Default output directory (used if specific dirs not set)
+            spotlight_dir: Output directory for spotlights
+            stories_dir: Output directory for stories/highlights
+            stitch_highlights: Ignored (kept for backwards compatibility)
+            defer_database: If True, defer database recording
+            phrase_config: Not used (for interface compatibility)
+
+        Returns:
+            Number of files downloaded
+        """
+        self.defer_database = defer_database
+        self.downloaded_files.clear()
+
+        # Set output directories
+        # If specific dirs provided, use them directly
+        # If only output_dir provided, use it directly (caller handles structure)
+        # If nothing provided, use default with subdirectories
+        if spotlight_dir:
+            spotlight_output = Path(spotlight_dir)
+        elif output_dir:
+            spotlight_output = Path(output_dir)
+        else:
+            spotlight_output = Path(f"/opt/media-downloader/downloads/snapchat/spotlight/{username}")
+
+        if stories_dir:
+            stories_output = Path(stories_dir)
+        elif output_dir:
+            stories_output = Path(output_dir)
+        else:
+            stories_output = Path(f"/opt/media-downloader/downloads/snapchat/stories/{username}")
+
+        spotlight_output.mkdir(parents=True, exist_ok=True)
+        stories_output.mkdir(parents=True, exist_ok=True)
+
+        # Update activity status
+        if self.activity_manager:
+            self.activity_manager.update_status("Checking Snapchat")
+
+        # Get processed posts
+        processed = self._get_processed_posts(username)
+        self.log(f"Loaded {len(processed)} processed posts from database", "debug")
+
+        cutoff_date = datetime.now() - timedelta(days=days_back)
+        downloaded_count = 0
+
+        # Crash recovery checkpoint
+        from modules.task_checkpoint import TaskCheckpoint
+        checkpoint = TaskCheckpoint(f'snapchat:{username}', 'scraping')
+
+        try:
+            # Start browser
+            self._start_browser()
+
+            # Get profile content
+            content = self.get_profile_content(username)
+
+            # Count total items for checkpoint
+            total_items = 0
+            if content_type in ['spotlight', 'all'] and content['spotlights']:
+                total_items += min(len(content['spotlights']), max_downloads)
+            if content_type in ['stories', 'highlights', 'all'] and content['highlights']:
+                total_items += min(len(content['highlights']), max_downloads)
+            checkpoint.start(total_items=total_items)
+            if checkpoint.is_recovering():
+                self.log(f"Snapchat @{username}: recovering — skipping already-processed URLs", "info")
+
+            # Download spotlights
+            if content_type in ['spotlight', 'all'] and content['spotlights']:
+                spotlight_items = content['spotlights'][:max_downloads]
+                self.log(f"Processing {len(spotlight_items)} spotlights...", "info")
+
+                if self.activity_manager:
+                    self.activity_manager.update_status(
+                        "Downloading spotlights",
+                        progress_current=0,
+                        progress_total=len(spotlight_items)
+                    )
+
+                for spot_idx, url in enumerate(spotlight_items):
+                    # Update progress at start of each iteration (fires even on skips)
+                    if self.activity_manager:
+                        self.activity_manager.update_status(
+                            "Downloading spotlights",
+                            progress_current=spot_idx + 1,
+                            progress_total=len(spotlight_items)
+                        )
+
+                    if checkpoint.is_completed(url):
+                        continue
+
+                    checkpoint.set_current(url)
+
+                    try:
+                        spotlight = self.get_spotlight_metadata(url)
+                        if not spotlight or not spotlight.snaps:
+                            continue
+
+                        snap = spotlight.snaps[0]
+
+                        # Check date filter
+                        if snap.timestamp < cutoff_date:
+                            self.log(f"Spotlight {snap.media_id} is older than {days_back} days, skipping", "debug")
+                            continue
+
+                        # Check if already processed
+                        if snap.media_id in processed or snap.media_id in self.downloaded_files:
+                            self.log(f"Spotlight {snap.media_id} already processed, skipping", "debug")
+                            continue
+
+                        # Download
+                        ext = 'mp4' if snap.media_type == 'video' else 'jpg'
+                        filename = self._generate_filename(username, snap, ext)
+                        output_path = str(spotlight_output / filename)
+
+                        if self._download_media_file(snap, output_path):
+                            self.downloaded_files.add(snap.media_id)
+                            downloaded_count += 1
+                            self.log(f"Downloaded spotlight: {filename}", "info")
+
+                            self._record_download(
+                                username=username,
+                                url=url,
+                                filename=filename,
+                                post_date=snap.timestamp,
+                                metadata={
+                                    'media_id': snap.media_id,
+                                    'description': snap.description,
+                                    'view_count': snap.view_count,
+                                    'content_type': 'spotlight'
+                                },
+                                file_path=output_path,
+                                deferred=defer_database
+                            )
+
+                    except Exception as e:
+                        self.log(f"Error processing spotlight: {e}", "error")
+
+                    checkpoint.mark_completed(url)
+
+            # Download highlights (stories)
+            if content_type in ['stories', 'highlights', 'all'] and content['highlights']:
+                highlight_items = content['highlights'][:max_downloads]
+                self.log(f"Processing {len(highlight_items)} highlights...", "info")
+
+                if self.activity_manager:
+                    self.activity_manager.update_status(
+                        "Downloading highlights",
+                        progress_current=0,
+                        progress_total=len(highlight_items)
+                    )
+
+                for hi_idx, url in enumerate(highlight_items):
+                    # Update progress at start of each iteration (fires even on skips)
+                    if self.activity_manager:
+                        self.activity_manager.update_status(
+                            "Downloading highlights",
+                            progress_current=hi_idx + 1,
+                            progress_total=len(highlight_items)
+                        )
+
+                    if checkpoint.is_completed(url):
+                        continue
+
+                    checkpoint.set_current(url)
+
+                    try:
+                        highlight = self.get_highlight_metadata(url)
+                        if not highlight or not highlight.snaps:
+                            continue
+
+                        # Check if any snap is within date range
+                        newest_snap = max(highlight.snaps, key=lambda s: s.timestamp)
+                        if newest_snap.timestamp < cutoff_date:
+                            self.log(f"Highlight {highlight.collection_id} is older than {days_back} days, skipping", "debug")
+                            continue
+
+                        # Check if already processed
+                        if highlight.collection_id in processed or highlight.collection_id in self.downloaded_files:
+                            self.log(f"Highlight {highlight.collection_id} already processed, skipping", "debug")
+                            continue
+
+                        # Separate videos and images
+                        videos = [s for s in highlight.snaps if s.media_type == 'video']
+                        images = [s for s in highlight.snaps if s.media_type == 'image']
+
+                        # Download images individually
+                        for snap in images:
+                            if snap.timestamp < cutoff_date:
+                                continue
+                            if snap.media_id in processed or snap.media_id in self.downloaded_files:
+                                continue
+
+                            filename = self._generate_filename(username, snap, 'jpg')
+                            output_path = str(stories_output / filename)
+
+                            if self._download_media_file(snap, output_path):
+                                self.downloaded_files.add(snap.media_id)
+                                downloaded_count += 1
+                                self.log(f"Downloaded image: {filename}", "info")
+
+                                self._record_download(
+                                    username=username,
+                                    url=highlight.url,
+                                    filename=filename,
+                                    post_date=snap.timestamp,
+                                    metadata={
+                                        'media_id': snap.media_id,
+                                        'highlight_id': highlight.collection_id,
+                                        'content_type': 'highlight_image'
+                                    },
+                                    file_path=output_path,
+                                    deferred=defer_database
+                                )
+
+                        # Handle videos - download each clip individually
+                        if videos:
+                            for snap in videos:
+                                if snap.timestamp < cutoff_date:
+                                    continue
+                                if snap.media_id in processed or snap.media_id in self.downloaded_files:
+                                    continue
+
+                                filename = self._generate_filename(username, snap, 'mp4')
+                                output_path = str(stories_output / filename)
+
+                                if self._download_media_file(snap, output_path):
+                                    self._set_metadata(output_path, snap)
+                                    self.downloaded_files.add(snap.media_id)
+                                    downloaded_count += 1
+                                    self.log(f"Downloaded video: {filename}", "info")
+
+                                    self._record_download(
+                                        username=username,
+                                        url=highlight.url,
+                                        filename=filename,
+                                        post_date=snap.timestamp,
+                                        metadata={
+                                            'media_id': snap.media_id,
+                                            'highlight_id': highlight.collection_id,
+                                            'content_type': 'highlight_video'
+                                        },
+                                        file_path=output_path,
+                                        deferred=defer_database
+                                    )
+
+                    except Exception as e:
+                        self.log(f"Error processing highlight: {e}", "error")
+
+                    checkpoint.mark_completed(url)
+
+        except Exception as e:
+            self.log(f"Error during download: {e}", "error")
+
+        checkpoint.finish()
+        self.log(f"Downloaded {downloaded_count} files for @{username}", "info")
+        return downloaded_count
+
+
+def test_scraper():
+    """Manual smoke test: run one real download and list the resulting files.
+
+    Opens a headless SnapchatDirectScraper, downloads all content types for a
+    fixed account (30-day window, max_downloads=5) into /tmp/snap_test, then
+    prints every file found under that directory with its size in KB.
+    """
+    import os
+
+    banner = "=" * 60
+    print(banner)
+    print("SNAPCHAT DIRECT SCRAPER TEST")
+    print(banner)
+
+    with SnapchatDirectScraper(headless=True) as scraper:
+        # Test download
+        download_options = dict(
+            username="evalongoria",
+            content_type="all",
+            days_back=30,
+            max_downloads=5,
+            spotlight_dir="/tmp/snap_test/spotlight",
+            stories_dir="/tmp/snap_test/stories",
+            stitch_highlights=True,
+        )
+        count = scraper.download(**download_options)
+
+        print(f"\nDownloaded {count} files")
+
+        # Show files (sizes reported in KB)
+        for root, _dirs, files in os.walk("/tmp/snap_test"):
+            for file_name in files:
+                path = os.path.join(root, file_name)
+                size_kb = os.path.getsize(path) / 1024
+                print(f"  {path}: {size_kb:.1f}KB")
+
+    print(banner)
+    print("TEST COMPLETE")
+    print(banner)
+
+
+# Running this module as a script executes the manual scraper smoke test.
+if __name__ == "__main__":
+    test_scraper()
--- a/modules/taddy_client.py
+++ b/modules/taddy_client.py
@@ -0,0 +1,391 @@
+"""Taddy Podcast API client for finding podcast appearances"""
+import asyncio
+import re
+from html import unescape
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional
+from web.backend.core.http_client import http_client
+from modules.universal_logger import get_logger
+
+logger = get_logger('Taddy')
+
+
+def strip_html(text: str) -> str:
+    """Return *text* as plain text: tags removed, entities decoded, whitespace collapsed.
+
+    Falsy input (None or an empty string) is returned unchanged.
+    """
+    if not text:
+        return text
+
+    # Each tag becomes a space so words on either side of it stay separated.
+    without_tags = re.sub(r'<[^>]+>', ' ', text)
+    # Entities are decoded after tag removal, so escaped markup such as
+    # "&lt;b&gt;" survives as literal text.
+    decoded = unescape(without_tags)
+    # Collapse every whitespace run to one space and trim both ends.
+    return re.sub(r'\s+', ' ', decoded).strip()
+
+
+class TaddyClient:
+    """Client for interacting with the Taddy Podcast API (GraphQL)
+
+    Supports primary and fallback accounts for quota management.
+    When the primary account fails (500 error / quota exceeded),
+    automatically switches to the fallback account.
+    """
+
+    BASE_URL = "https://api.taddy.org"
+
+    # Series-title substrings that disqualify an episode when the celebrity is
+    # matched only through the episode title (no persons metadata, not the host).
+    _GARBAGE_PODCAST_NAMES = (
+        'news', 'gossip', 'rumor', 'daily', 'trending', 'tmz', 'variety',
+        'march madness', 'cruz show', 'aesthetic arrest', 'devious maids',
+    )
+
+    def __init__(self, user_id: str, api_key: str,
+                 user_id_2: str = None, api_key_2: str = None):
+        """Store credentials and build the request headers for the primary account.
+
+        Args:
+            user_id: Taddy user id of the primary account.
+            api_key: Taddy API key of the primary account.
+            user_id_2: User id of the optional fallback account.
+            api_key_2: API key of the optional fallback account.
+        """
+        # Primary account
+        self.user_id = user_id
+        self.api_key = api_key
+
+        # Fallback account (optional) - only usable when both values are set
+        self.user_id_2 = user_id_2
+        self.api_key_2 = api_key_2
+        self.has_fallback = bool(user_id_2 and api_key_2)
+
+        # Track which account is active
+        self.using_fallback = False
+
+        self._update_headers()
+
+    def _update_headers(self):
+        """Rebuild self.headers for whichever account is currently active."""
+        if self.using_fallback and self.has_fallback:
+            active_user, active_key = self.user_id_2, self.api_key_2
+        else:
+            active_user, active_key = self.user_id, self.api_key
+
+        self.headers = {
+            "Content-Type": "application/json",
+            "X-USER-ID": active_user,
+            "X-API-KEY": active_key
+        }
+
+    def _switch_to_fallback(self) -> bool:
+        """Switch to fallback account if available. Returns True if switched.
+
+        The switch is one-way: nothing in this class switches back to the
+        primary account afterwards.
+        """
+        if self.has_fallback and not self.using_fallback:
+            self.using_fallback = True
+            self._update_headers()
+            logger.info("Switched to fallback Taddy account")
+            return True
+        return False
+
+    async def _graphql_query(self, query: str, variables: Dict = None, retry_on_fallback: bool = True) -> Optional[Dict]:
+        """Execute a GraphQL query against the Taddy API
+
+        If the primary account fails with a 500 error (quota exceeded),
+        automatically retries with the fallback account if configured.
+
+        Args:
+            query: GraphQL document to send.
+            variables: Optional GraphQL variables.
+            retry_on_fallback: When True, a 500-style failure triggers one
+                retry on the fallback account (the retry itself passes False).
+
+        Returns:
+            The "data" member of the response, or None when the response
+            carries "errors" or the request raised.
+        """
+        try:
+            payload = {"query": query}
+            if variables:
+                payload["variables"] = variables
+
+            response = await http_client.post(
+                self.BASE_URL,
+                json=payload,
+                headers=self.headers
+            )
+
+            data = response.json()
+
+            if "errors" in data:
+                logger.error(f"Taddy API error: {data['errors']}")
+                return None
+
+            return data.get("data")
+
+        except Exception as e:
+            error_str = str(e).lower()
+            # Check for 500 error (quota exceeded) - http_client raises ServiceError
+            # NOTE(review): this is a substring test on the exception text, so any
+            # message containing "500" is treated as a quota failure.
+            if "500" in error_str or "server error" in error_str:
+                account_type = "fallback" if self.using_fallback else "primary"
+                logger.warning(f"Taddy API returned 500 on {account_type} account (likely quota exceeded)")
+
+                # Try fallback if available and we haven't already
+                if retry_on_fallback and self._switch_to_fallback():
+                    logger.info("Retrying with fallback Taddy account...")
+                    return await self._graphql_query(query, variables, retry_on_fallback=False)
+
+            logger.error(f"Taddy API request failed: {e}")
+            return None
+
+    async def _fetch_episodes(self, term: str, published_after: int,
+                              limit: int, max_pages: int) -> List[Dict]:
+        """Page through the episode search for *term* and return the raw episodes.
+
+        Paging stops at the first failed or empty page, at the first page with
+        fewer than *limit* results, or after *max_pages* pages.
+        """
+        query = """
+        query SearchPodcastEpisodes($term: String!, $limitPerPage: Int, $page: Int, $filterForPublishedAfter: Int) {
+            search(
+                term: $term,
+                filterForTypes: PODCASTEPISODE,
+                matchBy: EXACT_PHRASE,
+                limitPerPage: $limitPerPage,
+                page: $page,
+                filterForPublishedAfter: $filterForPublishedAfter
+            ) {
+                searchId
+                podcastEpisodes {
+                    uuid
+                    name
+                    description
+                    datePublished
+                    audioUrl
+                    persons {
+                        uuid
+                        name
+                        role
+                    }
+                    podcastSeries {
+                        uuid
+                        name
+                        imageUrl
+                    }
+                    websiteUrl
+                }
+            }
+        }
+        """
+
+        # Paginate through results (max 20 pages API limit, 25 per page = 500 max)
+        # max_pages passed as parameter from config
+        all_episodes = []
+
+        for page in range(1, max_pages + 1):
+            variables = {
+                "term": term,
+                "limitPerPage": limit,
+                "page": page,
+                "filterForPublishedAfter": published_after
+            }
+
+            data = await self._graphql_query(query, variables)
+
+            if not data or not data.get("search"):
+                break
+
+            episodes = data["search"].get("podcastEpisodes", [])
+            if not episodes:
+                break  # No more results
+
+            all_episodes.extend(episodes)
+
+            # If we got fewer than limit, we've reached the end
+            if len(episodes) < limit:
+                break
+
+            # Small delay between pages
+            await asyncio.sleep(0.2)
+
+        return all_episodes
+
+    @staticmethod
+    def _credit_type_from_role(role: Optional[str]) -> str:
+        """Map a Taddy person role onto a credit type.
+
+        Any role containing "host" becomes "host", any containing "guest"
+        becomes "guest", other non-empty roles are passed through lowercased,
+        and a missing role defaults to "guest".
+        """
+        role = (role or "").lower()
+        if "host" in role:
+            return "host"
+        if "guest" in role:
+            return "guest"
+        return role or "guest"
+
+    @staticmethod
+    def _match_person(persons: List[Dict], name_lower: str, name_parts: List[str]) -> Optional[str]:
+        """Return the credit type of the first persons entry matching the celebrity.
+
+        Returns None when no entry matches. Entries without a usable name are
+        skipped: an empty string is a substring of every name and would
+        otherwise match any celebrity.
+        """
+        for person in persons:
+            person_name = ((person or {}).get("name") or "").strip().lower()
+            if not person_name:
+                continue
+
+            # Match full name (either string containing the other)
+            matched = name_lower in person_name or person_name in name_lower
+
+            # Also check by last name for partial matches
+            # NOTE(review): a first-name hit alone also counts here, which can
+            # accept unrelated people who share a first name - confirm intent.
+            if not matched and len(name_parts) >= 2:
+                last_name = name_parts[-1]
+                first_name = name_parts[0]
+                matched = len(last_name) >= 4 and (
+                    last_name in person_name or first_name in person_name
+                )
+
+            if matched:
+                return TaddyClient._credit_type_from_role(person.get("role"))
+
+        return None
+
+    @staticmethod
+    def _build_interview_patterns(name_lower: str) -> list:
+        """Compile the title patterns that indicate an interview/guest appearance.
+
+        Built once per search rather than once per episode.
+        """
+        name_re = re.escape(name_lower)
+        raw_patterns = [
+            # Direct interview indicators
+            rf'(interview|interviews|interviewing)\s+(with\s+)?{name_re}',
+            rf'{name_re}\s+(interview|interviewed)',
+            # Guest indicators
+            rf'(guest|featuring|feat\.?|ft\.?|with guest|special guest)[:\s]+{name_re}',
+            rf'{name_re}\s+(joins|joined|stops by|sits down|talks|speaks|discusses|shares|reveals|opens up|gets real|gets honest)',
+            # "Name on Topic" format (common interview title)
+            rf'^{name_re}\s+on\s+',
+            # Episode number + name format ("Ep 123: Name...")
+            rf'^(ep\.?|episode|#)\s*\d+[:\s]+{name_re}',
+            # Name at start followed by colon or dash (interview format)
+            rf'^{name_re}\s*[:\-–—]\s*',
+            # "Conversation with Name"
+            rf'(conversation|chat|talk|talking|speaking)\s+with\s+{name_re}',
+            # "Name Returns" / "Name is Back"
+            rf'{name_re}\s+(returns|is back|comes back)',
+            # Q&A format
+            rf'(q&a|q\s*&\s*a|ama)\s+(with\s+)?{name_re}',
+            # Podcast-specific patterns
+            rf'{name_re}\s+(live|in studio|in the studio|on the show|on the pod)',
+        ]
+        return [re.compile(pattern) for pattern in raw_patterns]
+
+    @staticmethod
+    def _classify_without_persons(ep_name: str, podcast_name: str, name_lower: str,
+                                  name_parts: List[str], interview_patterns: list) -> Optional[str]:
+        """Decide the credit type for an episode with no matching persons entry.
+
+        Returns "host" when the series title names the celebrity, "guest" when
+        the episode title matches an interview pattern, and None when the
+        episode should be skipped.
+        """
+        # Fallback: check if they're the host via podcast series name
+        podcast_name_lower = podcast_name.lower()
+        is_host = name_lower in podcast_name_lower
+        if not is_host and len(name_parts) >= 2:
+            last_name = name_parts[-1]
+            first_name = name_parts[0]
+            if len(last_name) >= 4:
+                is_host = (f"with {last_name}" in podcast_name_lower or
+                           f"with {first_name}" in podcast_name_lower or
+                           f"{first_name} {last_name}" in podcast_name_lower)
+
+        if is_host:
+            logger.debug(f"Accepting '{ep_name}' - host podcast (name in series title)")
+            return "host"
+
+        # No persons metadata - use WHITELIST approach
+        # Only accept if title clearly indicates an interview/guest appearance
+        ep_name_lower = ep_name.lower()
+        if name_lower not in ep_name_lower:
+            logger.debug(f"Skipping '{ep_name}' - name not in title")
+            return None
+
+        # Check podcast name for news/gossip shows first
+        if any(word in podcast_name_lower for word in TaddyClient._GARBAGE_PODCAST_NAMES):
+            logger.debug(f"Skipping '{ep_name}' - podcast name suggests news/gossip")
+            return None
+
+        # Reject listicles (multiple comma-separated topics)
+        comma_count = ep_name_lower.count(',')
+        if comma_count >= 3:
+            logger.debug(f"Skipping '{ep_name}' - listicle format ({comma_count} commas)")
+            return None
+
+        # WHITELIST: Only accept if title matches clear interview patterns
+        if any(pattern.search(ep_name_lower) for pattern in interview_patterns):
+            logger.debug(f"Accepting '{ep_name}' - matches interview pattern")
+            return "guest"
+
+        logger.debug(f"Skipping '{ep_name}' - no interview pattern match (name just mentioned)")
+        return None
+
+    @staticmethod
+    def _parse_publish_date(date_published, now: datetime):
+        """Convert a Unix timestamp (seconds) into (appearance_date, status).
+
+        Returns (None, "aired") when the timestamp is missing or cannot be
+        converted; otherwise a "YYYY-MM-DD" string plus "upcoming" when the
+        date is after *now*'s date, else "aired".
+        """
+        if not date_published:
+            return None, "aired"
+        try:
+            # Taddy returns Unix timestamp in seconds
+            pub_date = datetime.fromtimestamp(date_published)
+            appearance_date = pub_date.strftime("%Y-%m-%d")
+            status = "upcoming" if pub_date.date() > now.date() else "aired"
+        except (ValueError, TypeError, OverflowError, OSError):
+            # fromtimestamp raises OverflowError/OSError for out-of-range values;
+            # a bad date should not discard the whole episode.
+            return None, "aired"
+        return appearance_date, status
+
+    async def search_podcast_appearances(
+        self,
+        celebrity_name: str,
+        lookback_days: int = 730,  # 2 years
+        lookahead_days: int = 30,
+        limit: int = 25,
+        max_pages: int = 10
+    ) -> List[Dict]:
+        """
+        Search for podcast episodes featuring a celebrity.
+
+        Episodes are accepted when the celebrity appears in the episode's
+        persons metadata, when the series title names them (treated as host),
+        or when the episode title matches a whitelist of interview patterns.
+
+        Args:
+            celebrity_name: Name of the celebrity to search for
+            lookback_days: How many days back to search
+            lookahead_days: How many days forward to search (for scheduled
+                releases). Currently unused: the query only filters on
+                publish date >= now - lookback_days.
+            limit: Maximum results per page
+            max_pages: Maximum number of result pages to request
+
+        Returns:
+            List of podcast appearance dicts (empty when the name is blank or
+            nothing matched)
+        """
+        # A blank name is a substring of every title/person and would match everything.
+        if not celebrity_name or not celebrity_name.strip():
+            logger.warning("Taddy search skipped: empty celebrity name")
+            return []
+
+        appearances = []
+
+        # Calculate date range and convert to Unix timestamp (seconds)
+        now = datetime.now()
+        start_timestamp = int((now - timedelta(days=lookback_days)).timestamp())
+
+        episodes = await self._fetch_episodes(celebrity_name, start_timestamp, limit, max_pages)
+
+        # Per-search invariants, hoisted out of the episode loop
+        name_lower = celebrity_name.lower()
+        name_parts = name_lower.split()
+        interview_patterns = self._build_interview_patterns(name_lower)
+
+        for ep in episodes:
+            try:
+                # Parse the episode data; the API may return null for nested objects
+                podcast_series = ep.get("podcastSeries") or {}
+                ep_name = (ep.get("name") or "")
+                podcast_name = (podcast_series.get("name") or "")
+
+                # ===== USE PERSONS METADATA FOR ACCURATE FILTERING =====
+                # Check if celebrity is listed in the persons array with a role
+                credit_type = self._match_person(ep.get("persons") or [], name_lower, name_parts)
+
+                if credit_type:
+                    logger.debug(f"Accepting '{ep_name}' - {celebrity_name} listed as {credit_type} in persons metadata")
+                else:
+                    credit_type = self._classify_without_persons(
+                        ep_name, podcast_name, name_lower, name_parts, interview_patterns
+                    )
+                    if credit_type is None:
+                        continue
+
+                appearance_date, status = self._parse_publish_date(ep.get("datePublished"), now)
+
+                appearance = {
+                    "appearance_type": "Podcast",
+                    "show_name": podcast_series.get("name") or "Unknown Podcast",
+                    "episode_title": ep.get("name"),
+                    "appearance_date": appearance_date,
+                    "status": status,
+                    "description": strip_html(ep.get("description")),
+                    # Artwork comes from the podcast series, not the episode
+                    "poster_url": podcast_series.get("imageUrl"),
+                    "audio_url": ep.get("audioUrl"),
+                    "url": ep.get("websiteUrl"),
+                    "credit_type": credit_type,
+                    "character_name": "Self",
+                    "taddy_episode_uuid": ep.get("uuid"),
+                    "taddy_podcast_uuid": podcast_series.get("uuid"),
+                    "duration_seconds": None,  # Duration removed from query to reduce complexity
+                }
+
+                appearances.append(appearance)
+                logger.info(f"Found podcast appearance: {celebrity_name} on '{podcast_series.get('name')}' - {ep.get('name')}")
+
+            except Exception as e:
+                # Best-effort: one malformed episode must not abort the whole search
+                logger.error(f"Error parsing Taddy episode: {e}")
+                continue
+
+        return appearances
+
+    async def test_connection(self) -> bool:
+        """Test if the API credentials are valid
+
+        Runs a one-result series search and returns True when the API
+        answered without errors.
+        """
+        query = """
+        query TestConnection {
+            search(term: "test", filterForTypes: PODCASTSERIES, limitPerPage: 1) {
+                searchId
+            }
+        }
+        """
+
+        data = await self._graphql_query(query)
+        return data is not None
--- a/modules/task_checkpoint.py
+++ b/modules/task_checkpoint.py
@@ -0,0 +1,295 @@
+"""
+Task Checkpoint Module for Crash Recovery
+
+Tracks progress of long-running scheduler tasks so that if the scheduler
+crashes mid-task, it can resume from where it left off instead of
+re-processing everything from scratch.
+
+Uses the scheduler_state database (PostgreSQL via pgadapter).
+"""
+
+import json
+import sqlite3
+import threading
+import time
+from contextlib import closing
+from datetime import datetime
+from pathlib import Path
+from typing import Callable, List, Optional, Set
+
+from modules.universal_logger import get_logger
+
+logger = get_logger('TaskCheckpoint')
+
+# Path to the scheduler state database, resolved relative to this module:
+# <parent of this module's directory>/database/scheduler_state.db
+_SCHEDULER_DB_PATH = Path(__file__).parent.parent / 'database' / 'scheduler_state.db'
+
+# How many items to buffer before flushing to DB
+# (mark_completed() flushes once this many completed items are pending)
+_FLUSH_INTERVAL = 5
+
+# Stale checkpoint threshold (hours) — abandon checkpoints older than this
+# NOTE(review): not referenced in the portion of the module visible here;
+# confirm where the staleness check is applied.
+STALE_THRESHOLD_HOURS = 48
+
+
+class TaskCheckpoint:
+    """Track progress of a scheduler task for crash recovery.
+
+    Usage::
+
+        checkpoint = TaskCheckpoint('instagram_unified:all')
+        checkpoint.start(total_items=len(accounts))
+        for account in accounts:
+            if checkpoint.is_completed(account['username']):
+                continue
+            checkpoint.set_current(account['username'])
+            process(account)
+            checkpoint.mark_completed(account['username'])
+        checkpoint.finish()
+    """
+
+    def __init__(self, task_id: str, task_type: str = 'scraping'):
+        """Set up in-memory checkpoint state; nothing is read or written yet.
+
+        Args:
+            task_id: Identifier of the scheduler task being tracked.
+            task_type: Category label stored alongside the checkpoint.
+        """
+        # Identity of the tracked task
+        self.task_id = task_id
+        self.task_type = task_type
+
+        # Lifecycle flags, updated by start() / finish()
+        self._started = False
+        self._recovering = False
+
+        # Progress bookkeeping
+        self._total_items: int = 0
+        self._current_item: Optional[str] = None
+        self._completed_items: Set[str] = set()
+        self._pending_flush: List[str] = []  # items not yet flushed to DB
+
+        # Held by mark_completed() while it updates the two collections above
+        self._lock = threading.Lock()
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def start(self, total_items: int = 0):
+        """Begin tracking, resuming a leftover checkpoint when one exists.
+
+        When _load_existing() finds a record for this task_id, its completed
+        items are adopted and recovery mode is switched on. Otherwise the
+        in-memory state is reset and a fresh record is created.
+
+        Args:
+            total_items: Number of items the task expects to process.
+        """
+        self._started = True
+        self._total_items = total_items
+
+        previously_done = self._load_existing()
+        if previously_done is None:
+            # Fresh run
+            self._recovering = False
+            self._completed_items = set()
+            self._create_record(total_items)
+            return
+
+        # Resuming from a crash
+        self._recovering = True
+        self._completed_items = previously_done
+        logger.info(
+            f"Resuming checkpoint for {self.task_id}: "
+            f"{len(self._completed_items)}/{total_items} items already completed",
+            module='Checkpoint',
+        )
+
+    def is_recovering(self) -> bool:
+        """Report whether start() resumed an existing checkpoint record."""
+        return self._recovering
+
+    def is_completed(self, item_id: str) -> bool:
+        """Return True when *item_id* is already in the completed set.
+
+        The id is compared by its string form, matching how items are stored.
+        """
+        item_key = str(item_id)
+        return item_key in self._completed_items
+
+    def get_remaining(self, items: list, key_fn: Callable) -> list:
+        """Filter *items* down to those whose key is not yet completed.
+
+        Args:
+            items: Full list of items.
+            key_fn: Function that extracts the item key from each element;
+                the key is compared by its string form.
+
+        Returns:
+            A new list that preserves the original order.
+        """
+        remaining = []
+        for item in items:
+            if str(key_fn(item)) in self._completed_items:
+                continue
+            remaining.append(item)
+        return remaining
+
+    def set_current(self, item_id: str):
+        """Remember the item now being processed and persist it (for crash diagnostics)."""
+        current = str(item_id)
+        self._current_item = current
+        self._update_current_item()
+
+    def mark_completed(self, item_id: str):
+        """Record *item_id* as done and flush once enough items are pending.
+
+        The in-memory collections are updated under the lock; the flush runs
+        after the lock is released, once _FLUSH_INTERVAL items are buffered.
+        """
+        item_key = str(item_id)
+        with self._lock:
+            self._completed_items.add(item_key)
+            self._pending_flush.append(item_key)
+            flush_due = not len(self._pending_flush) < _FLUSH_INTERVAL
+        if flush_due:
+            self._flush()
+
+    def finish(self):
+        """Close out a successful run: flush pending items, then delete the record.
+
+        Does nothing when start() has not been called.
+        """
+        if self._started:
+            self._flush()  # flush any remaining items
+            self._delete_record()
+            self._started = False
+
+    def finish_if_started(self):
+        """Call finish() only when start() was called; otherwise do nothing."""
+        if not self._started:
+            return
+        self.finish()
+
+    # ------------------------------------------------------------------
+    # Class methods for discovery
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def get_interrupted(cls) -> list:
+        """List checkpoint records whose status is still 'running'.
+
+        Returns:
+            A list of dicts with keys task_id, task_type, started_at,
+            completed_count, total_items and current_item. An empty list is
+            returned on any error; a missing table is not logged.
+        """
+        query = (
+            "SELECT task_id, task_type, started_at, completed_items, "
+            "total_items, current_item FROM scheduler_task_checkpoints "
+            "WHERE status = 'running'"
+        )
+        try:
+            with closing(sqlite3.connect(str(_SCHEDULER_DB_PATH), timeout=10)) as conn:
+                cursor = conn.cursor()
+                cursor.execute(query)
+                rows = cursor.fetchall()
+
+            # Summarise each row; only the count of completed items is exposed.
+            return [
+                {
+                    'task_id': task_id,
+                    'task_type': task_type,
+                    'started_at': started_at,
+                    'completed_count': len(cls._parse_completed_json(completed_json)),
+                    'total_items': total_items or 0,
+                    'current_item': current_item,
+                }
+                for task_id, task_type, started_at, completed_json, total_items, current_item in rows
+            ]
+        except Exception as e:
+            # A missing table just means no checkpoint was ever written.
+            if 'no such table' not in str(e).lower():
+                logger.warning(f"Error reading interrupted checkpoints: {e}", module='Checkpoint')
+            return []
+
+    @classmethod
+    def abandon(cls, task_id: str):
+        """Set the status of *task_id*'s checkpoint to 'abandoned'.
+
+        Used e.g. when the task is no longer registered. Failures are logged
+        as warnings and not raised.
+        """
+        statement = (
+            "UPDATE scheduler_task_checkpoints SET status = 'abandoned', "
+            "updated_at = ? WHERE task_id = ?"
+        )
+        try:
+            with closing(sqlite3.connect(str(_SCHEDULER_DB_PATH), timeout=10)) as conn:
+                params = (datetime.now().isoformat(), task_id)
+                conn.execute(statement, params)
+                conn.commit()
+        except Exception as e:
+            logger.warning(f"Error abandoning checkpoint {task_id}: {e}", module='Checkpoint')
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    def _load_existing(self) -> Optional[Set[str]]:
+        """Load completed items from an existing checkpoint, or return None."""
+        try:
+            with closing(sqlite3.connect(str(_SCHEDULER_DB_PATH), timeout=10)) as conn:
+                cursor = conn.cursor()
+                cursor.execute(
+                    "SELECT completed_items FROM scheduler_task_checkpoints "
+                    "WHERE task_id = ? AND status = 'running'",
+                    (self.task_id,),
+                )
+                row = cursor.fetchone()
+                if row is None:
+                    return None
+                return self._parse_completed_json(row[0])
+        except Exception as e:
+            if 'no such table' not in str(e).lower():
+                logger.warning(f"Error loading checkpoint for {self.task_id}: {e}", module='Checkpoint')
+            return None
+
+    def _create_record(self, total_items: int):
+        """Insert a fresh checkpoint row (or replace an existing abandoned one)."""
+        try:
+            with closing(sqlite3.connect(str(_SCHEDULER_DB_PATH), timeout=10)) as conn:
+                conn.execute(
+                    "INSERT OR REPLACE INTO scheduler_task_checkpoints "
+                    "(task_id, task_type, started_at, completed_items, current_item, "
+                    "total_items, status, updated_at) "
+                    "VALUES (?, ?, ?, '[]', NULL, ?, 'running', ?)",
+                    (
+                        self.task_id,
+                        self.task_type,
+                        datetime.now().isoformat(),
+                        total_items,
+                        datetime.now().isoformat(),
+                    ),
+                )
+                conn.commit()
+        except Exception as e:
+            logger.warning(f"Error creating checkpoint for {self.task_id}: {e}", module='Checkpoint')
+
+    def _flush(self):
+        """Write the current set of completed items to the database.
+
+        Returns immediately when ``_pending_flush`` is empty. Otherwise the
+        whole ``_completed_items`` collection (not just the pending part) is
+        serialised to JSON and stored on the row for ``self.task_id`` together
+        with ``_total_items`` and a fresh ``updated_at``. Database errors are
+        logged as warnings and not raised.
+        """
+        with self._lock:
+            if not self._pending_flush:
+                return
+            # Snapshot while holding the lock; the database write below runs
+            # after the lock has been released.
+            items_snapshot = list(self._completed_items)
+            self._pending_flush.clear()
+
+        # NOTE(review): the pending marker is cleared before the write, so if
+        # the UPDATE fails a later call returns early unless _pending_flush
+        # has been refilled in the meantime - confirm this is acceptable.
+        try:
+            completed_json = json.dumps(items_snapshot)
+            with closing(sqlite3.connect(str(_SCHEDULER_DB_PATH), timeout=10)) as conn:
+                conn.execute(
+                    "UPDATE scheduler_task_checkpoints "
+                    "SET completed_items = ?, total_items = ?, updated_at = ? "
+                    "WHERE task_id = ?",
+                    (
+                        completed_json,
+                        self._total_items,
+                        datetime.now().isoformat(),
+                        self.task_id,
+                    ),
+                )
+                conn.commit()
+        except Exception as e:
+            logger.warning(f"Error flushing checkpoint for {self.task_id}: {e}", module='Checkpoint')
+
+    def _update_current_item(self):
+        """Update the current_item column for crash diagnostics."""
+        try:
+            with closing(sqlite3.connect(str(_SCHEDULER_DB_PATH), timeout=10)) as conn:
+                conn.execute(
+                    "UPDATE scheduler_task_checkpoints "
+                    "SET current_item = ?, updated_at = ? WHERE task_id = ?",
+                    (self._current_item, datetime.now().isoformat(), self.task_id),
+                )
+                conn.commit()
+        except Exception as e:
+            # Non-critical — just diagnostics
+            pass
+
+    def _delete_record(self):
+        """Remove the checkpoint row on successful completion."""
+        try:
+            with closing(sqlite3.connect(str(_SCHEDULER_DB_PATH), timeout=10)) as conn:
+                conn.execute(
+                    "DELETE FROM scheduler_task_checkpoints WHERE task_id = ?",
+                    (self.task_id,),
+                )
+                conn.commit()
+        except Exception as e:
+            logger.warning(f"Error deleting checkpoint for {self.task_id}: {e}", module='Checkpoint')
+
+    @staticmethod
+    def _parse_completed_json(raw: str) -> Set[str]:
+        """Parse JSON array of completed item IDs, tolerating corruption."""
+        if not raw:
+            return set()
+        try:
+            items = json.loads(raw)
+            if isinstance(items, list):
+                return set(str(i) for i in items)
+        except (json.JSONDecodeError, TypeError):
+            logger.warning("Corrupted checkpoint data — starting fresh (scrapers deduplicate)", module='Checkpoint')
+        return set()
--- a/modules/thumbnail_cache_builder.py
+++ b/modules/thumbnail_cache_builder.py
@@ -0,0 +1,639 @@
+#!/usr/bin/env python3
+"""
+Background worker to pre-generate thumbnails and cache metadata for all media files.
+This improves performance by generating thumbnails in advance rather than on-demand.
+"""
+
+import sys
+import os
+import time
+import hashlib
+from pathlib import Path
+from datetime import datetime
+from PIL import Image
+import io
+
+# Add parent directory to path so we can import modules
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+# Bootstrap database backend (must be before any database imports)
+import modules.db_bootstrap  # noqa: E402,F401
+
+import sqlite3
+
+from modules.universal_logger import get_logger
+
+logger = get_logger('ThumbnailCacheBuilder')
+
+
+class ThumbnailCacheBuilder:
+    """Build and maintain thumbnail and metadata cache for media files"""
+
+    def __init__(self):
+        self.scan_dirs = [
+            Path('/opt/immich/md'),
+            Path('/opt/immich/review'),
+            Path('/opt/immich/recycle')
+        ]
+        self.db_path = Path(__file__).parent.parent / 'database' / 'thumbnails.db'
+        self.metadata_db_path = Path(__file__).parent.parent / 'database' / 'media_metadata.db'
+        self.unified_db_path = Path(__file__).parent.parent / 'database' / 'media_downloader.db'
+        self.max_thumb_size = (300, 300)
+
+        # Image and video extensions
+        self.image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp'}
+        self.video_extensions = {'.mp4', '.mov', '.webm', '.avi', '.mkv', '.flv', '.m4v'}
+
+        self.stats = {
+            'processed': 0,
+            'thumbnails_created': 0,
+            'thumbnails_cached': 0,
+            'metadata_cached': 0,
+            'errors': 0,
+            'skipped': 0
+        }
+
+        self._init_metadata_db()
+
+    def _init_metadata_db(self):
+        """Initialize metadata cache database"""
+        self.metadata_db_path.parent.mkdir(parents=True, exist_ok=True)
+
+        conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
+        conn.execute('PRAGMA journal_mode=WAL')
+        conn.execute("""
+            CREATE TABLE IF NOT EXISTS media_metadata (
+                file_hash TEXT PRIMARY KEY,
+                file_path TEXT NOT NULL,
+                width INTEGER,
+                height INTEGER,
+                file_size INTEGER,
+                duration REAL,
+                format TEXT,
+                created_at TEXT,
+                file_mtime DOUBLE PRECISION
+            )
+        """)
+        conn.execute("CREATE INDEX IF NOT EXISTS idx_meta_file_path ON media_metadata(file_path)")
+        conn.commit()
+        conn.close()
+
+        logger.info(f"Metadata database initialized at {self.metadata_db_path}", module="Database")
+
+    def _get_file_hash(self, file_path: Path, content_hash: str = None) -> str:
+        """Generate hash for file path or use content hash
+
+        Args:
+            file_path: Path to the file
+            content_hash: Optional SHA256 content hash from database (preferred for recycle bin)
+        """
+        if content_hash:
+            # Use first 64 chars of content hash (full SHA256 for cache key)
+            return content_hash[:64]
+        # Fall back to path-based hash
+        return hashlib.sha256(str(file_path).encode()).hexdigest()
+
+    def _generate_image_thumbnail(self, file_path: Path) -> tuple:
+        """Generate thumbnail and extract metadata for image
+        Returns: (thumbnail_data, width, height, format)
+        """
+        try:
+            with Image.open(file_path) as img:
+                # Get original dimensions
+                width, height = img.size
+                img_format = img.format
+
+                # Convert RGBA to RGB if needed
+                if img.mode == 'RGBA':
+                    background = Image.new('RGB', img.size, (255, 255, 255))
+                    background.paste(img, mask=img.split()[3])
+                    img = background
+                elif img.mode != 'RGB':
+                    img = img.convert('RGB')
+
+                # Generate thumbnail
+                img.thumbnail(self.max_thumb_size, Image.Resampling.LANCZOS)
+
+                # Save to bytes
+                buffer = io.BytesIO()
+                img.save(buffer, format='JPEG', quality=85, optimize=True)
+                thumbnail_data = buffer.getvalue()
+
+                return thumbnail_data, width, height, img_format
+        except Exception as e:
+            logger.error(f"Error generating image thumbnail for {file_path}: {e}", module="Error")
+            return None, None, None, None
+
+    def _generate_video_thumbnail(self, file_path: Path) -> tuple:
+        """Generate thumbnail and extract metadata for video using ffmpeg
+        Returns: (thumbnail_data, width, height, duration)
+        """
+        try:
+            import subprocess
+            import json
+
+            # Get video metadata using ffprobe
+            probe_cmd = [
+                'ffprobe',
+                '-v', 'quiet',
+                '-print_format', 'json',
+                '-show_format',
+                '-show_streams',
+                str(file_path)
+            ]
+
+            result = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=30)
+            if result.returncode != 0:
+                logger.error(f"ffprobe failed for {file_path}", module="Error")
+                return None, None, None, None
+
+            metadata = json.loads(result.stdout)
+
+            # Extract video stream info
+            video_stream = next((s for s in metadata.get('streams', []) if s.get('codec_type') == 'video'), None)
+            if not video_stream:
+                return None, None, None, None
+
+            width = video_stream.get('width')
+            height = video_stream.get('height')
+            duration = float(metadata.get('format', {}).get('duration', 0))
+
+            # Generate thumbnail - seek to 1s or 0s for very short videos
+            temp_output = f"/tmp/thumb_{os.getpid()}.jpg"
+            seek_time = '00:00:01' if duration > 1.5 else '00:00:00'
+
+            thumb_cmd = [
+                'ffmpeg',
+                '-ss', seek_time,
+                '-i', str(file_path),
+                '-vframes', '1',
+                '-vf', f'scale={self.max_thumb_size[0]}:{self.max_thumb_size[1]}:force_original_aspect_ratio=decrease',
+                '-y',
+                temp_output
+            ]
+
+            result = subprocess.run(thumb_cmd, capture_output=True, timeout=30)
+            if result.returncode != 0 or not Path(temp_output).exists():
+                logger.error(f"ffmpeg thumbnail generation failed for {file_path}", module="Error")
+                return None, width, height, duration
+
+            # Read thumbnail data
+            with open(temp_output, 'rb') as f:
+                thumbnail_data = f.read()
+
+            # Clean up temp file
+            Path(temp_output).unlink(missing_ok=True)
+
+            return thumbnail_data, width, height, duration
+
+        except Exception as e:
+            logger.error(f"Error generating video thumbnail for {file_path}: {e}", module="Error")
+            return None, None, None, None
+
+    def _cache_thumbnail(self, file_path: Path, thumbnail_data: bytes, content_hash: str = None):
+        """Store thumbnail in cache database
+
+        Args:
+            file_path: Path to the file
+            thumbnail_data: JPEG thumbnail data
+            content_hash: Optional SHA256 content hash from database
+        """
+        try:
+            file_hash = self._get_file_hash(file_path, content_hash)
+            file_mtime = file_path.stat().st_mtime
+
+            conn = sqlite3.connect(str(self.db_path), timeout=30.0)
+            conn.execute('PRAGMA journal_mode=WAL')
+            conn.execute("""
+                INSERT OR REPLACE INTO thumbnails
+                (file_hash, file_path, thumbnail_data, created_at, file_mtime)
+                VALUES (?, ?, ?, ?, ?)
+            """, (file_hash, str(file_path), thumbnail_data, datetime.now().isoformat(), file_mtime))
+            conn.commit()
+            conn.close()
+
+            return True
+        except Exception as e:
+            logger.error(f"Error caching thumbnail for {file_path}: {e}", module="Error")
+            return False
+
+    def _cache_metadata(self, file_path: Path, width: int, height: int, duration: float = None, format_type: str = None, content_hash: str = None):
+        """Store metadata in cache database
+
+        Args:
+            file_path: Path to the file
+            width: Image/video width
+            height: Image/video height
+            duration: Video duration (seconds)
+            format_type: Media format
+            content_hash: Optional SHA256 content hash from database
+        """
+        try:
+            file_hash = self._get_file_hash(file_path, content_hash)
+            file_mtime = file_path.stat().st_mtime
+            file_size = file_path.stat().st_size
+
+            conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
+            conn.execute('PRAGMA journal_mode=WAL')
+            conn.execute("""
+                INSERT OR REPLACE INTO media_metadata
+                (file_hash, file_path, width, height, file_size, duration, format, created_at, file_mtime)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+            """, (file_hash, str(file_path), width, height, file_size, duration, format_type,
+                  datetime.now().isoformat(), file_mtime))
+            conn.commit()
+            conn.close()
+
+            return True
+        except Exception as e:
+            logger.error(f"Error caching metadata for {file_path}: {e}", module="Error")
+            return False
+
+    def _is_cached_valid(self, file_path: Path, content_hash: str = None) -> bool:
+        """Check if file already has valid cached thumbnail and metadata
+
+        Args:
+            file_path: Path to the file
+            content_hash: Optional SHA256 content hash from database
+        """
+        try:
+            file_hash = self._get_file_hash(file_path, content_hash)
+            file_mtime = file_path.stat().st_mtime
+
+            # Check thumbnail cache
+            conn = sqlite3.connect(str(self.db_path), timeout=30.0)
+            conn.execute('PRAGMA journal_mode=WAL')
+            cursor = conn.execute(
+                "SELECT file_mtime FROM thumbnails WHERE file_hash = ?",
+                (file_hash,)
+            )
+            thumb_result = cursor.fetchone()
+            conn.close()
+
+            if not thumb_result or abs(thumb_result[0] - file_mtime) > 1:
+                return False
+
+            # Check metadata cache
+            conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
+            conn.execute('PRAGMA journal_mode=WAL')
+            cursor = conn.execute(
+                "SELECT file_mtime FROM media_metadata WHERE file_hash = ?",
+                (file_hash,)
+            )
+            meta_result = cursor.fetchone()
+            conn.close()
+
+            if not meta_result or abs(meta_result[0] - file_mtime) > 1:
+                return False
+
+            return True
+
+        except Exception as e:
+            logger.error(f"Error checking cache for {file_path}: {e}", module="Error")
+            return False
+
+    def process_file(self, file_path: Path, content_hash: str = None) -> bool:
+        """Process a single file - generate thumbnail and cache metadata
+
+        Args:
+            file_path: Path to the file
+            content_hash: Optional SHA256 content hash from database (preferred for cache key)
+        """
+        try:
+            if not file_path.exists():
+                self.stats['skipped'] += 1
+                return True
+
+            # Check if already cached and up-to-date
+            if self._is_cached_valid(file_path, content_hash):
+                self.stats['skipped'] += 1
+                return True
+
+            file_ext = file_path.suffix.lower()
+
+            if file_ext in self.image_extensions:
+                # Process image
+                thumbnail_data, width, height, format_type = self._generate_image_thumbnail(file_path)
+
+                if thumbnail_data and width and height:
+                    # Cache thumbnail
+                    if self._cache_thumbnail(file_path, thumbnail_data, content_hash):
+                        self.stats['thumbnails_created'] += 1
+
+                    # Cache metadata
+                    if self._cache_metadata(file_path, width, height, format_type=format_type, content_hash=content_hash):
+                        self.stats['metadata_cached'] += 1
+
+                    return True
+                else:
+                    self.stats['errors'] += 1
+                    return False
+
+            elif file_ext in self.video_extensions:
+                # Process video
+                thumbnail_data, width, height, duration = self._generate_video_thumbnail(file_path)
+
+                # Cache thumbnail if generated
+                if thumbnail_data:
+                    if self._cache_thumbnail(file_path, thumbnail_data, content_hash):
+                        self.stats['thumbnails_created'] += 1
+
+                # Cache metadata if we have dimensions
+                if width and height:
+                    if self._cache_metadata(file_path, width, height, duration=duration, format_type='video', content_hash=content_hash):
+                        self.stats['metadata_cached'] += 1
+
+                # Consider successful even if thumbnail failed (metadata might still be cached)
+                if width and height:
+                    return True
+                else:
+                    self.stats['errors'] += 1
+                    return False
+
+            return True
+
+        except Exception as e:
+            logger.error(f"Error processing file {file_path}: {e}", module="Error")
+            self.stats['errors'] += 1
+            return False
+
+    def _get_files_from_inventory(self) -> list:
+        """Query file_inventory table for all media files (database-first)
+        Returns: List of tuples (file_path, content_hash or None)
+        """
+        try:
+            conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)
+            conn.row_factory = sqlite3.Row
+            cursor = conn.cursor()
+
+            # Query all files from file_inventory (any location: final, review, recycle)
+            # Include file_hash from recycle_bin if file is in recycle location
+            cursor.execute("""
+                SELECT
+                    fi.file_path,
+                    fi.content_type,
+                    fi.location,
+                    rb.file_hash as content_hash
+                FROM file_inventory fi
+                LEFT JOIN recycle_bin rb ON fi.file_path = rb.recycle_path
+                ORDER BY fi.created_date DESC
+            """)
+
+            rows = cursor.fetchall()
+            conn.close()
+
+            # Convert to Path objects and filter by extension
+            all_extensions = list(self.image_extensions) + list(self.video_extensions)
+            files = []
+
+            for row in rows:
+                file_path = Path(row['file_path'])
+                if file_path.suffix.lower() in all_extensions and file_path.exists():
+                    # Return tuple: (file_path, content_hash or None)
+                    content_hash = row['content_hash'] if row['content_hash'] else None
+                    files.append((file_path, content_hash))
+
+            return files
+
+        except Exception as e:
+            logger.error(f"Error querying file_inventory: {e}", module="Error")
+            # Fallback to filesystem scan if database query fails
+            logger.warning("Falling back to filesystem scan...", module="Warning")
+            return self._fallback_filesystem_scan()
+
+    def _fallback_filesystem_scan(self) -> list:
+        """Fallback: Scan filesystem if database query fails
+        Returns: List of tuples (file_path, None) - no content_hash available from filesystem
+        """
+        all_files = []
+        for scan_dir in self.scan_dirs:
+            if not scan_dir.exists():
+                continue
+            for ext in list(self.image_extensions) + list(self.video_extensions):
+                # Return tuples: (file_path, None) - no content hash from filesystem scan
+                all_files.extend([(f, None) for f in scan_dir.rglob(f"*{ext}")])
+        return all_files
+
+    def scan_and_process(self):
+        """Query file_inventory and process all files (database-first)"""
+        logger.info("Starting thumbnail and metadata cache build...", module="Core")
+        logger.info("Querying file_inventory table (database-first architecture)...", module="Core")
+
+        start_time = time.time()
+
+        # Query file_inventory instead of scanning filesystem
+        # Returns list of tuples: (file_path, content_hash or None)
+        all_files = self._get_files_from_inventory()
+
+        total_files = len(all_files)
+        logger.info(f"Found {total_files} media files to process from file_inventory", module="Core")
+
+        # Count how many have content hashes (from recycle bin)
+        files_with_hash = sum(1 for _, content_hash in all_files if content_hash)
+        if files_with_hash > 0:
+            logger.info(f"  - {files_with_hash} files have content hash (from recycle bin - cache survives moves)", module="Core")
+
+        # Process files with progress updates
+        for i, (file_path, content_hash) in enumerate(all_files, 1):
+            self.process_file(file_path, content_hash)
+            self.stats['processed'] += 1
+
+            # Progress update every 100 files
+            if i % 100 == 0 or i == total_files:
+                elapsed = time.time() - start_time
+                rate = i / elapsed if elapsed > 0 else 0
+                eta = (total_files - i) / rate if rate > 0 else 0
+
+                logger.info(f"Progress: {i}/{total_files} ({i/total_files*100:.1f}%) - "
+                           f"Rate: {rate:.1f} files/sec - ETA: {eta/60:.1f} min", module="Core")
+
+        # Final statistics
+        elapsed = time.time() - start_time
+        logger.info("=" * 60, module="Core")
+        logger.info("Thumbnail and Metadata Cache Build Complete", module="Core")
+        logger.info("=" * 60, module="Core")
+        logger.info(f"Total files processed: {self.stats['processed']}", module="Core")
+        logger.info(f"Thumbnails created: {self.stats['thumbnails_created']}", module="Core")
+        logger.info(f"Metadata cached: {self.stats['metadata_cached']}", module="Core")
+        logger.info(f"Files skipped (already cached): {self.stats['skipped']}", module="Core")
+        logger.info(f"Errors: {self.stats['errors']}", module="Core")
+        logger.info(f"Total time: {elapsed/60:.1f} minutes", module="Core")
+        logger.info(f"Average rate: {self.stats['processed']/elapsed:.1f} files/sec", module="Core")
+        logger.info("=" * 60, module="Core")
+
+    def cleanup_orphaned_records(self):
+        """Clean up orphaned database records for files that no longer exist"""
+        logger.info("Starting database cleanup for orphaned records...", module="Cleanup")
+        cleanup_stats = {
+            'face_recognition_scans': 0,
+            'downloads': 0,
+            'media_metadata': 0,
+            'thumbnail_cache': 0
+        }
+
+        conn = None
+        meta_conn = None
+        thumb_conn = None
+        main_conn = None
+
+        try:
+            # Clean up face_recognition_scans for files not in file_inventory
+            conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)
+            cursor = conn.cursor()
+
+            # Find orphaned face_recognition_scans (files not in file_inventory)
+            cursor.execute("""
+                SELECT COUNT(*) FROM face_recognition_scans frs
+                WHERE NOT EXISTS (
+                    SELECT 1 FROM file_inventory fi WHERE fi.file_path = frs.file_path
+                )
+            """)
+            orphaned_count = cursor.fetchone()[0]
+
+            if orphaned_count > 0:
+                cursor.execute("""
+                    DELETE FROM face_recognition_scans
+                    WHERE NOT EXISTS (
+                        SELECT 1 FROM file_inventory fi WHERE fi.file_path = face_recognition_scans.file_path
+                    )
+                """)
+                conn.commit()
+                cleanup_stats['face_recognition_scans'] = orphaned_count
+                logger.info(f"Removed {orphaned_count} orphaned face_recognition_scans records", module="Cleanup")
+
+            # Clean up downloads for files not in file_inventory
+            cursor.execute("""
+                SELECT COUNT(*) FROM downloads d
+                WHERE d.file_path IS NOT NULL AND d.file_path != ''
+                AND NOT EXISTS (
+                    SELECT 1 FROM file_inventory fi WHERE fi.file_path = d.file_path
+                )
+            """)
+            orphaned_downloads = cursor.fetchone()[0]
+
+            if orphaned_downloads > 0:
+                cursor.execute("""
+                    DELETE FROM downloads
+                    WHERE file_path IS NOT NULL AND file_path != ''
+                    AND NOT EXISTS (
+                        SELECT 1 FROM file_inventory fi WHERE fi.file_path = downloads.file_path
+                    )
+                """)
+                conn.commit()
+                cleanup_stats['downloads'] = orphaned_downloads
+                logger.info(f"Removed {orphaned_downloads} orphaned downloads records", module="Cleanup")
+
+            conn.close()
+
+            # Clean up media_metadata cache for files not in file_inventory
+            try:
+                meta_conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
+                main_conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)
+
+                # Get list of valid file paths from file_inventory
+                main_cursor = main_conn.cursor()
+                main_cursor.execute("SELECT file_path FROM file_inventory")
+                valid_paths = set(row[0] for row in main_cursor.fetchall())
+                main_conn.close()
+
+                # Check metadata for orphans
+                meta_cursor = meta_conn.cursor()
+                meta_cursor.execute("SELECT file_path FROM media_metadata")
+                all_meta_paths = [row[0] for row in meta_cursor.fetchall()]
+
+                orphaned_meta = [p for p in all_meta_paths if p not in valid_paths]
+                if orphaned_meta:
+                    placeholders = ','.join(['?' for _ in orphaned_meta])
+                    meta_cursor.execute(f"DELETE FROM media_metadata WHERE file_path IN ({placeholders})", orphaned_meta)
+                    meta_conn.commit()
+                    cleanup_stats['media_metadata'] = len(orphaned_meta)
+                    logger.info(f"Removed {len(orphaned_meta)} orphaned media_metadata records", module="Cleanup")
+
+                meta_conn.close()
+            except Exception:
+                pass  # metadata cleanup is non-critical
+
+            # Clean up thumbnail cache for files not in file_inventory
+            thumb_db_path = Path(__file__).parent.parent / 'database' / 'thumbnails.db'
+            try:
+                thumb_conn = sqlite3.connect(str(thumb_db_path), timeout=30.0)
+                main_conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)
+
+                # Get list of valid file paths
+                main_cursor = main_conn.cursor()
+                main_cursor.execute("SELECT file_path FROM file_inventory")
+                valid_paths = set(row[0] for row in main_cursor.fetchall())
+                main_conn.close()
+
+                # Check thumbnails for orphans
+                thumb_cursor = thumb_conn.cursor()
+                # Thumbnails use file_hash as key, so we need to check existence differently
+                try:
+                    thumb_cursor.execute("SELECT file_path FROM thumbnails WHERE file_path IS NOT NULL")
+                    all_thumb_paths = [row[0] for row in thumb_cursor.fetchall()]
+
+                    orphaned_thumbs = [p for p in all_thumb_paths if p and p not in valid_paths]
+                    if orphaned_thumbs:
+                        placeholders = ','.join(['?' for _ in orphaned_thumbs])
+                        thumb_cursor.execute(f"DELETE FROM thumbnails WHERE file_path IN ({placeholders})", orphaned_thumbs)
+                        thumb_conn.commit()
+                        cleanup_stats['thumbnail_cache'] = len(orphaned_thumbs)
+                        logger.info(f"Removed {len(orphaned_thumbs)} orphaned thumbnail records", module="Cleanup")
+                except sqlite3.OperationalError:
+                    # Table structure may not have file_path column
+                    pass
+
+                thumb_conn.close()
+            except Exception:
+                pass  # thumbnail cleanup is non-critical
+
+            # Log summary
+            total_cleaned = sum(cleanup_stats.values())
+            logger.info("=" * 60, module="Cleanup")
+            logger.info("Database Cleanup Complete", module="Cleanup")
+            logger.info("=" * 60, module="Cleanup")
+            logger.info(f"Total orphaned records removed: {total_cleaned}", module="Cleanup")
+            for table, count in cleanup_stats.items():
+                if count > 0:
+                    logger.info(f"  - {table}: {count}", module="Cleanup")
+            logger.info("=" * 60, module="Cleanup")
+
+            return cleanup_stats
+
+        except Exception as e:
+            logger.error(f"Error during database cleanup: {e}", exc_info=True, module="Error")
+            return cleanup_stats
+        finally:
+            # Ensure all database connections are closed
+            for connection in [conn, meta_conn, thumb_conn, main_conn]:
+                if connection:
+                    try:
+                        connection.close()
+                    except Exception:
+                        pass  # Best effort cleanup
+
+
+def main():
+    """Main entry point"""
+    logger.info("Thumbnail Cache Builder starting...", module="Core")
+
+    try:
+        builder = ThumbnailCacheBuilder()
+
+        # Run database cleanup first (before processing)
+        logger.info("Phase 1: Database cleanup for orphaned records", module="Core")
+        builder.cleanup_orphaned_records()
+
+        # Then process thumbnails and metadata
+        logger.info("Phase 2: Thumbnail and metadata cache building", module="Core")
+        builder.scan_and_process()
+
+        logger.info("Thumbnail Cache Builder completed successfully", module="Core")
+        return 0
+    except Exception as e:
+        logger.error(f"Fatal error in Thumbnail Cache Builder: {e}", exc_info=True, module="Error")
+        return 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/modules/tiktok_db_adapter.py
+++ b/modules/tiktok_db_adapter.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+"""
+TikTok Database Adapter for Unified Database
+Provides compatibility layer between TikTok module and unified database
+"""
+
+from typing import Optional, Dict
+from datetime import datetime
+import json
+
+class TikTokDatabaseAdapter:
+    """Adapter that lets the TikTok module track its downloads in the unified database"""
+
+    def __init__(self, unified_db):
+        """Keep the unified database instance that every method delegates to"""
+        self.unified_db = unified_db
+        self.platform = 'tiktok'
+
+    def get_file_hash(self, file_path: str) -> Optional[str]:
+        """Return unified_db.get_file_hash(file_path) unchanged"""
+        return self.unified_db.get_file_hash(file_path)
+
+    def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
+        """Return unified_db.get_download_by_file_hash(file_hash) unchanged"""
+        return self.unified_db.get_download_by_file_hash(file_hash)
+
+    def record_download(self, video_id: str, username: str, filename: str,
+                       post_date: Optional[datetime] = None, metadata: Dict = None,
+                       file_path: str = None):
+        """Record one downloaded file and return the result of unified_db.record_download()"""
+        from pathlib import Path
+
+        # The filename fragment keeps the URL (and url_hash) unique per carousel photo
+        url = f"https://www.tiktok.com/@{username}/video/{video_id}#{filename}"
+
+        # Hashing is best effort: a failure here must not stop the record being written
+        file_hash = None
+        if file_path:
+            try:
+                if Path(file_path).exists():
+                    file_hash = self.unified_db.get_file_hash(file_path)
+            except Exception:
+                pass
+
+        # Any extension outside this image set is recorded as a video
+        image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp', '.bmp', '.tiff'}
+        content_type = 'image' if Path(filename).suffix.lower() in image_exts else 'video'
+
+        return self.unified_db.record_download(
+            url=url,
+            platform=self.platform,
+            source=username,
+            content_type=content_type,
+            filename=filename,
+            post_date=post_date,
+            metadata=metadata,
+            file_hash=file_hash,
+            file_path=file_path
+        )
+
+    def is_downloaded(self, video_id: str, username: str = None) -> bool:
+        """Return True if a recorded TikTok URL refers to exactly this video_id (False on DB errors)"""
+        import re
+
+        def like_literal(text) -> str:
+            # Escape LIKE wildcards so '_' and '%' in usernames/IDs only match themselves
+            return str(text).replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
+
+        owner = f"https://www.tiktok.com/@{like_literal(username)}" if username else "%"
+        url_pattern = f"{owner}/video/{like_literal(video_id)}%"
+        # LIKE only pre-selects rows; the regex rejects longer IDs that merely start with this one
+        exact_id = re.compile(f"/video/{re.escape(str(video_id))}(?![0-9])", re.IGNORECASE)
+
+        try:
+            with self.unified_db.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute(
+                    "SELECT url FROM downloads WHERE url LIKE ? ESCAPE '\\' AND platform = ?",
+                    (url_pattern, self.platform)
+                )
+                return any(exact_id.search(row[0] or '') for row in cursor.fetchall())
+        except Exception:
+            return False
+
+    def is_already_downloaded(self, video_id: str) -> bool:
+        """Compatibility alias: is_downloaded(video_id) without a username"""
+        return self.is_downloaded(video_id)
+
+    def get_download_info(self, video_id: str) -> Optional[Dict]:
+        """Return the first fetched download whose URL contains video_id, or None"""
+        # Simplified lookup: only the rows returned for limit=1000 are scanned
+        results = self.unified_db.get_downloads(platform=self.platform, limit=1000)
+
+        for download in results:
+            if video_id in download.get('url', ''):
+                return download
+
+        return None
+
+    def cleanup_old_downloads(self, days: int = 180):
+        """Delegate to unified_db.cleanup_old_downloads for this platform and return its result"""
+        return self.unified_db.cleanup_old_downloads(days=days, platform=self.platform)
--- a/modules/tiktok_module.py
+++ b/modules/tiktok_module.py
@@ -0,0 +1,603 @@
+#!/usr/bin/env python3
+"""
+TikTok Download Module - Downloads TikTok videos with proper timestamp extraction
+"""
+
+import os
+import re
+import json
+import subprocess
+import sqlite3
+from pathlib import Path
+from datetime import datetime
+from typing import Dict, List, Optional, Tuple
+from modules.base_module import LoggingMixin
+
+
+class TikTokDownloader(LoggingMixin):
+    """Downloads TikTok videos and extracts metadata including timestamps"""
+    
+    def __init__(self, base_path: Path = None, log_callback=None, use_database=True, unified_db=None):
+        """Set up logging, download bookkeeping and the unified-database adapter
+
+        Args:
+            base_path: Base path for downloads (current working directory when omitted)
+            log_callback: Optional callback for logging (tag, level, message)
+            use_database: When False, _is_already_downloaded always answers False
+            unified_db: UnifiedDatabase instance (required)
+
+        Raises:
+            ValueError: If unified_db is missing or falsy
+        """
+        self._init_logger('TikTok', log_callback, default_module='Download')
+        if base_path:
+            self.base_path = Path(base_path)
+        else:
+            self.base_path = Path.cwd()
+        self.use_database = use_database
+        self.file_timestamps = dict()  # filename -> datetime
+
+        if not unified_db:
+            raise ValueError("TikTok module requires unified_db - standalone database is no longer supported")
+
+        # Project modules are imported lazily, inside the constructor
+        from modules.tiktok_db_adapter import TikTokDatabaseAdapter
+        from modules.activity_status import get_activity_manager
+        self.db = TikTokDatabaseAdapter(unified_db)
+        self.use_unified_db = True
+        self.activity_manager = get_activity_manager(unified_db)  # real-time status updates
+        self.pending_downloads = list()  # entries queued by deferred recording
+
+    def _is_already_downloaded(self, video_id: str, username: str = None) -> bool:
+        """Report whether video_id is already recorded, via the database adapter
+
+        Always False when use_database is off; a truthy username is forwarded to the lookup.
+        """
+        if self.use_database:
+            adapter = self.db
+            return adapter.is_downloaded(video_id, username) if username else adapter.is_already_downloaded(video_id)
+        return False
+    
+    def _record_download(self, video_id: str, username: str, filename: str,
+                        post_date: Optional[datetime] = None, metadata: Dict = None,
+                        deferred: bool = False):
+        """Record a finished download now, or queue it for recording at a later point
+
+        Args:
+            video_id: Stored as-is in the record or queue entry
+            username: Stored as-is in the record or queue entry
+            filename: Path of the downloaded file; its basename is stored as the
+                      filename and the full string as file_path
+            post_date: Optional datetime; queued entries hold it as an ISO string
+            metadata: Passed through unchanged
+            deferred: If True, append to pending_downloads instead of writing to the
+                      database (this happens even when use_database is False)
+
+        Returns:
+            True when deferred, None when use_database is off, otherwise whatever
+            the adapter's record_download() returns
+        """
+        from pathlib import Path
+        full_path = str(filename)
+        base_name = Path(filename).name
+
+        if deferred:
+            entry = {
+                'video_id': video_id, 'username': username, 'filename': base_name,
+                'post_date': post_date.isoformat() if post_date else None,
+                'file_path': full_path, 'metadata': metadata,
+            }
+            self.pending_downloads.append(entry)
+            self.log(f"Deferred recording for {video_id}", "debug")
+            return True
+
+        if self.use_database:
+            return self.db.record_download(video_id=video_id, username=username,
+                                           filename=base_name, post_date=post_date,
+                                           metadata=metadata, file_path=full_path)
+        return None
+
+    def get_pending_downloads(self):
+        """Return a shallow copy of the entries queued for deferred database recording"""
+        return list(self.pending_downloads)
+
+    def clear_pending_downloads(self):
+        """Replace the deferred-recording queue with a new empty list"""
+        self.pending_downloads = list()
+    
+    def extract_date_from_info(self, info_dict: Dict) -> Optional[datetime]:
+        """
+        Derive a datetime from a yt-dlp info dictionary
+
+        'timestamp', 'release_timestamp' and 'modified_timestamp' are tried in that
+        order; the first truthy value that converts is used. Each one is read as a
+        Unix timestamp and returned as a naive datetime holding the UTC clock reading
+        (nothing is converted to the local time zone). If none of them can be used,
+        'upload_date' is parsed as YYYYMMDD when it is 8 characters long, which gives
+        midnight of that day.
+
+        Logging:
+            A debug message names the epoch field that was used; the date-only
+            fallback logs a warning instead.
+
+        Args:
+            info_dict: yt-dlp info dictionary
+                       (only the four keys named above are read)
+
+        Returns:
+            Naive datetime object, or None when no field could be used
+        """
+        # Local import: the module-level import only brings in `datetime`
+        from datetime import timezone
+
+        # (field name, prefix of the debug message logged when that field is used)
+        epoch_fields = (
+            ('timestamp', "Extracted full timestamp (UTC): "),
+            ('release_timestamp', "Extracted release timestamp (UTC): "),
+            ('modified_timestamp', "Extracted modified timestamp (UTC): "),
+        )
+
+        for field, message_prefix in epoch_fields:
+            raw_value = info_dict.get(field)
+            if not raw_value:
+                # Missing, None or 0: this field is skipped
+                continue
+
+            try:
+                aware = datetime.fromtimestamp(raw_value, tz=timezone.utc)
+                # Drop tzinfo but keep the UTC clock reading
+                naive = aware.replace(tzinfo=None)
+                self.log(f"{message_prefix}{naive}", "debug")
+                return naive
+            except Exception:
+                # Unusable value (or the log call failed): try the next field
+                continue
+
+        # Last resort: upload_date carries a date only, so the time of day is lost
+        day_string = info_dict.get('upload_date')
+        if day_string and len(day_string) == 8:
+            # strptime rejects 8-character strings that are not a valid date
+            try:
+                parsed = datetime.strptime(day_string, '%Y%m%d')
+                self.log(f"Only date available (no time): {parsed.date()}", "warning")
+                return parsed
+            except Exception:
+                # Not a usable YYYYMMDD value
+                pass
+
+        # Every field was missing or unusable
+        return None
+    
+    def download_profile(self,
+                        username: str,
+                        number_of_days: int = 7,
+                        full_profile: bool = False,
+                        output_dir: Path = None,
+                        defer_database: bool = False) -> Tuple[Dict[str, datetime], List[Path]]:
+        """
+        Download a TikTok profile: yt-dlp lists the post IDs, gallery-dl fetches each post
+
+        Args:
+            username: TikTok username (a leading @ is stripped)
+            number_of_days: Keep only posts listed within this many days (ignored if full_profile=True)
+            full_profile: If True, download entire profile
+            output_dir: Output directory, str or Path (uses base_path/username if not specified)
+            defer_database: If True, don't record to database immediately - store in
+                           pending_downloads for later recording after file move is complete
+
+        Returns:
+            Tuple of (filename -> timestamp dict, list of kept files); ({}, []) if listing fails or is empty
+        """
+        self.defer_database = defer_database  # Passed as `deferred` when each file is recorded
+        username = username.lstrip('@')
+        output_dir = Path(output_dir) if output_dir else self.base_path / username
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        self.log(f"Downloading TikTok profile: @{username}", "info")
+        self.activity_manager.update_status("Checking videos")
+
+        # HYBRID APPROACH: Use yt-dlp to get ID list (fast), then gallery-dl per video (handles carousels)
+        # Step 1: Use yt-dlp to quickly get list of video IDs with dates
+        profile_url = f"https://www.tiktok.com/@{username}"
+        list_cmd = [
+            "yt-dlp",
+            "--flat-playlist",  # Don't download, just list
+            "--print", "%(upload_date)s %(id)s",    # Print date and ID
+            "--quiet",
+            "--no-warnings",
+            profile_url
+        ]
+        self.log("Getting video list with yt-dlp...", "debug")
+
+        # Get list of video IDs with dates
+        try:
+            result = subprocess.run(list_cmd, capture_output=True, text=True, timeout=60)
+            if result.returncode != 0:
+                # A failed listing would otherwise be indistinguishable from an empty profile
+                self.log(f"yt-dlp exited with code {result.returncode}: {(result.stderr or '').strip()[:200]}", "warning")
+            lines = [line.strip() for line in result.stdout.strip().split('\n') if line.strip()]
+            # Parse and filter by date if needed
+            video_ids = []
+            if not full_profile and number_of_days:
+                from datetime import timedelta
+                cutoff_date = datetime.now() - timedelta(days=number_of_days)
+                cutoff_str = cutoff_date.strftime('%Y%m%d')
+                for line in lines:
+                    parts = line.split()
+                    if len(parts) >= 2:
+                        upload_date, video_id = parts[0], parts[1]
+                        # Only include videos after cutoff date (plain string comparison of YYYYMMDD)
+                        if upload_date >= cutoff_str:
+                            video_ids.append(video_id)
+            else:
+                # No filter, take all
+                video_ids = [line.split()[1] for line in lines if len(line.split()) >= 2]
+
+            self.log(f"Found {len(video_ids)} posts to download", "info")
+        except Exception as e:
+            self.log(f"Failed to get video list: {e}", "error")
+            return {}, []
+
+        if not video_ids:
+            self.log("No videos found matching criteria", "info")
+            return {}, []
+
+        # Set initial progress so dashboard shows 0/N immediately
+        self.activity_manager.update_status(
+            "Downloading videos",
+            progress_current=0,
+            progress_total=len(video_ids)
+        )
+
+        # Crash recovery checkpoint
+        from modules.task_checkpoint import TaskCheckpoint
+        checkpoint = TaskCheckpoint(f'tiktok:{username}', 'scraping')
+        checkpoint.start(total_items=len(video_ids))
+        if checkpoint.is_recovering():
+            self.log(f"TikTok @{username}: recovering — skipping already-downloaded videos", "info")
+
+        # Step 2: Download each video individually with gallery-dl (fast per video, handles carousels)
+        for i, video_id in enumerate(video_ids, 1):
+            # Update progress at start of each iteration (fires even on skips)
+            self.activity_manager.update_status(
+                "Downloading videos",
+                progress_current=i,
+                progress_total=len(video_ids)
+            )
+
+            # Skip if already completed in a previous crashed run
+            if checkpoint.is_completed(video_id):
+                continue
+
+            checkpoint.set_current(video_id)
+
+            # Skip if already downloaded
+            if self._is_already_downloaded(video_id, username):
+                self.log(f"[{i}/{len(video_ids)}] Skipping already downloaded: {video_id}", "debug")
+                checkpoint.mark_completed(video_id)
+                continue
+
+            video_url = f"https://www.tiktok.com/@{username}/video/{video_id}"
+            self.log(f"[{i}/{len(video_ids)}] Downloading {video_id}", "debug")
+
+            cmd = [
+                "gallery-dl",
+                "--write-metadata",
+                "-D", str(output_dir),
+                "-f", "{date:%Y%m%d}_{desc}_{id}_{num}.{extension}",
+                video_url
+            ]
+
+            try:
+                self.log(f"Calling gallery-dl for {video_id}", "debug")
+                result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
+                self.log(f"gallery-dl returned: code={result.returncode}, stdout lines={len(result.stdout.splitlines()) if result.stdout else 0}", "debug")
+                if result.returncode != 0 and result.stderr:
+                    stderr = result.stderr
+                    if "not available" in stderr.lower() or "404" in stderr:
+                        self.log(f"Video {video_id} not available (deleted or private)", "warning")
+                    else:
+                        self.log(f"Failed to download {video_id}: {stderr[:100]}", "warning")
+            except subprocess.TimeoutExpired:
+                self.log(f"Timeout downloading {video_id}", "warning")
+            except Exception as e:
+                self.log(f"Error downloading {video_id}: {e}", "warning")
+
+            checkpoint.mark_completed(video_id)
+
+        checkpoint.finish()
+
+        # Post-process: Rename files with long descriptions and remove audio-only files
+        for file in output_dir.glob("*"):
+            if file.is_file() and file.suffix != '.json':
+                # Remove audio-only files
+                if file.suffix.lower() in ['.mp3', '.m4a', '.aac', '.wav', '.ogg']:
+                    self.log(f"Removing audio-only file: {file.name}", "debug")
+                    file.unlink()
+                    # Also remove corresponding JSON
+                    json_file = file.with_suffix(file.suffix + '.json')
+                    if json_file.exists():
+                        json_file.unlink()
+                    continue
+
+                # Truncate long filenames (max 255 chars for Linux)
+                if len(file.name) > 200:  # Leave some margin
+                    # Parse filename: YYYYMMDD_description_ID_NUM.ext
+                    parts = file.name.rsplit('_', 2)  # Split from right to preserve ID and num
+                    if len(parts) == 3:
+                        date_and_desc, video_id, num_and_ext = parts
+                        # Split date from description
+                        date_part = date_and_desc[:8]  # YYYYMMDD
+                        desc_part = date_and_desc[9:]  # Everything after date_
+
+                        # Calculate max description length
+                        # Format: DATE_DESC_ID_NUM.EXT
+                        fixed_length = len(date_part) + len(video_id) + len(num_and_ext) + 3  # 3 underscores
+                        max_desc_len = 200 - fixed_length
+
+                        if len(desc_part) > max_desc_len:
+                            truncated_desc = desc_part[:max_desc_len-3] + "..."
+                            new_name = f"{date_part}_{truncated_desc}_{video_id}_{num_and_ext}"
+                            new_path = file.parent / new_name
+
+                            self.log(f"Truncating long filename: {file.name[:50]}... -> {new_name[:50]}...", "debug")
+                            file.rename(new_path)
+
+                            # Rename corresponding JSON file too
+                            json_file = Path(str(file) + '.json')
+                            if json_file.exists():
+                                new_json = Path(str(new_path) + '.json')
+                                json_file.rename(new_json)
+
+        # Process downloaded files and extract timestamps from JSON
+        downloaded_files = []
+        file_timestamps = {}
+        processed_ids = set()  # IDs found already recorded (or deleted as duplicates): their files are skipped
+        started_ids = set()    # Track IDs we've started processing in THIS run
+
+        for json_file in output_dir.glob("*.json"):
+            try:
+                with open(json_file, 'r', encoding='utf-8') as f:
+                    info = json.load(f)
+
+                # Get video ID
+                video_id = info.get('id', '')
+
+                # Extract timestamp from gallery-dl's createTime field (needed for all files)
+                timestamp = None
+                create_time = info.get('createTime')
+                if create_time:
+                    try:
+                        timestamp = datetime.fromtimestamp(int(create_time))
+                        self.log(f"Extracted timestamp {timestamp} from createTime", "debug")
+                    except Exception:
+                        timestamp = None  # Unusable createTime: handled by the fallback below
+                if timestamp is None:
+                    # createTime missing or unusable: fall back to the yt-dlp style fields
+                    timestamp = self.extract_date_from_info(info)
+
+                # gallery-dl names JSON files as: filename.ext.json
+                # So we need to remove the .json extension to get the media file
+                media_file = Path(str(json_file)[:-5])  # Remove .json extension
+
+                if not media_file.exists():
+                    self.log(f"Media file not found for {json_file.name}", "warning")
+                    json_file.unlink()
+                    continue
+
+                video_file = media_file  # Use same variable name for compatibility
+
+                # Check if already downloaded - but only check ONCE per video_id per run
+                # (Don't check again for carousel photos #2, #3 after we've started processing #1)
+                if video_id and video_id not in started_ids:
+                    if self._is_already_downloaded(video_id, username):
+                        self.log(f"Skipping already downloaded post: {video_id}", "debug")
+                        # Mark as processed so we don't check again for this ID's other files
+                        processed_ids.add(video_id)
+                        # Just remove JSON file, keep media files (they're already processed)
+                        json_file.unlink()
+                        continue
+                    # Mark that we've started processing this video_id
+                    started_ids.add(video_id)
+
+                # Skip if this video_id was marked as already downloaded
+                if video_id in processed_ids:
+                    json_file.unlink()
+                    continue
+
+                # Check for duplicate hash before recording (hash blacklist persists even if original deleted)
+                file_hash = self.db.get_file_hash(str(video_file)) if self.db else None
+                if file_hash:
+                    existing = self.db.get_download_by_file_hash(file_hash)
+                    if existing and existing.get('file_path') and str(video_file) != existing.get('file_path'):
+                        # Duplicate hash found - content was already downloaded (prevents redownload of deleted content)
+                        self.log(f"⚠ Duplicate content detected (hash match): {video_file.name} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
+                        # Delete the duplicate regardless of whether original file still exists
+                        try:
+                            video_file.unlink()
+                            self.log(f"Deleted duplicate (hash blacklist): {video_file.name}", "debug")
+                            # Mark as processed so we don't try to download again
+                            processed_ids.add(video_id)
+                            json_file.unlink()
+                            continue
+                        except Exception as e:
+                            self.log(f"Failed to delete duplicate {video_file.name}: {e}", "warning")
+
+                # Only files that survived the duplicate check are reported (carousel photos #2, #3 included)
+                downloaded_files.append(video_file)
+                if timestamp:
+                    file_timestamps[video_file.name] = timestamp
+                    self.log(f"Extracted timestamp {timestamp} for {video_file.name}", "debug")
+
+                # Record in database (each file gets its own entry, even for carousels)
+                if video_id:
+                    self._record_download(
+                        video_id=video_id,
+                        username=username,
+                        filename=video_file.name,
+                        post_date=timestamp,
+                        metadata={"title": info.get('desc', ''), "description": info.get('desc', '')},
+                        deferred=self.defer_database
+                    )
+
+                # Remove JSON file after processing
+                json_file.unlink()
+
+            except Exception as e:
+                self.log(f"Failed to process {json_file}: {e}", "error")
+
+        self.log(f"Downloaded {len(downloaded_files)} files from @{username}", "info")
+
+        # Apply timestamps to files
+        for file_path in downloaded_files:
+            filename = file_path.name
+            if filename in file_timestamps:
+                timestamp = file_timestamps[filename]
+                try:
+                    # Convert datetime to unix timestamp
+                    unix_time = timestamp.timestamp()
+                    # Set both access time and modification time
+                    os.utime(str(file_path), (unix_time, unix_time))
+                    self.log(f"Applied timestamp {timestamp} to {filename}", "debug")
+                except Exception as e:
+                    self.log(f"Failed to apply timestamp to {filename}: {e}", "warning")
+
+        # Store timestamps for later use
+        self.file_timestamps.update(file_timestamps)
+
+        return file_timestamps, downloaded_files
+    
+    def download_video(self, url: str, output_dir: Path = None) -> Tuple[Optional[datetime], Optional[Path]]:
+        """
+        Download a single TikTok video with yt-dlp
+
+        Args:
+            url: TikTok video URL
+            output_dir: Output directory, str or Path (base_path when omitted)
+
+        Returns:
+            Tuple of (timestamp, file path); path is None if skipped or not found, (None, None) on metadata failure or error
+        """
+        output_dir = Path(output_dir) if output_dir else self.base_path
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        self.log(f"Downloading video: {url}", "info")
+
+        # First, get video info without downloading
+        cmd_info = [
+            "yt-dlp",
+            "--dump-json",
+            "--no-warnings",
+            "--quiet",
+            url
+        ]
+
+        try:
+            result = subprocess.run(cmd_info, capture_output=True, text=True)
+            if result.returncode != 0:
+                self.log(f"Failed to get video info: {result.stderr}", "error")
+                return None, None
+
+            info = json.loads(result.stdout)
+            timestamp = self.extract_date_from_info(info)
+
+            # Check if this is a photo post (no video, only audio); a null 'formats' counts as empty
+            formats = info.get('formats') or []
+            has_video = any(f.get('vcodec') != 'none' for f in formats)
+
+            if not has_video and len(formats) > 0:
+                # This is a photo/image post - skip it
+                self.log("Skipping TikTok photo post (only videos are downloaded)", "info")
+                return timestamp, None
+
+            # Download video
+            output_template = str(output_dir / "%(upload_date)s_%(title)s_%(id)s.%(ext)s")
+            cmd_download = [
+                "yt-dlp",
+                "--format", "best",  # Explicitly request best video+audio format
+                "--no-warnings",
+                "--quiet",
+                "-o", output_template,
+                url
+            ]
+
+            result = subprocess.run(cmd_download, capture_output=True, text=True)
+            if result.returncode != 0:
+                self.log(f"Failed to download video: {result.stderr}", "error")
+                return timestamp, None
+
+            # Rebuild the expected path; `or` covers fields that are present but null in the JSON
+            video_id = str(info.get('id') or '')
+            extension = str(info.get('ext') or 'mp4')
+            expected_name = output_template.replace('%(upload_date)s', str(info.get('upload_date') or 'unknown'))
+            expected_name = expected_name.replace('%(title)s', str(info.get('title') or 'video'))
+            expected_name = expected_name.replace('%(id)s', video_id)
+            expected_name = expected_name.replace('%(ext)s', extension)
+
+            downloaded_file = Path(expected_name)
+            if not downloaded_file.exists() and video_id:
+                # Search by ID instead; skipped without an ID, where the glob would match any file
+                matches = list(output_dir.glob(f"*{video_id}*.{extension}"))
+                if matches:
+                    downloaded_file = matches[0]
+
+            if downloaded_file.exists():
+                if timestamp:
+                    self.file_timestamps[downloaded_file.name] = timestamp
+                return timestamp, downloaded_file
+            return timestamp, None
+
+        except Exception as e:
+            self.log(f"Failed to download video: {e}", "error")
+            return None, None
+    
+    def get_file_timestamps(self) -> Dict[str, datetime]:
+        """Return a shallow copy of the filename -> datetime map collected so far"""
+        return dict(self.file_timestamps)
+    
+    def clear_timestamps(self):
+        """Empty the stored filename -> datetime map in place"""
+        self.file_timestamps.clear()
+
+
+def download_tiktok_profile(username: str,
+                           days: int = 7,
+                           base_path: Path = None,
+                           log_callback=None,
+                           unified_db=None) -> Dict[str, datetime]:
+    """
+    One-call wrapper: build a TikTokDownloader and run download_profile for one user
+
+    Args:
+        username: TikTok username
+        days: Passed to download_profile as number_of_days
+        base_path: Base download path
+        log_callback: Optional logging callback
+        unified_db: UnifiedDatabase instance (required)
+
+    Returns:
+        Dictionary mapping filenames to timestamps (the file list is discarded)
+    Raises:
+        ValueError: If unified_db is missing or falsy
+    """
+    if unified_db:
+        downloader = TikTokDownloader(unified_db=unified_db, log_callback=log_callback, base_path=base_path)
+        return downloader.download_profile(username, number_of_days=days)[0]
+    raise ValueError("unified_db is required for TikTok downloads")
+
+
+if __name__ == "__main__":
+    # Smoke test: only checks that a downloader can be constructed
+    import tempfile
+    from modules.unified_database import UnifiedDatabase
+
+    print("TikTok Downloader Module Test")
+    print("="*60)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # unified_db is mandatory (the constructor raises ValueError without it).
+        # NOTE(review): UnifiedDatabase() presumably opens the project's default database - confirm
+        downloader = TikTokDownloader(base_path=Path(tmpdir), unified_db=UnifiedDatabase())
+        # You can test with a real TikTok username
+        # timestamps, files = downloader.download_profile("username", number_of_days=1)
+        print("Module ready for integration")
--- a/modules/tmdb_client.py
+++ b/modules/tmdb_client.py
--- a/modules/toolzu_module.py
+++ b/modules/toolzu_module.py
--- a/modules/unified_database.py
+++ b/modules/unified_database.py
--- a/modules/universal_logger.py
+++ b/modules/universal_logger.py
@@ -0,0 +1,348 @@
+#!/usr/bin/env python3
+"""
+Universal Logging Module for Media Downloader
+Provides consistent logging across all components with automatic rotation and 7-day retention
+"""
+
+import logging
+import logging.handlers
+from pathlib import Path
+from datetime import datetime, timedelta
+import os
+import glob
+import sys
+
+class UniversalLogger:
+    """
+    Universal logger with per-component log files and automatic cleanup
+
+    Features:
+    - Consistent log format across all components
+    - One date-stamped log file per component (YYYYMMDD_component.log).
+      The file name is fixed when the logger is constructed, so a new
+      day's file only starts once the process builds a new logger
+    - Automatic cleanup of logs older than retention_days, run once at
+      construction time
+    - Console and file output with independent levels
+    - error()/critical() are also recorded in the error_log database table
+      and broadcast to WebSocket clients on a best-effort basis
+    """
+
+    # (logger name, level) pairs for chatty third-party libraries
+    _NOISY_LOGGERS = (
+        ('asyncio', logging.WARNING),
+        ('selenium', logging.WARNING),
+        ('urllib3', logging.WARNING),
+        ('websocket', logging.WARNING),
+        ('requests', logging.WARNING),
+        ('PIL', logging.WARNING),
+        ('instaloader', logging.WARNING),
+        ('tensorflow', logging.ERROR),
+        ('deepface', logging.WARNING),
+    )
+
+    class _MicrosecondFormatter(logging.Formatter):
+        """Formatter that renders %(asctime)s with microsecond precision"""
+
+        def formatTime(self, record, datefmt=None):
+            # Microseconds are included for proper log sorting
+            created = datetime.fromtimestamp(record.created)
+            return created.strftime('%Y-%m-%d %H:%M:%S.%f')
+
+    def __init__(
+        self,
+        component_name: str,
+        log_dir: str = None,
+        retention_days: int = 7,
+        console_level: str = 'INFO',
+        file_level: str = 'DEBUG'
+    ):
+        """
+        Initialize universal logger for a component
+
+        Args:
+            component_name: Name of the component (e.g., 'API', 'Scheduler', 'MediaDownloader')
+            log_dir: Directory to store logs (default: the 'logs' directory
+                beside the package that contains this module)
+            retention_days: Number of days to keep logs (default: 7)
+            console_level: Logging level for console output (default: INFO)
+            file_level: Logging level for file output (default: DEBUG)
+
+        Raises:
+            AttributeError: If console_level or file_level does not name an
+                attribute of the logging module
+            OSError: If the log directory or log file cannot be created
+        """
+        self.component_name = component_name
+        self.retention_days = retention_days
+
+        # Resolve the level names first so a bad name fails before any
+        # directory, file or handler has been touched
+        file_level_no = getattr(logging, file_level.upper())
+        console_level_no = getattr(logging, console_level.upper())
+
+        # Set up log directory
+        if log_dir is None:
+            self.log_dir = Path(__file__).parent.parent / 'logs'
+        else:
+            self.log_dir = Path(log_dir)
+        self.log_dir.mkdir(exist_ok=True, parents=True)
+
+        # Create logger
+        self.logger = logging.getLogger(f'MediaDownloader.{component_name}')
+        self.logger.setLevel(logging.DEBUG)
+
+        # Detach AND close handlers left behind by an earlier instance with
+        # the same component name. Dropping them without close() would leak
+        # the previous FileHandler's open file descriptor.
+        for stale_handler in list(self.logger.handlers):
+            self.logger.removeHandler(stale_handler)
+            stale_handler.close()
+
+        # Format: 2025-11-12 21:00:00.123456 [LoggerName] [Module] [LEVEL] message
+        # ([Module] [LEVEL] are part of the message, see _format_message)
+        formatter = self._MicrosecondFormatter(
+            '%(asctime)s [%(name)s] %(message)s'
+        )
+
+        # File handler with date-stamped filename
+        # Format: 20251113_component.log (restarts on the same day append)
+        date_stamp = datetime.now().strftime('%Y%m%d')
+        log_file = self.log_dir / f'{date_stamp}_{component_name.lower()}.log'
+        file_handler = logging.FileHandler(
+            filename=str(log_file),
+            mode='a',  # Append mode - preserves logs across restarts
+            encoding='utf-8'
+        )
+        file_handler.setLevel(file_level_no)
+        file_handler.setFormatter(formatter)
+        self.logger.addHandler(file_handler)
+
+        # Console handler
+        console_handler = logging.StreamHandler()
+        console_handler.setLevel(console_level_no)
+        console_handler.setFormatter(formatter)
+        self.logger.addHandler(console_handler)
+
+        # Suppress noisy third-party loggers
+        for noisy_name, noisy_level in self._NOISY_LOGGERS:
+            logging.getLogger(noisy_name).setLevel(noisy_level)
+
+        # Clean up old logs on initialization (handlers are ready, so the
+        # cleanup summary can be logged through this logger)
+        self._cleanup_old_logs()
+
+    def _cleanup_old_logs(self):
+        """
+        Remove this component's log files older than retention_days
+
+        Age is judged by file modification time. Failures are swallowed on
+        purpose: cleanup must never prevent the logger from being created.
+        """
+        try:
+            cutoff_date = datetime.now() - timedelta(days=self.retention_days)
+            # Match pattern: YYYYMMDD_component.log
+            pattern = str(self.log_dir / f'*_{self.component_name.lower()}.log')
+
+            cleaned_count = 0
+            for log_file in glob.glob(pattern):
+                try:
+                    file_path = Path(log_file)
+                    mtime = datetime.fromtimestamp(file_path.stat().st_mtime)
+                    if mtime < cutoff_date:
+                        file_path.unlink()
+                        cleaned_count += 1
+                except (OSError, OverflowError, ValueError):
+                    # One unreadable/undeletable file must not stop the
+                    # rest of the sweep
+                    continue
+
+            if cleaned_count > 0:
+                self.info(f"Cleaned up {cleaned_count} old {self.component_name} log file(s)", module='LogCleanup')
+        except Exception:
+            # Deliberate best effort: don't fail initialization if cleanup fails
+            pass
+
+    def _format_message(self, module: str, level: str, message: str) -> str:
+        """
+        Format message to match media-downloader.py style
+
+        Args:
+            module: Module name (e.g., 'Core', 'Forum', 'Instagram')
+            level: Log level (e.g., 'INFO', 'ERROR', 'DEBUG')
+            message: Log message
+
+        Returns:
+            Formatted message: [Module] [LEVEL] message
+        """
+        return f"[{module}] [{level.upper()}] {message}"
+
+    def _broadcast_error(self, message: str, module: str, level: str = 'ERROR'):
+        """
+        Broadcast error to connected WebSocket clients for real-time notifications.
+        Fails silently to not disrupt logging.
+
+        Args:
+            message: Error text; only the first 200 characters are sent
+            module: Module name attached to the alert
+            level: Level label attached to the alert (default: ERROR)
+        """
+        try:
+            # Imported lazily: the API package is only importable when the
+            # web backend is part of the running process
+            from web.backend.api import manager
+            if manager and manager.active_connections:
+                manager.broadcast_sync({
+                    'type': 'error_alert',
+                    'error': {
+                        'module': module,
+                        'level': level,
+                        'message': message[:200],  # Truncate for notification
+                        'timestamp': datetime.now().isoformat(),
+                        'component': self.component_name
+                    }
+                })
+        except Exception:
+            # Fail silently - API may not be running or manager not available
+            pass
+
+    @staticmethod
+    def _normalize_error_message(message: str) -> str:
+        """
+        Replace variable parts of an error message with placeholders
+
+        Used to build the deduplication key, so that messages differing
+        only in URLs, UUIDs, media file paths or numbers collapse together.
+
+        Args:
+            message: Raw error message
+
+        Returns:
+            Message with {url}, {uuid}, {file} and {n} placeholders
+        """
+        import re
+
+        # Order matters:
+        # 1. URLs first, so a media URL (including its query string) becomes
+        #    one {url} instead of being split by the file-path rule
+        normalized = re.sub(r'https?://[^\s]+', '{url}', message)
+        # 2. UUIDs before bare numbers, otherwise an all-digit UUID group is
+        #    rewritten to {n} and the UUID pattern can no longer match
+        normalized = re.sub(
+            r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}',
+            '{uuid}',
+            normalized
+        )
+        # 3. Local media file paths
+        normalized = re.sub(r'/[\w/\-\.]+\.(jpg|png|mp4|webp|gif|heic|mov)', '{file}', normalized)
+        # 4. Remaining standalone numbers
+        normalized = re.sub(r'\b\d+\b', '{n}', normalized)
+        return normalized
+
+    def _record_error_to_db(self, message: str, module: str, level: str = 'ERROR'):
+        """
+        Record error to error_log database table for dashboard display.
+        Uses a separate connection to avoid circular dependencies.
+        Fails silently to not disrupt logging.
+
+        The row is keyed by a SHA-256 hash of the module plus the normalized
+        message; a repeated error bumps occurrence_count and clears
+        viewed_at/dismissed_at instead of inserting a new row. When the
+        write succeeds the error is also broadcast over WebSocket.
+
+        Args:
+            message: Error text (stored truncated to 500 characters)
+            module: Module name stored with the error
+            level: Level label stored with the error (default: ERROR)
+        """
+        try:
+            import sqlite3
+            import hashlib
+            from contextlib import closing
+
+            # Get database path
+            db_path = Path(__file__).parent.parent / 'database' / 'media_downloader.db'
+            if not db_path.exists():
+                return
+
+            # Create error hash for deduplication (module + normalized message)
+            normalized = self._normalize_error_message(message)
+            error_key = f"{module}:{normalized[:200]}"
+            error_hash = hashlib.sha256(error_key.encode()).hexdigest()
+
+            now = datetime.now().isoformat()
+
+            # closing() guarantees the connection is released even when the
+            # statement or commit raises (e.g. database locked)
+            with closing(sqlite3.connect(str(db_path), timeout=2.0)) as conn:
+                conn.execute("PRAGMA busy_timeout = 2000")
+
+                # Upsert: insert new error or update occurrence count
+                # Reset viewed_at and dismissed_at to NULL when error recurs so it shows as "new" on dashboard
+                conn.execute('''
+                    INSERT INTO error_log (error_hash, module, level, message, first_seen, last_seen, occurrence_count, log_file)
+                    VALUES (?, ?, ?, ?, ?, ?, 1, ?)
+                    ON CONFLICT(error_hash) DO UPDATE SET
+                        last_seen = excluded.last_seen,
+                        occurrence_count = error_log.occurrence_count + 1,
+                        viewed_at = NULL,
+                        dismissed_at = NULL
+                ''', (error_hash, module, level, message[:500], now, now, self.component_name))
+                conn.commit()
+
+            # Broadcast to WebSocket clients for real-time notification
+            self._broadcast_error(message, module, level)
+
+        except Exception:
+            # Fail silently - don't let error logging break the main logging
+            pass
+
+    def debug(self, message: str, module: str = 'Core'):
+        """Log debug message"""
+        self.logger.debug(self._format_message(module, 'DEBUG', message))
+
+    def info(self, message: str, module: str = 'Core'):
+        """Log info message"""
+        self.logger.info(self._format_message(module, 'INFO', message))
+
+    def warning(self, message: str, module: str = 'Core'):
+        """Log warning message"""
+        self.logger.warning(self._format_message(module, 'WARNING', message))
+
+    def error(self, message: str, module: str = 'Core'):
+        """Log error message and record to error_log database"""
+        self.logger.error(self._format_message(module, 'ERROR', message))
+        # Record error to database for dashboard display
+        self._record_error_to_db(message, module)
+
+    def critical(self, message: str, module: str = 'Core'):
+        """Log critical message and record to error_log database"""
+        self.logger.critical(self._format_message(module, 'CRITICAL', message))
+        # Record critical errors to database for dashboard display
+        self._record_error_to_db(message, module, level='CRITICAL')
+
+    def success(self, message: str, module: str = 'Core'):
+        """Log success message (maps to INFO level, tagged [SUCCESS])"""
+        self.logger.info(self._format_message(module, 'SUCCESS', message))
+
+    def log(self, message: str, level: str = 'INFO', module: str = 'Core'):
+        """
+        Generic log method supporting all levels
+
+        Args:
+            message: Log message
+            level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL, SUCCESS),
+                case-insensitive; anything else is logged as INFO
+            module: Module name
+        """
+        dispatch = {
+            'DEBUG': self.debug,
+            'INFO': self.info,
+            'WARNING': self.warning,
+            'ERROR': self.error,
+            'CRITICAL': self.critical,
+            'SUCCESS': self.success
+        }
+
+        # str() keeps a non-string level (e.g. None) from raising inside a
+        # logging call; it simply falls back to INFO
+        log_func = dispatch.get(str(level).upper(), self.info)
+        log_func(message, module)
+
+    def get_callback(self):
+        """
+        Get a callback function compatible with existing module signatures
+
+        Returns:
+            Callback function that can be passed to modules expecting log_callback
+        """
+        def callback(*args):
+            """
+            Flexible callback that handles multiple signature formats:
+            - callback(message)                  -> INFO, module 'Core'
+            - callback(message, level)
+            - callback(message, level, module)
+            Any other argument count is joined with spaces and logged as INFO.
+            """
+            if len(args) == 2:
+                message, level = args
+                # Callers sometimes pass exception objects; coerce so the
+                # prefix parsing below cannot raise
+                message = str(message)
+                # Extract module from a leading "[Module]" tag if present
+                if message.startswith('[') and ']' in message:
+                    end_bracket = message.index(']')
+                    module = message[1:end_bracket]
+                    message = message[end_bracket+1:].strip()
+                    # Remove level tag if present
+                    if message.startswith('[') and ']' in message:
+                        message = message[message.index(']')+1:].strip()
+                    self.log(message, level, module)
+                else:
+                    self.log(message, level)
+            elif len(args) == 3:
+                message, level, module = args
+                self.log(message, level, module)
+            elif len(args) == 1:
+                # Log the message itself rather than the repr of the args tuple
+                self.info(str(args[0]))
+            else:
+                self.info(' '.join(str(arg) for arg in args))
+
+        return callback
+
+
+# Process-wide cache of UniversalLogger instances, keyed by component name.
+# Filled lazily by get_logger() so that repeated get_logger() calls for the
+# same component return the same object.
+_logger_instances = {}
+
+def get_logger(
+    component_name: str,
+    log_dir: str = None,
+    retention_days: int = 7,
+    console_level: str = 'INFO',
+    file_level: str = 'DEBUG'
+) -> UniversalLogger:
+    """
+    Return the shared logger for a component, creating it on first use
+
+    The configuration arguments only take effect on the call that creates
+    the instance; later calls for the same component_name return the cached
+    logger unchanged.
+
+    Args:
+        component_name: Name of the component (also the cache key)
+        log_dir: Directory to store logs
+        retention_days: Number of days to keep logs
+        console_level: Console logging level
+        file_level: File logging level
+
+    Returns:
+        UniversalLogger instance
+    """
+    cached = _logger_instances.get(component_name)
+    if cached is not None:
+        return cached
+
+    created = UniversalLogger(
+        component_name=component_name,
+        log_dir=log_dir,
+        retention_days=retention_days,
+        console_level=console_level,
+        file_level=file_level
+    )
+    _logger_instances[component_name] = created
+    return created
--- a/modules/universal_video_downloader.py
+++ b/modules/universal_video_downloader.py
--- a/modules/youtube_channel_monitor.py
+++ b/modules/youtube_channel_monitor.py