Files
media-downloader/modules/youtube_channel_monitor.py
Todd 0d7b2b1aab Initial commit
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00

2180 lines
88 KiB
Python

#!/usr/bin/env python3
"""
YouTube Channel Monitor Module
Monitors specified YouTube channels for new videos matching global phrases,
then automatically adds matching videos to the download queue.
Design:
- Global settings (phrases, interval, quality) apply to ALL channels
- Channels are just URLs to monitor - no per-channel configuration
- All channels are checked together when the interval triggers
"""
import asyncio
import json
import random
import re
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from modules.universal_logger import get_logger
logger = get_logger('YouTubeMonitor')
class YouTubeChannelMonitor:
"""
Background monitor for YouTube channels.
Uses global phrases and interval settings for all channels.
"""
def __init__(self, db_path: str, activity_manager=None):
"""
Initialize the YouTube Channel Monitor.
Args:
db_path: Path to the SQLite database
activity_manager: Optional activity manager for status updates
"""
self.db_path = db_path
self.activity_manager = activity_manager
self.yt_dlp_path = '/opt/media-downloader/venv/bin/yt-dlp'
self.default_output_path = '/opt/immich/md/youtube/'
def _get_connection(self) -> sqlite3.Connection:
"""Get a database connection with row factory."""
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
return conn
# =========================================================================
# GLOBAL SETTINGS METHODS
# =========================================================================
def get_global_settings(self) -> Dict:
"""Get the global monitor settings."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT phrases, check_interval_hours, quality, enabled, last_checked, updated_at,
auto_start_queue, notifications_enabled, auto_pause_threshold_months,
paused_check_interval_days, max_results_per_phrase
FROM youtube_monitor_settings
WHERE id = 1
''')
row = cursor.fetchone()
if row:
settings = dict(row)
try:
settings['phrases'] = json.loads(settings['phrases'])
except (json.JSONDecodeError, TypeError, ValueError):
settings['phrases'] = []
# Ensure all fields are present with defaults
if 'auto_start_queue' not in settings:
settings['auto_start_queue'] = 0
if 'notifications_enabled' not in settings:
settings['notifications_enabled'] = 1
if 'auto_pause_threshold_months' not in settings:
settings['auto_pause_threshold_months'] = 24
if 'paused_check_interval_days' not in settings:
settings['paused_check_interval_days'] = 14
if 'max_results_per_phrase' not in settings:
settings['max_results_per_phrase'] = 100
return settings
# Return defaults if no row exists
return {
'phrases': [],
'check_interval_hours': 6,
'quality': 'best',
'enabled': 1,
'last_checked': None,
'updated_at': None,
'auto_start_queue': 0,
'notifications_enabled': 1,
'auto_pause_threshold_months': 24,
'paused_check_interval_days': 14,
'max_results_per_phrase': 100
}
finally:
conn.close()
def update_global_settings(self, phrases: List[str] = None,
                           check_interval_hours: int = None,
                           quality: str = None,
                           enabled: bool = None,
                           auto_start_queue: bool = None,
                           notifications_enabled: bool = None,
                           auto_pause_threshold_months: int = None,
                           paused_check_interval_days: int = None,
                           max_results_per_phrase: int = None) -> bool:
    """
    Update global monitor settings. Only arguments that are not None are written.

    Args:
        phrases: List of phrases to match in video titles/descriptions
        check_interval_hours: How often to check all channels
        quality: Video quality preference
        enabled: Whether monitoring is enabled globally
        auto_start_queue: Whether to auto-start the download queue after adding videos
        notifications_enabled: Whether to send notifications when videos are added
        auto_pause_threshold_months: Months of inactivity before auto-pausing channels
        paused_check_interval_days: Days between re-checking paused channels
        max_results_per_phrase: Maximum number of videos to process per search phrase
    Returns:
        True if update was successful
    """
    as_flag = lambda v: 1 if v else 0  # booleans are stored as 0/1 integers
    # (column, supplied value, converter-or-None) for every updatable field.
    candidates = [
        ('phrases', phrases, json.dumps),
        ('check_interval_hours', check_interval_hours, None),
        ('quality', quality, None),
        ('enabled', enabled, as_flag),
        ('auto_start_queue', auto_start_queue, as_flag),
        ('notifications_enabled', notifications_enabled, as_flag),
        ('auto_pause_threshold_months', auto_pause_threshold_months, None),
        ('paused_check_interval_days', paused_check_interval_days, None),
        ('max_results_per_phrase', max_results_per_phrase, None),
    ]
    assignments = []
    params = []
    for column, value, convert in candidates:
        if value is not None:
            assignments.append(f'{column} = ?')
            params.append(convert(value) if convert else value)
    if not assignments:
        return False
    assignments.append('updated_at = ?')
    params.append(datetime.now().isoformat())
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(f'''
            UPDATE youtube_monitor_settings
            SET {', '.join(assignments)}
            WHERE id = 1
        ''', params)
        conn.commit()
        logger.info("Updated global YouTube monitor settings")
        return cursor.rowcount > 0
    finally:
        conn.close()
def _update_last_checked(self):
"""Update the last_checked timestamp in global settings."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
UPDATE youtube_monitor_settings
SET last_checked = ?
WHERE id = 1
''', (datetime.now().isoformat(),))
conn.commit()
finally:
conn.close()
# =========================================================================
# CHANNEL MANAGEMENT METHODS
# =========================================================================
def get_all_channels(self) -> List[Dict]:
"""Get all YouTube channel monitors."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
status, always_active, last_video_date, last_check_date,
paused_date, paused_reason, total_videos_found
FROM youtube_channel_monitors
ORDER BY created_at DESC
''')
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def get_enabled_channels(self) -> List[Dict]:
"""Get all enabled YouTube channels."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
status, always_active, last_video_date, last_check_date,
paused_date, paused_reason, total_videos_found
FROM youtube_channel_monitors
WHERE status = 'active'
ORDER BY channel_name, channel_url
''')
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def get_channel(self, channel_id: int) -> Optional[Dict]:
"""Get a specific channel by ID."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at
FROM youtube_channel_monitors
WHERE id = ?
''', (channel_id,))
row = cursor.fetchone()
return dict(row) if row else None
finally:
conn.close()
def add_channel(self, channel_url: str, channel_name: str = None, enabled: bool = True) -> int:
    """
    Register a new YouTube channel for monitoring.

    Args:
        channel_url: YouTube channel URL
        channel_name: Optional display name for the channel
        enabled: Whether the channel starts out enabled
    Returns:
        The ID of the newly inserted channel row
    """
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            '''
            INSERT INTO youtube_channel_monitors (channel_url, channel_name, enabled)
            VALUES (?, ?, ?)
            ''',
            (channel_url, channel_name, 1 if enabled else 0),
        )
        conn.commit()
        new_id = cursor.lastrowid
        logger.info(f"Added YouTube channel {new_id}: {channel_name or channel_url}")
        return new_id
    finally:
        conn.close()
def update_channel(self, channel_id: int, **kwargs) -> bool:
    """
    Update a YouTube channel. Unknown keys are silently ignored.

    Args:
        channel_id: ID of the channel to update
        **kwargs: Fields to update (channel_url, channel_name, enabled)
    Returns:
        True if update was successful
    """
    allowed_fields = {'channel_url', 'channel_name', 'enabled'}
    # Keep only whitelisted fields; 'enabled' is normalized to a 0/1 flag.
    updates = {
        key: ((1 if value else 0) if key == 'enabled' else value)
        for key, value in kwargs.items()
        if key in allowed_fields
    }
    if not updates:
        return False
    conn = self._get_connection()
    try:
        set_clause = ', '.join(f'{column} = ?' for column in updates)
        params = [*updates.values(), channel_id]
        cursor = conn.cursor()
        cursor.execute(f'''
            UPDATE youtube_channel_monitors
            SET {set_clause}
            WHERE id = ?
        ''', params)
        conn.commit()
        logger.info(f"Updated YouTube channel {channel_id}")
        return cursor.rowcount > 0
    finally:
        conn.close()
def delete_channel(self, channel_id: int) -> bool:
    """
    Delete a YouTube channel and its history rows.

    Args:
        channel_id: ID of the channel to delete
    Returns:
        True if the channel row itself was deleted
    """
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        # History rows reference the monitor, so remove them first.
        cursor.execute('DELETE FROM youtube_monitor_history WHERE monitor_id = ?', (channel_id,))
        cursor.execute('DELETE FROM youtube_channel_monitors WHERE id = ?', (channel_id,))
        conn.commit()
        logger.info(f"Deleted YouTube channel {channel_id}")
        # rowcount reflects the most recent execute — the channel delete.
        return cursor.rowcount > 0
    finally:
        conn.close()
async def fetch_channel_id(self, channel_url: str) -> Optional[str]:
    """
    Fetch YouTube channel ID from URL using yt-dlp, with curl/grep fallback.

    Args:
        channel_url: YouTube channel URL
    Returns:
        Channel ID (UC...) or None if not found
    """
    async def _run(cmd):
        # Run a command capped at 10s. On timeout the child is killed before
        # re-raising, so a hung yt-dlp/curl is not left running (the previous
        # code leaked the process on asyncio.wait_for timeout).
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        try:
            stdout, _ = await asyncio.wait_for(process.communicate(), timeout=10)
        except asyncio.TimeoutError:
            process.kill()
            await process.communicate()  # reap the killed child
            raise
        return stdout

    # Method 1: yt-dlp — the first video's JSON metadata carries channel_id.
    try:
        stdout = await _run([
            self.yt_dlp_path,
            '--dump-json',
            '--playlist-end', '1',
            f'{channel_url}/videos'
        ])
        if stdout:
            data = json.loads(stdout.decode('utf-8'))
            channel_id = data.get('channel_id')
            if channel_id and channel_id.startswith('UC'):
                logger.debug(f"Fetched channel ID via yt-dlp: {channel_id}")
                return channel_id
    except Exception as e:
        # Exception already subsumes TimeoutError and JSONDecodeError; the old
        # (TimeoutError, JSONDecodeError, Exception) tuple was redundant.
        logger.debug(f"yt-dlp method failed for {channel_url}: {e}")

    # Method 2: fetch the channel page and scrape an embedded UC... id.
    try:
        stdout = await _run(['curl', '-Ls', channel_url])
        if stdout:
            html = stdout.decode('utf-8')
            # Look for channel ID patterns in the HTML
            pattern = r'"(?:browseId|externalId|channelId)":"(UC[^"]+)"'
            match = re.search(pattern, html)
            if match:
                channel_id = match.group(1)
                logger.debug(f"Fetched channel ID via curl/grep: {channel_id}")
                return channel_id
    except Exception as e:
        logger.debug(f"curl/grep method failed for {channel_url}: {e}")

    logger.warning(f"Could not fetch channel ID for {channel_url}")
    return None
# =========================================================================
# STATUS MANAGEMENT METHODS (v11.20.0)
# =========================================================================
def get_active_channels(self) -> List[Dict]:
"""Get channels with status='active'."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
status, always_active, last_video_date, last_check_date,
paused_date, paused_reason, total_videos_found
FROM youtube_channel_monitors
WHERE status = 'active'
ORDER BY channel_name, channel_url
''')
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def get_paused_channels(self) -> List[Dict]:
"""Get channels with status like 'paused_%'."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
status, always_active, last_video_date, last_check_date,
paused_date, paused_reason, total_videos_found
FROM youtube_channel_monitors
WHERE status LIKE 'paused_%'
ORDER BY paused_date DESC
''')
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def get_channels_filtered(self,
status_filter: str = None,
always_active_filter: str = None,
search: str = None,
sort_field: str = 'name',
sort_ascending: bool = True,
limit: int = None,
offset: int = 0) -> Dict:
"""
Get channels with server-side filtering, searching, sorting, and pagination.
Args:
status_filter: 'all', 'active', 'paused_manual', 'paused_auto', 'paused_all'
always_active_filter: 'all', 'always_active', 'regular'
search: Search term for channel name or URL
sort_field: 'name', 'last_checked', 'last_video_date', 'videos_found', 'created_at'
sort_ascending: Sort direction
limit: Maximum number of results
offset: Offset for pagination
Returns:
Dict with 'channels' list and 'total' count
"""
conn = self._get_connection()
try:
cursor = conn.cursor()
# Build WHERE clause
where_clauses = []
params = []
# Status filter
if status_filter and status_filter != 'all':
if status_filter == 'active':
where_clauses.append("status = 'active'")
elif status_filter == 'paused_manual':
where_clauses.append("status = 'paused_manual'")
elif status_filter == 'paused_auto':
where_clauses.append("status = 'paused_auto'")
elif status_filter == 'paused_all':
where_clauses.append("status LIKE 'paused_%'")
# Always active filter
if always_active_filter and always_active_filter != 'all':
if always_active_filter == 'always_active':
where_clauses.append("always_active = 1")
elif always_active_filter == 'regular':
where_clauses.append("(always_active = 0 OR always_active IS NULL)")
# Search filter
if search:
where_clauses.append("(channel_name LIKE ? OR channel_url LIKE ?)")
search_param = f"%{search}%"
params.extend([search_param, search_param])
where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
# Get total count
count_query = f"SELECT COUNT(*) FROM youtube_channel_monitors {where_sql}"
cursor.execute(count_query, params)
total = cursor.fetchone()[0]
# Build ORDER BY clause
sort_columns = {
'name': 'LOWER(COALESCE(channel_name, channel_url))',
'last_checked': 'last_check_date',
'last_video_date': 'last_video_date',
'videos_found': 'total_videos_found',
'created_at': 'created_at'
}
sort_column = sort_columns.get(sort_field, 'LOWER(COALESCE(channel_name, channel_url))')
sort_direction = 'ASC' if sort_ascending else 'DESC'
order_by = f"ORDER BY {sort_column} {sort_direction}"
# Build main query with pagination (using parameterized queries for security)
limit_sql = "LIMIT ? OFFSET ?" if limit else ""
query = f'''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
status, always_active, last_video_date, last_check_date,
paused_date, paused_reason, total_videos_found, channel_id
FROM youtube_channel_monitors
{where_sql}
{order_by}
{limit_sql}
'''
# Add limit/offset to params if pagination is used
query_params = list(params)
if limit:
query_params.extend([limit, offset])
cursor.execute(query, query_params)
channels = [dict(row) for row in cursor.fetchall()]
return {
'channels': channels,
'total': total
}
finally:
conn.close()
def pause_channel(self, channel_id: int, reason: str = None, auto: bool = False) -> bool:
    """
    Pause a channel manually or automatically.

    Args:
        channel_id: ID of the channel to pause
        reason: Optional reason for pausing
        auto: If True, set status to 'paused_auto', otherwise 'paused_manual'
    Returns:
        True if pause was successful
    """
    new_status = 'paused_auto' if auto else 'paused_manual'
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            '''
            UPDATE youtube_channel_monitors
            SET status = ?, paused_date = ?, paused_reason = ?
            WHERE id = ?
            ''',
            (new_status, datetime.now().isoformat(), reason, channel_id),
        )
        conn.commit()
        logger.info(f"{'Auto-' if auto else ''}Paused channel {channel_id}: {reason}")
        return cursor.rowcount > 0
    finally:
        conn.close()
def resume_channel(self, channel_id: int) -> bool:
    """
    Resume a paused channel: set it back to 'active' and clear pause metadata.

    Args:
        channel_id: ID of the channel to resume
    Returns:
        True if resume was successful
    """
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            '''
            UPDATE youtube_channel_monitors
            SET status = 'active', paused_date = NULL, paused_reason = NULL
            WHERE id = ?
            ''',
            (channel_id,),
        )
        conn.commit()
        logger.info(f"Resumed channel {channel_id}")
        return cursor.rowcount > 0
    finally:
        conn.close()
def toggle_always_active(self, channel_id: int, value: bool) -> bool:
    """
    Set the always_active flag for a channel (exempts it from auto-pausing).

    Args:
        channel_id: ID of the channel
        value: True to enable always_active, False to disable
    Returns:
        True if toggle was successful
    """
    flag = 1 if value else 0
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            '''
            UPDATE youtube_channel_monitors
            SET always_active = ?
            WHERE id = ?
            ''',
            (flag, channel_id),
        )
        conn.commit()
        logger.info(f"Set always_active={value} for channel {channel_id}")
        return cursor.rowcount > 0
    finally:
        conn.close()
def get_statistics(self) -> Dict:
"""Get monitor statistics."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT
COUNT(*) as total,
SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) as active,
SUM(CASE WHEN status = 'paused_manual' THEN 1 ELSE 0 END) as paused_manual,
SUM(CASE WHEN status = 'paused_auto' THEN 1 ELSE 0 END) as paused_auto,
SUM(CASE WHEN always_active = 1 THEN 1 ELSE 0 END) as always_active_count,
SUM(COALESCE(total_videos_found, 0)) as total_videos
FROM youtube_channel_monitors
''')
row = cursor.fetchone()
return dict(row) if row else {
'total': 0,
'active': 0,
'paused_manual': 0,
'paused_auto': 0,
'always_active_count': 0,
'total_videos': 0
}
finally:
conn.close()
# =========================================================================
# AUTO-PAUSE AND PAUSED-CHECK LOGIC (v11.20.0)
# =========================================================================
async def _check_channel_for_auto_pause(self, channel_id: int) -> bool:
    """
    Check if a single channel should be auto-paused based on inactivity or no matched videos.
    Called immediately after checking each channel.

    A channel is auto-paused only when it is currently 'active' and not flagged
    always_active, and either (a) it has been checked at least once but has
    zero matched videos, or (b) its most recent upload is older than the
    configured threshold.

    Args:
        channel_id: ID of the channel to check
    Returns:
        True if channel was auto-paused, False otherwise
    """
    from datetime import timedelta
    settings = self.get_global_settings()
    threshold_months = settings.get('auto_pause_threshold_months', 24)
    # Calculate cutoff date (in YYYYMMDD format to match yt-dlp dates).
    # A "month" is approximated as 30 days here and in the reason text below.
    cutoff_date = datetime.now() - timedelta(days=threshold_months * 30)
    cutoff_str = cutoff_date.strftime('%Y%m%d')
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        # Check if this specific channel should be auto-paused
        cursor.execute('''
            SELECT id, channel_name, last_video_date, always_active, status,
                   videos_found, last_check_date
            FROM youtube_channel_monitors
            WHERE id = ?
        ''', (channel_id,))
        row = cursor.fetchone()
        if not row:
            # Unknown channel id — nothing to do.
            return False
        channel_name = row['channel_name']
        last_video_date = row['last_video_date']
        always_active = row['always_active']
        status = row['status']
        videos_found = row['videos_found']
        last_check_date = row['last_check_date']
        # Don't auto-pause if already paused or always_active
        if status != 'active' or always_active == 1:
            return False
        # Auto-pause if channel has been checked but has 0 matched videos
        if videos_found == 0 and last_check_date:
            reason = "No matching videos found"
            cursor.execute('''
                UPDATE youtube_channel_monitors
                SET status = 'paused_auto',
                    paused_date = ?,
                    paused_reason = ?
                WHERE id = ?
            ''', (datetime.now().isoformat(), reason, channel_id))
            conn.commit()
            logger.info(f"Auto-paused channel '{channel_name}': {reason}")
            return True
        # Auto-pause if channel is inactive (no uploads in threshold period).
        # NOTE(review): this lexicographic comparison is only guaranteed for
        # YYYYMMDD values; the parser below also accepts ISO dates, so confirm
        # upstream always stores last_video_date as YYYYMMDD.
        if last_video_date and last_video_date < cutoff_str:
            # Calculate days since last upload for the pause reason
            try:
                if len(last_video_date) == 8 and last_video_date.isdigit():
                    # YYYYMMDD format from yt-dlp
                    last_upload = datetime.strptime(last_video_date, '%Y%m%d')
                else:
                    # ISO format
                    last_upload = datetime.fromisoformat(last_video_date)
                days_inactive = (datetime.now() - last_upload).days
                reason = f"No uploads in {days_inactive} days (threshold: {threshold_months * 30} days)"
                cursor.execute('''
                    UPDATE youtube_channel_monitors
                    SET status = 'paused_auto',
                        paused_date = ?,
                        paused_reason = ?
                    WHERE id = ?
                ''', (datetime.now().isoformat(), reason, channel_id))
                conn.commit()
                logger.info(f"Auto-paused channel '{channel_name}': {reason}")
                return True
            except (ValueError, TypeError) as e:
                # Unparseable date: log and leave the channel untouched.
                logger.error(f"Error parsing date for channel {channel_id}: {e}")
                return False
        return False
    finally:
        conn.close()
async def check_for_inactive_channels(self) -> int:
    """
    Check for channels that should be auto-paused based on inactivity.

    Declared async for interface consistency with the other check methods,
    although this body performs no awaits (all work is synchronous SQLite).

    Returns:
        Number of channels auto-paused
    """
    from datetime import timedelta
    settings = self.get_global_settings()
    threshold_months = settings.get('auto_pause_threshold_months', 24)
    # Calculate cutoff date (in YYYYMMDD format to match yt-dlp dates);
    # a "month" is approximated as 30 days.
    cutoff_date = datetime.now() - timedelta(days=threshold_months * 30)
    cutoff_str = cutoff_date.strftime('%Y%m%d')
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        # Find active channels that haven't posted in threshold period
        # Note: Comparing YYYYMMDD strings works correctly (20231225 < 20241227)
        cursor.execute('''
            SELECT id, channel_name, last_video_date
            FROM youtube_channel_monitors
            WHERE status = 'active'
            AND always_active = 0
            AND last_video_date IS NOT NULL
            AND last_video_date < ?
        ''', (cutoff_str,))
        inactive_channels = cursor.fetchall()
        paused_count = 0
        for row in inactive_channels:
            channel_id = row['id']
            channel_name = row['channel_name']
            last_video_date = row['last_video_date']
            # Calculate days since last upload
            try:
                # Handle both YYYYMMDD format (from yt-dlp) and ISO format
                if len(last_video_date) == 8 and last_video_date.isdigit():
                    # YYYYMMDD format from yt-dlp
                    last_upload = datetime.strptime(last_video_date, '%Y%m%d')
                else:
                    # ISO format
                    last_upload = datetime.fromisoformat(last_video_date)
                days_inactive = (datetime.now() - last_upload).days
                reason = f"No uploads in {days_inactive} days (threshold: {threshold_months * 30} days)"
                cursor.execute('''
                    UPDATE youtube_channel_monitors
                    SET status = 'paused_auto',
                        paused_date = ?,
                        paused_reason = ?
                    WHERE id = ?
                ''', (datetime.now().isoformat(), reason, channel_id))
                paused_count += 1
                logger.info(f"Auto-paused channel '{channel_name}': {reason}")
            except (ValueError, TypeError) as e:
                # Skip channels whose stored date cannot be parsed.
                logger.error(f"Error parsing date for channel {channel_id}: {e}")
                continue
        # Single commit covers all UPDATEs issued in the loop above.
        conn.commit()
        return paused_count
    finally:
        conn.close()
async def check_paused_channels(self) -> int:
    """
    Periodically check paused channels to see if they've resumed posting.

    For each paused channel that hasn't been checked within
    paused_check_interval_days, fetch its latest few videos, record the check
    time and newest upload date, and auto-resume 'paused_auto' channels whose
    newest upload falls inside the auto-pause threshold. Manually paused
    channels are re-checked but never auto-resumed.

    Returns:
        Number of channels auto-resumed
    """
    from datetime import timedelta
    settings = self.get_global_settings()
    check_interval_days = settings.get('paused_check_interval_days', 14)
    threshold_months = settings.get('auto_pause_threshold_months', 24)
    # Find paused channels that need checking (last check older than interval)
    cutoff_date = datetime.now() - timedelta(days=check_interval_days)
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT id, channel_url, channel_name, status
            FROM youtube_channel_monitors
            WHERE status LIKE 'paused_%'
            AND (last_check_date IS NULL OR last_check_date < ?)
        ''', (cutoff_date.isoformat(),))
        paused_channels = [dict(row) for row in cursor.fetchall()]
    finally:
        # Connection is closed before the (slow) network checks below; each
        # per-channel update opens a short-lived connection of its own.
        conn.close()
    resumed_count = 0
    for channel in paused_channels:
        try:
            # Check if channel has new videos (light check - just get latest 5)
            videos = await self.get_channel_videos(channel['channel_url'], max_results=5)
            most_recent_upload = None
            if videos:
                # Find most recent video by upload_date (string-comparable)
                for video in videos:
                    upload_date_str = video.get('upload_date')
                    if upload_date_str:
                        if not most_recent_upload or upload_date_str > most_recent_upload:
                            most_recent_upload = upload_date_str
            # Update last_check_date and last_video_date
            conn = self._get_connection()
            try:
                cursor = conn.cursor()
                cursor.execute('''
                    UPDATE youtube_channel_monitors
                    SET last_check_date = ?,
                        last_video_date = ?
                    WHERE id = ?
                ''', (datetime.now().isoformat(), most_recent_upload, channel['id']))
                conn.commit()
            finally:
                conn.close()
            # Check if auto-paused channel should be resumed
            if channel['status'] == 'paused_auto' and most_recent_upload:
                try:
                    # Parse date (yt-dlp format: YYYYMMDD), else ISO
                    if len(most_recent_upload) == 8:
                        upload_datetime = datetime.strptime(most_recent_upload, '%Y%m%d')
                    else:
                        upload_datetime = datetime.fromisoformat(most_recent_upload)
                    days_since_upload = (datetime.now() - upload_datetime).days
                    # If upload is recent (within threshold), auto-resume
                    if days_since_upload < (threshold_months * 30):
                        self.resume_channel(channel['id'])
                        resumed_count += 1
                        logger.info(f"Auto-resumed channel '{channel['channel_name']}' - new upload detected ({days_since_upload} days old)")
                except (ValueError, TypeError) as e:
                    logger.error(f"Error parsing upload date for channel {channel['id']}: {e}")
        except Exception as e:
            # One failing channel must not abort the whole sweep.
            logger.error(f"Error checking paused channel {channel['id']} ({channel['channel_name']}): {e}")
            continue
    return resumed_count
def check_paused_channels_sync(self) -> int:
    """
    Synchronous wrapper for check_paused_channels.

    Returns:
        Number of channels auto-resumed
    """
    # asyncio.run creates, installs, runs and tears down a fresh event loop.
    # The previous new_event_loop()/run_until_complete pair never called
    # set_event_loop() and skipped async-generator shutdown on close.
    # Like the old code, this must not be called from a running event loop.
    return asyncio.run(self.check_paused_channels())
# =========================================================================
# HISTORY METHODS
# =========================================================================
def get_channel_history(self, channel_id: int, limit: int = 50) -> List[Dict]:
"""Get history for a specific channel."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, monitor_id, video_id, video_title, matched_phrase, action, created_at
FROM youtube_monitor_history
WHERE monitor_id = ?
ORDER BY created_at DESC
LIMIT ?
''', (channel_id, limit))
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def get_all_history(self, limit: int = 100) -> List[Dict]:
"""Get combined history for all channels."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT h.id, h.monitor_id, h.video_id, h.video_title, h.matched_phrase,
h.action, h.created_at, c.channel_name, c.channel_url
FROM youtube_monitor_history h
LEFT JOIN youtube_channel_monitors c ON h.monitor_id = c.id
ORDER BY h.created_at DESC
LIMIT ?
''', (limit,))
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def _is_video_processed(self, channel_id: int, video_id: str) -> bool:
"""Check if a video has already been processed for a channel."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT 1 FROM youtube_monitor_history
WHERE monitor_id = ? AND video_id = ?
''', (channel_id, video_id))
return cursor.fetchone() is not None
finally:
conn.close()
def _record_video_processed(self, channel_id: int, video_id: str,
video_title: str, matched_phrase: str, action: str):
"""Record that a video has been processed."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
INSERT OR IGNORE INTO youtube_monitor_history
(monitor_id, video_id, video_title, matched_phrase, action)
VALUES (?, ?, ?, ?, ?)
''', (channel_id, video_id, video_title, matched_phrase, action))
conn.commit()
except Exception as e:
logger.error(f"Failed to record video processed: {e}")
finally:
conn.close()
def _update_channel_stats(self, channel_id: int, videos_added: int, most_recent_upload: str = None):
    """
    Update channel statistics after a check.

    Recomputes the channel's video totals from the shared
    celebrity_discovered_videos table and stamps check/upload dates.

    Args:
        channel_id: Row id of the monitor (youtube_channel_monitors.id)
        videos_added: Currently unused by this body — the stored counts are
            recomputed from the database instead. Kept for caller compatibility.
        most_recent_upload: Newest upload date seen this check; when None the
            existing last_video_date is preserved.
    """
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        # Get channel info for accurate matching
        cursor.execute('SELECT channel_id, channel_name FROM youtube_channel_monitors WHERE id = ?', (channel_id,))
        row = cursor.fetchone()
        if not row:
            # Monitor row vanished (e.g. deleted mid-check) — nothing to update.
            return
        yt_channel_id, channel_name = row[0], row[1]
        # Count videos from celebrity_discovered_videos (Internet Discovery database)
        # This is the shared database that shows on the Internet Discovery page
        if yt_channel_id:
            cursor.execute('''
                SELECT COUNT(*)
                FROM celebrity_discovered_videos
                WHERE (
                    -- Match by channel_id
                    channel_id = ?
                    OR
                    -- Fallback: match by name if video has no channel_id
                    (channel_id IS NULL OR channel_id = '')
                    AND REPLACE(LOWER(channel_name), ' ', '') = REPLACE(LOWER(?), ' ', '')
                )
                AND platform = 'youtube'
            ''', (yt_channel_id, channel_name))
            total_count = cursor.fetchone()[0]
        else:
            # Fallback to name-only matching if monitor has no channel_id.
            # Names are compared case- and whitespace-insensitively.
            cursor.execute('''
                SELECT COUNT(*)
                FROM celebrity_discovered_videos
                WHERE REPLACE(LOWER(channel_name), ' ', '') = REPLACE(LOWER(?), ' ', '')
                AND platform = 'youtube'
            ''', (channel_name,))
            total_count = cursor.fetchone()[0]
        # Update last_video_date if we have a new value, or keep existing if we don't
        # (the CASE expression keeps the old date when most_recent_upload is NULL).
        cursor.execute('''
            UPDATE youtube_channel_monitors
            SET last_checked = ?,
                last_check_date = ?,
                videos_found = ?,
                total_videos_found = ?,
                last_video_date = CASE
                    WHEN ? IS NOT NULL THEN ?
                    ELSE last_video_date
                END
            WHERE id = ?
        ''', (
            datetime.now().isoformat(),
            datetime.now().isoformat(),
            total_count,
            total_count,
            most_recent_upload,
            most_recent_upload,
            channel_id
        ))
        conn.commit()
    finally:
        conn.close()
# =========================================================================
# VIDEO FETCHING AND MATCHING
# =========================================================================
async def _get_channel_latest_upload_date(self, channel_url: str) -> Optional[str]:
    """
    Get the upload date of the most recent video on a channel.
    Uses full metadata fetch (not flat-playlist) to get accurate upload_date.
    Tries multiple URL formats if the first attempt fails.

    Args:
        channel_url: URL of the YouTube channel
    Returns:
        Upload date string in YYYYMMDD format, or None if not found
    """
    # Normalize: strip a known tab suffix so we can append our own.
    base_url = channel_url.rstrip('/')
    for suffix in ['/videos', '/streams', '/shorts', '/playlists', '/community', '/about']:
        if base_url.endswith(suffix):
            base_url = base_url[:-len(suffix)]
            break
    # Try /videos first, then the bare channel URL, then /streams for
    # channels that primarily live-stream.
    urls_to_try = [f"{base_url}/videos", base_url, f"{base_url}/streams"]
    for url_attempt in urls_to_try:
        cmd = [
            self.yt_dlp_path,
            '--playlist-end', '1',  # Only get the most recent video
            '--dump-json',
            '--no-warnings',
            '--ignore-errors',
            '--skip-download',
            url_attempt
        ]
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            try:
                stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=30)
            except asyncio.TimeoutError:
                # Kill the hung yt-dlp so it isn't left running, then treat
                # this URL as failed (previously the process leaked here).
                process.kill()
                await process.communicate()
                raise
            # A missing tab ("does not have a ...") or 404 means this URL
            # format doesn't exist for the channel — try the next one.
            stderr_text = stderr.decode().strip()
            if 'does not have a' in stderr_text.lower() or '404' in stderr_text:
                continue
            for line in stdout.decode().strip().split('\n'):
                if line:
                    try:
                        data = json.loads(line)
                        upload_date = data.get('upload_date')
                        if upload_date:
                            logger.debug(f"Successfully fetched upload date {upload_date} from {url_attempt}")
                            return upload_date
                    except json.JSONDecodeError:
                        # Ignore non-JSON output lines and keep scanning.
                        pass
        except asyncio.TimeoutError:
            logger.debug(f"Timeout fetching latest upload date from {url_attempt}")
            continue
        except Exception as e:
            logger.debug(f"Error fetching latest upload date from {url_attempt}: {e}")
            continue
    logger.warning(f"Could not fetch latest upload date from {base_url} after trying all URL formats")
    return None
async def get_channel_videos(self, channel_url: str, max_results: int = 20, search_phrase: str = None) -> List[Dict]:
"""
Fetch videos from a YouTube channel using yt-dlp.
Args:
channel_url: URL of the YouTube channel
max_results: Maximum number of videos to fetch
search_phrase: Optional phrase to search within the channel
Returns:
List of video metadata dictionaries (basic info from flat-playlist)
"""
# Build the URL based on whether we're searching or fetching recent
if search_phrase:
# Use channel search URL to find videos matching the phrase
# Remove any trailing path from channel URL
base_url = channel_url.rstrip('/')
for suffix in ['/videos', '/streams', '/shorts', '/playlists', '/community', '/about']:
if base_url.endswith(suffix):
base_url = base_url[:-len(suffix)]
# URL encode the search phrase
import urllib.parse
encoded_phrase = urllib.parse.quote(search_phrase)
channel_url = f"{base_url}/search?query={encoded_phrase}"
else:
# Ensure URL ends with /videos for recent uploads
if not channel_url.endswith('/videos'):
if channel_url.endswith('/'):
channel_url = channel_url + 'videos'
else:
channel_url = channel_url + '/videos'
cmd = [
self.yt_dlp_path,
'--flat-playlist',
'--dump-json',
'--playlist-end', str(max_results),
'--no-warnings',
'--ignore-errors',
channel_url
]
try:
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=90)
videos = []
for line in stdout.decode().strip().split('\n'):
if line:
try:
data = json.loads(line)
videos.append({
'video_id': data.get('id'),
'title': data.get('title', ''),
'channel_name': data.get('uploader', data.get('channel', '')),
'channel_id': data.get('channel_id', ''),
'upload_date': data.get('upload_date'),
'duration': data.get('duration', 0),
'view_count': data.get('view_count', 0),
'thumbnail': data.get('thumbnail', ''),
'description': data.get('description', ''),
'url': f"https://www.youtube.com/watch?v={data.get('id')}"
})
except json.JSONDecodeError:
pass
logger.debug(f"Fetched {len(videos)} videos from {channel_url}")
return videos
except asyncio.TimeoutError:
logger.error(f"Timeout fetching videos from {channel_url}")
return []
except Exception as e:
logger.error(f"Error fetching videos from {channel_url}: {e}")
return []
async def fetch_video_metadata(self, video_id: str) -> Dict:
"""
Fetch full metadata for a single video including upload date, resolution, and thumbnail.
Args:
video_id: YouTube video ID
Returns:
Dictionary with full video metadata
"""
try:
cmd = [
self.yt_dlp_path,
f'https://www.youtube.com/watch?v={video_id}',
'--dump-json',
'--no-download',
'--no-warnings',
'--ignore-errors'
]
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=30)
if stdout:
data = json.loads(stdout.decode().strip())
# Extract max resolution (height) and corresponding width from formats
max_resolution = 0
max_width = 0
formats = data.get('formats', [])
for fmt in formats:
height = fmt.get('height')
if height and isinstance(height, int) and height > max_resolution:
# Only count video formats (not audio-only)
if fmt.get('vcodec', 'none') != 'none':
max_resolution = height
# Get the width for this format
width = fmt.get('width')
if width and isinstance(width, int):
max_width = width
# Get best thumbnail - prefer jpg over webp for better compatibility
thumbnail = f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
thumbnails = data.get('thumbnails', [])
if thumbnails:
# Filter for jpg thumbnails only (webp may not exist for all videos)
jpg_thumbs = [t for t in thumbnails if t.get('url', '').endswith('.jpg')]
if jpg_thumbs:
# Get highest quality jpg thumbnail
best_thumb = max(jpg_thumbs, key=lambda t: t.get('height', 0) or 0)
thumbnail = best_thumb.get('url', thumbnail)
return {
'video_id': video_id,
'title': data.get('title', ''),
'channel_name': data.get('uploader', data.get('channel', '')),
'channel_id': data.get('channel_id', ''),
'upload_date': data.get('upload_date', ''),
'duration': data.get('duration', 0),
'view_count': data.get('view_count', 0),
'thumbnail': thumbnail,
'description': data.get('description', '')[:500] if data.get('description') else '',
'max_resolution': max_resolution if max_resolution > 0 else None,
'max_width': max_width if max_width > 0 else None,
'url': f"https://www.youtube.com/watch?v={video_id}"
}
except asyncio.TimeoutError:
logger.warning(f"Timeout fetching metadata for {video_id}")
except Exception as e:
logger.warning(f"Failed to fetch metadata for {video_id}: {e}")
return {}
def _matches_phrase(self, title: str, description: str, phrases: List[str]) -> Optional[str]:
"""
Check if video matches any phrase.
Also checks hashtag variations (e.g., "Eva Longoria" matches "#EvaLongoria").
Args:
title: Video title
description: Video description
phrases: List of phrases to match
Returns:
The matched phrase, or None if no match
"""
text = f"{title} {description}".lower()
for phrase in phrases:
phrase_lower = phrase.lower()
# Check direct match
if phrase_lower in text:
return phrase
# Check hashtag variation (e.g., "Eva Longoria" -> "#evalongoria")
# Remove spaces, hyphens, underscores from phrase for hashtag matching
hashtag_phrase = '#' + phrase_lower.replace(' ', '').replace('-', '').replace('_', '')
if hashtag_phrase in text:
return phrase
return None
def _add_to_download_queue(self, video: Dict, channel: Dict, quality: str) -> bool:
"""
Add a matching video directly to the video_download_queue.
Args:
video: Video metadata dictionary (should be full metadata from fetch_video_metadata)
channel: Channel dictionary
quality: Video quality from global settings
Returns:
True if successfully added to queue
"""
conn = self._get_connection()
try:
cursor = conn.cursor()
# Check if video already exists in queue
cursor.execute('''
SELECT 1 FROM video_download_queue
WHERE platform = 'youtube' AND video_id = ?
''', (video['video_id'],))
if cursor.fetchone():
logger.debug(f"Video {video['video_id']} already in queue")
return False
# Parse upload date if available (format: YYYYMMDD)
upload_date = None
if video.get('upload_date'):
try:
upload_date = datetime.strptime(video['upload_date'], '%Y%m%d').isoformat()
except (ValueError, TypeError):
pass # Skip invalid date formats
# Insert into queue with all metadata fields matching celebrity discovery
cursor.execute('''
INSERT INTO video_download_queue
(platform, video_id, url, title, channel_name, thumbnail, duration,
upload_date, view_count, max_resolution, max_width, description, source_type,
source_name, priority, status, metadata)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
'youtube',
video['video_id'],
video['url'],
video['title'],
video.get('channel_name', channel.get('channel_name', '')),
video.get('thumbnail', ''),
video.get('duration', 0),
upload_date,
video.get('view_count', 0),
video.get('max_resolution'), # Now included from full metadata
video.get('max_width'), # Video width for aspect ratio
video.get('description', '')[:500] if video.get('description') else None,
'youtube_monitor',
f"Monitor: {channel.get('channel_name', channel['channel_url'])}",
5, # Default priority
'pending',
json.dumps({
'channel_id': channel['id'],
'quality': quality,
'output_path': self.default_output_path,
'matched_from': 'youtube_channel_monitor'
})
))
conn.commit()
logger.info(f"Added video '{video['title'][:50]}' to download queue (res: {video.get('max_resolution', 'N/A')}p)")
return True
except sqlite3.IntegrityError:
logger.debug(f"Video {video['video_id']} already exists in queue (integrity error)")
return False
except Exception as e:
logger.error(f"Failed to add video to queue: {e}")
return False
finally:
conn.close()
def _get_or_create_monitor_preset(self, celebrity_id: int, channel_name: str) -> int:
"""Get or create a preset for YouTube Monitor videos."""
conn = self._get_connection()
try:
cursor = conn.cursor()
preset_name = f"YT Monitor: {channel_name[:30]}"
# Check if preset exists
cursor.execute('''
SELECT id FROM celebrity_search_presets
WHERE celebrity_id = ? AND name = ?
''', (celebrity_id, preset_name))
row = cursor.fetchone()
if row:
return row[0]
# Create new preset
cursor.execute('''
INSERT INTO celebrity_search_presets
(name, celebrity_id, source_type, source_value, platform, enabled, category)
VALUES (?, ?, 'youtube_monitor', ?, 'youtube', 1, 'youtube_monitor')
''', (preset_name, celebrity_id, channel_name))
conn.commit()
return cursor.lastrowid
finally:
conn.close()
def _find_celebrity_by_phrase(self, phrase: str) -> Optional[int]:
"""Find a celebrity ID that matches the phrase (by name)."""
conn = self._get_connection()
try:
cursor = conn.cursor()
# Try exact match first
cursor.execute('''
SELECT id FROM celebrity_profiles
WHERE LOWER(name) = LOWER(?)
''', (phrase,))
row = cursor.fetchone()
if row:
return row[0]
# Try partial match
cursor.execute('''
SELECT id FROM celebrity_profiles
WHERE LOWER(name) LIKE LOWER(?)
''', (f'%{phrase}%',))
row = cursor.fetchone()
return row[0] if row else None
finally:
conn.close()
    def _add_to_discovery(self, video: Dict, channel: Dict, matched_phrase: str) -> bool:
        """
        Add a matching video to the celebrity discovery page.

        Resolves the matched phrase to a celebrity, attaches the video to a
        per-channel "YT Monitor" preset, inserts the discovery row, then
        best-effort caches the thumbnail and back-fills the monitor's
        channel_id for more accurate future matching.

        Args:
            video: Video metadata dictionary
            channel: Channel dictionary
            matched_phrase: The phrase that matched (used to find celebrity)

        Returns:
            True if successfully added
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            # Find celebrity by phrase; without an owning celebrity the row
            # cannot be attributed, so the video is skipped entirely
            celebrity_id = self._find_celebrity_by_phrase(matched_phrase)
            if not celebrity_id:
                logger.warning(f"No celebrity found for phrase '{matched_phrase}' - skipping")
                return False
            # Get or create preset for this channel
            # (fallback name: the handle portion after '@' in the URL)
            channel_name = channel.get('channel_name', channel['channel_url'].split('@')[-1])
            preset_id = self._get_or_create_monitor_preset(celebrity_id, channel_name)
            # Check if video already exists in discovery
            cursor.execute('''
                SELECT 1 FROM celebrity_discovered_videos
                WHERE video_id = ? AND platform = 'youtube'
            ''', (video['video_id'],))
            if cursor.fetchone():
                logger.debug(f"Video {video['video_id']} already in discovery")
                return False
            # Parse upload date if available (format: YYYYMMDD) into ISO-8601
            upload_date = None
            if video.get('upload_date'):
                try:
                    upload_date = datetime.strptime(video['upload_date'], '%Y%m%d').isoformat()
                except (ValueError, TypeError):
                    pass  # Skip invalid date formats
            # Insert into celebrity_discovered_videos with status 'new'
            cursor.execute('''
                INSERT INTO celebrity_discovered_videos
                (preset_id, celebrity_id, video_id, platform, url, title, channel_name,
                 channel_id, thumbnail, duration, upload_date, view_count, description,
                 content_type, status, max_resolution, max_width, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                preset_id,
                celebrity_id,
                video['video_id'],
                'youtube',
                video['url'],
                video['title'],
                video.get('channel_name', channel.get('channel_name', '')),
                video.get('channel_id', ''),
                video.get('thumbnail', ''),
                video.get('duration', 0),
                upload_date,
                video.get('view_count', 0),
                video.get('description', '')[:500] if video.get('description') else None,
                'youtube_monitor',
                'new',
                video.get('max_resolution'),
                video.get('max_width'),
                json.dumps({
                    'monitor_channel_id': channel['id'],
                    'monitor_channel_name': channel_name,
                    'matched_phrase': matched_phrase
                })
            ))
            conn.commit()
            # Pre-cache thumbnail for faster page loading (best effort)
            thumbnail_url = video.get('thumbnail', '')
            if thumbnail_url:
                self._cache_thumbnail(video['video_id'], thumbnail_url, cursor, conn)
            # Update monitor's channel_id if not set (for accurate future matching)
            if video.get('channel_id'):
                cursor.execute('''
                    UPDATE youtube_channel_monitors
                    SET channel_id = ?
                    WHERE id = ? AND (channel_id IS NULL OR channel_id = '')
                ''', (video['channel_id'], channel['id']))
                conn.commit()
            logger.info(f"Added video '{video['title'][:50]}' to discovery (res: {video.get('max_resolution', 'N/A')}p)")
            return True
        except sqlite3.IntegrityError:
            # Unique constraint hit despite the SELECT check (race or replay)
            logger.debug(f"Video {video['video_id']} already exists in discovery (integrity error)")
            return False
        except Exception as e:
            logger.error(f"Failed to add video to discovery: {e}")
            return False
        finally:
            conn.close()
def _cache_thumbnail(self, video_id: str, thumbnail_url: str, cursor, conn) -> None:
"""
Pre-cache thumbnail by fetching from URL and storing in database.
This speeds up Internet Discovery page loading.
"""
try:
import requests
response = requests.get(thumbnail_url, timeout=10, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
})
if response.status_code == 200 and response.content:
cursor.execute('''
UPDATE celebrity_discovered_videos
SET thumbnail_data = ?
WHERE video_id = ?
''', (response.content, video_id))
conn.commit()
logger.debug(f"Cached thumbnail for {video_id}")
except Exception as e:
logger.debug(f"Failed to cache thumbnail for {video_id}: {e}")
# =========================================================================
# MAIN CHECK METHODS
# =========================================================================
    async def check_channel(self, channel: Dict, phrases: List[str], quality: str) -> int:
        """
        Check a single channel for matching videos by searching for each phrase.

        Flow: fetch the channel's latest upload date (for auto-pause), then for
        every phrase run a channel search, title-match candidates, fetch full
        metadata, verify the video really belongs to the monitored channel
        (YouTube channel search can return other channels' videos), and add
        matches to Internet Discovery. Finally, update channel stats and run
        the per-channel auto-pause check.

        Args:
            channel: Channel dictionary
            phrases: Global phrases to search for
            quality: Global quality setting

        Returns:
            Number of new videos added to Internet Discovery
        """
        channel_name = channel.get('channel_name') or channel['channel_url']
        if not phrases:
            logger.warning(f"No global phrases configured - skipping check")
            return 0
        logger.info(f"Checking channel: {channel_name}")
        videos_added = 0
        seen_video_ids = set()
        most_recent_upload = None  # Track most recent video upload date
        # First, get the channel's latest videos (no search) to track last upload date
        # This is needed for auto-pause logic even if no videos match the search phrases
        try:
            # Get just the most recent video with full metadata to get upload_date
            most_recent_upload = await self._get_channel_latest_upload_date(channel['channel_url'])
            if most_recent_upload:
                logger.debug(f"Channel latest upload: {most_recent_upload}")
        except Exception as e:
            logger.debug(f"Could not fetch latest upload date for {channel_name}: {e}")
        # Search for each phrase on the channel
        settings = self.get_global_settings()
        max_results = settings.get('max_results_per_phrase', 100)
        for phrase in phrases:
            # Search the channel for this phrase
            videos = await self.get_channel_videos(channel['channel_url'], max_results=max_results, search_phrase=phrase)
            logger.debug(f"Found {len(videos)} videos searching for '{phrase}' on {channel_name}")
            for video in videos:
                video_id = video.get('video_id')
                if not video_id:
                    continue
                # Track most recent upload date (YYYYMMDD strings compare lexically)
                upload_date = video.get('upload_date')
                if upload_date:
                    if not most_recent_upload or upload_date > most_recent_upload:
                        most_recent_upload = upload_date
                # Skip duplicates within this check (same video found by multiple phrases)
                if video_id in seen_video_ids:
                    continue
                seen_video_ids.add(video_id)
                # Skip if already processed
                if self._is_video_processed(channel['id'], video_id):
                    continue
                # Verify the phrase actually appears in title (not description)
                # YouTube search can return related/recommended content
                matched_phrase = self._matches_phrase(
                    video.get('title', ''),
                    '',  # Only match on title, not description
                    [phrase]
                )
                if matched_phrase:
                    # Fetch full metadata for matching video (includes resolution, thumbnail, etc.)
                    logger.info(f"Matched phrase '{matched_phrase}' - fetching full metadata for: {video['title'][:60]}")
                    full_video = await self.fetch_video_metadata(video_id)
                    if full_video:
                        # IMPORTANT: Verify the video is actually from the monitored channel
                        # YouTube's channel search can return videos from other channels
                        video_channel_name = (full_video.get('channel_name') or '').lower().strip()
                        monitored_channel_name = (channel.get('channel_name') or '').lower().strip()
                        video_channel_id = (full_video.get('channel_id') or '').lower().strip()
                        # Check if channel matches (by name or by channel ID in URL)
                        channel_url_lower = (channel.get('channel_url') or '').lower()
                        channel_matches = (
                            video_channel_name == monitored_channel_name or
                            (video_channel_id and video_channel_id in channel_url_lower) or
                            (video_channel_name and video_channel_name in channel_url_lower)
                        )
                        if not channel_matches:
                            # Record as 'wrong_channel' so it isn't re-fetched next cycle
                            logger.debug(f"Skipping video from different channel: '{full_video.get('channel_name')}' (expected '{channel.get('channel_name')}')")
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'wrong_channel'
                            )
                            continue
                        # Use full metadata - add to discovery page
                        if self._add_to_discovery(full_video, channel, matched_phrase):
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'discovered'
                            )
                            videos_added += 1
                        else:
                            # Already in discovery or failed
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'skipped'
                            )
                    else:
                        # Fallback to basic info if full metadata fetch fails
                        logger.warning(f"Could not fetch full metadata for {video_id}, using basic info")
                        # Still verify channel matches using basic info
                        # (no channel_id available in flat-playlist data)
                        video_channel_name = (video.get('channel_name') or '').lower().strip()
                        monitored_channel_name = (channel.get('channel_name') or '').lower().strip()
                        channel_url_lower = (channel.get('channel_url') or '').lower()
                        channel_matches = (
                            video_channel_name == monitored_channel_name or
                            (video_channel_name and video_channel_name in channel_url_lower)
                        )
                        if not channel_matches:
                            logger.debug(f"Skipping video from different channel: '{video.get('channel_name')}' (expected '{channel.get('channel_name')}')")
                            self._record_video_processed(
                                channel['id'], video_id,
                                video.get('title', ''), matched_phrase, 'wrong_channel'
                            )
                            continue
                        if self._add_to_discovery(video, channel, matched_phrase):
                            self._record_video_processed(
                                channel['id'], video_id,
                                video.get('title', ''), matched_phrase, 'discovered'
                            )
                            videos_added += 1
                        else:
                            self._record_video_processed(
                                channel['id'], video_id,
                                video.get('title', ''), matched_phrase, 'skipped'
                            )
                    # Small delay between metadata fetches to avoid rate limiting
                    await asyncio.sleep(1)
            # Small delay between phrase searches
            if len(phrases) > 1:
                await asyncio.sleep(2)
        # Update channel stats
        self._update_channel_stats(channel['id'], videos_added, most_recent_upload)
        # Check if this channel should be auto-paused due to inactivity
        await self._check_channel_for_auto_pause(channel['id'])
        return videos_added
async def check_single_channel(self, channel_id: int) -> int:
"""
Check a single channel by ID (for manual trigger).
Args:
channel_id: ID of the channel to check
Returns:
Number of new videos added to Internet Discovery
"""
channel = self.get_channel(channel_id)
if not channel:
logger.error(f"Channel {channel_id} not found")
return 0
settings = self.get_global_settings()
phrases = settings.get('phrases', [])
quality = settings.get('quality', 'best')
if not phrases:
logger.warning(f"No global phrases configured")
return 0
videos_added = await self.check_channel(channel, phrases, quality)
# Auto-start queue if enabled and videos were added
if videos_added > 0 and settings.get('auto_start_queue'):
await self._trigger_queue_start()
return videos_added
    async def run_check_cycle(self) -> int:
        """
        Main entry point - check all enabled channels using global settings.

        Returns 0 immediately when monitoring is disabled, no phrases are
        configured, or no channels are active. Channel order is shuffled and
        delays/pauses are inserted between checks to avoid rate limiting.

        Returns:
            Total number of new videos added to Internet Discovery
        """
        settings = self.get_global_settings()
        if not settings.get('enabled'):
            logger.debug("YouTube channel monitoring is disabled globally")
            return 0
        phrases = settings.get('phrases', [])
        quality = settings.get('quality', 'best')
        if not phrases:
            logger.debug("No global phrases configured for YouTube monitoring")
            return 0
        channels = self.get_active_channels()
        if not channels:
            logger.debug("No active YouTube channels to monitor")
            return 0
        total_added = 0
        total_channels = len(channels)
        logger.info(f"Running YouTube channel monitor: {total_channels} channels, phrases: {phrases}")
        # Start background task tracking (separate from main scheduler activity)
        if self.activity_manager:
            self.activity_manager.start_background_task(
                'youtube_monitor',
                'youtube_channel_monitor',
                'YouTube Channel Monitor',
                'Running',
                {'total_channels': total_channels, 'videos_found': 0}
            )
        # Randomize order to avoid detection patterns
        random.shuffle(channels)
        for idx, channel in enumerate(channels, 1):
            try:
                channel_name = channel.get('channel_name') or channel['channel_url'].replace('https://www.youtube.com/@', '@')
                # Update background task status
                if self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Checking: {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added, 'current_channel': channel_name}
                    )
                videos_added = await self.check_channel(channel, phrases, quality)
                # Update status if we found new videos
                if videos_added > 0 and self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Found {videos_added} new in {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added + videos_added, 'current_channel': channel_name, 'last_found': videos_added}
                    )
                total_added += videos_added
                # Delay between channel checks (with jitter to avoid detection)
                base_delay = 4 + random.uniform(0, 2)  # 4-6 seconds
                await asyncio.sleep(base_delay)
                # Batch pause every 50 channels to reduce rate limiting
                if idx % 50 == 0 and idx < total_channels:
                    logger.info(f"YouTube monitor: Pausing 30s after {idx} channels to avoid rate limiting")
                    if self.activity_manager:
                        self.activity_manager.update_background_task(
                            'youtube_monitor',
                            f'Rate limit pause ({idx}/{total_channels})',
                            idx, total_channels,
                            {'total_channels': total_channels, 'videos_found': total_added, 'status': 'pausing'}
                        )
                    await asyncio.sleep(30)
            except Exception as e:
                # One bad channel must not abort the whole cycle
                logger.error(f"Error checking channel {channel['id']}: {e}")
        # Update global last_checked
        self._update_last_checked()
        # Note: Auto-pause now happens per-channel in check_channel() for real-time feedback
        # Stop background task tracking
        if self.activity_manager:
            self.activity_manager.stop_background_task('youtube_monitor')
        if total_added > 0:
            logger.info(f"YouTube channel monitor complete: added {total_added} videos to queue")
            # Send notification if enabled
            if settings.get('notifications_enabled'):
                self._send_notification(total_added)
            # Auto-start queue if enabled
            if settings.get('auto_start_queue'):
                await self._trigger_queue_start()
        else:
            logger.debug("YouTube channel monitor complete: no new matching videos")
        return total_added
async def _trigger_queue_start(self):
"""Trigger the video download queue to start processing."""
try:
# Try direct access first (works when running within API process)
from web.backend.routers.video_queue import queue_processor, get_app_state
import asyncio
if queue_processor.is_running and not queue_processor.is_paused:
logger.info("Auto-start: Queue processor already running")
return
if queue_processor.is_paused:
queue_processor.resume()
logger.info("Auto-start: Queue processor resumed")
return
app_state = get_app_state()
# Check if app_state.db is available (may be None when running from scheduler)
if app_state is None or app_state.db is None:
logger.debug("Auto-start: app_state.db not available, skipping")
return
# Check if there are pending items
with app_state.db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT COUNT(*) FROM video_download_queue WHERE status = 'pending'")
pending_count = cursor.fetchone()[0]
if pending_count == 0:
logger.debug("Auto-start: No pending items in queue")
return
# Start the processor
queue_processor.start(app_state.db)
logger.info(f"Auto-started download queue: {pending_count} pending items")
except ImportError:
# Running from scheduler process - fall back to HTTP (won't work without auth)
logger.debug("Auto-start: Running outside API process, queue must be started manually")
except Exception as e:
logger.warning(f"Could not auto-start download queue: {e}")
def _send_notification(self, videos_added: int):
"""
Send a Pushover notification about new videos added.
Args:
videos_added: Number of videos added to Internet Discovery
"""
try:
import random
from modules.pushover_notifier import PushoverNotifier
from modules.settings_manager import SettingsManager
from modules.unified_database import UnifiedDatabase
# Get pushover config from settings
settings_manager = SettingsManager(self.db_path)
pushover_config = settings_manager.get('pushover', {})
if not pushover_config.get('enabled'):
logger.debug("Pushover notifications disabled globally")
return
# Create unified_db for recording notification to database
unified_db = UnifiedDatabase(self.db_path)
# Create notifier with unified_db so notification is recorded
notifier = PushoverNotifier(
api_token=pushover_config.get('api_token'),
user_key=pushover_config.get('user_key'),
unified_db=unified_db
)
# Get thumbnail and channel summary from videos just added to Internet Discovery
image_path = None
channel_summary = ""
channel_list = []
conn = self._get_connection()
try:
cursor = conn.cursor()
# Get only the videos from this batch (most recent N videos from celebrity_discovered_videos)
cursor.execute('''
SELECT thumbnail, title, channel_name FROM celebrity_discovered_videos
WHERE content_type = 'youtube_monitor' AND thumbnail IS NOT NULL AND thumbnail != ''
ORDER BY discovered_at DESC
LIMIT ?
''', (videos_added,))
rows = cursor.fetchall()
if rows:
# Pick a random thumbnail from this batch
selected = random.choice(rows)
thumbnail_url = selected['thumbnail']
# Build channel summary from unique channels in THIS batch only
uploaders = {}
for row in rows:
uploader = row['channel_name'] or 'Unknown'
uploaders[uploader] = uploaders.get(uploader, 0) + 1
if uploader not in channel_list:
channel_list.append(uploader)
# Format: "Channel1 (3), Channel2 (2)" - only if count > 1
channel_parts = [f"{name} ({count})" if count > 1 else name
for name, count in sorted(uploaders.items(), key=lambda x: -x[1])[:5]]
if channel_parts:
channel_summary = "\n\nFrom: " + ", ".join(channel_parts)
# Download thumbnail to temp file
if thumbnail_url:
import urllib.request
import tempfile
try:
temp_dir = tempfile.gettempdir()
temp_path = f"{temp_dir}/yt_thumb_{random.randint(1000, 9999)}.jpg"
urllib.request.urlretrieve(thumbnail_url, temp_path)
image_path = temp_path
logger.debug(f"Downloaded thumbnail for notification: {temp_path}")
except Exception as e:
logger.debug(f"Could not download thumbnail: {e}")
finally:
conn.close()
# Build message
title = "YouTube Monitor"
message = f"Added {videos_added} new video{'s' if videos_added > 1 else ''} to Internet Discovery{channel_summary}"
# Set notification context for database recording
notifier._current_notification_context = {
'platform': 'youtube',
'source': 'youtube_monitor',
'content_type': 'video',
'download_count': videos_added,
'metadata': {'channels': channel_list}
}
# Send notification
success = notifier.send_notification(
title=title,
message=message,
priority=0, # Normal priority
image_path=image_path
)
# Clean up temp file
if image_path:
try:
import os
os.unlink(image_path)
except OSError:
pass # Best effort cleanup of temp file
if success:
logger.info(f"Sent notification: {videos_added} videos added")
else:
logger.debug("Notification not sent (disabled or failed)")
except Exception as e:
logger.warning(f"Could not send notification: {e}")
    async def check_all_now(self, from_scheduler: bool = False) -> int:
        """
        Force check all channels immediately (ignoring interval).

        Unlike run_check_cycle, this ignores the global 'enabled' flag and the
        check interval, and wraps the loop in a TaskCheckpoint so a crash can
        resume without re-checking completed channels.

        Args:
            from_scheduler: If True, send push notifications (scheduler runs only)

        Returns:
            Total number of new videos added to Internet Discovery
        """
        settings = self.get_global_settings()
        phrases = settings.get('phrases', [])
        quality = settings.get('quality', 'best')
        if not phrases:
            logger.warning("No global phrases configured")
            return 0
        channels = self.get_enabled_channels()
        if not channels:
            logger.warning("No enabled YouTube channels to monitor")
            return 0
        total_added = 0
        total_channels = len(channels)
        logger.info(f"Force checking all YouTube channels: {total_channels} channels")
        # Crash recovery checkpoint
        from modules.task_checkpoint import TaskCheckpoint
        checkpoint = TaskCheckpoint('youtube_channel_monitor', 'background')
        checkpoint.start(total_items=total_channels)
        if checkpoint.is_recovering():
            logger.info(f"YouTube monitor: recovering — skipping already-checked channels")
        # Start background task tracking (separate from main scheduler activity)
        if self.activity_manager:
            self.activity_manager.start_background_task(
                'youtube_monitor',
                'youtube_channel_monitor',
                'YouTube Channel Monitor',
                'Running',
                {'total_channels': total_channels, 'videos_found': 0}
            )
        # Randomize order to avoid detection patterns
        random.shuffle(channels)
        for idx, channel in enumerate(channels, 1):
            try:
                channel_id = str(channel.get('id', ''))
                channel_name = channel.get('channel_name') or channel['channel_url'].replace('https://www.youtube.com/@', '@')
                # Skip channels already finished before a crash/restart
                if checkpoint.is_completed(channel_id):
                    continue
                checkpoint.set_current(channel_id)
                if self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Checking: {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added, 'current_channel': channel_name}
                    )
                videos_added = await self.check_channel(channel, phrases, quality)
                # Update status if we found new videos
                if videos_added > 0 and self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Found {videos_added} new in {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added + videos_added, 'current_channel': channel_name, 'last_found': videos_added}
                    )
                total_added += videos_added
                checkpoint.mark_completed(channel_id)
                # Delay between channel checks (with jitter to avoid detection)
                base_delay = 4 + random.uniform(0, 2)  # 4-6 seconds
                await asyncio.sleep(base_delay)
                # Batch pause every 50 channels to reduce rate limiting
                if idx % 50 == 0 and idx < total_channels:
                    logger.info(f"YouTube monitor: Pausing 30s after {idx} channels to avoid rate limiting")
                    if self.activity_manager:
                        self.activity_manager.update_background_task(
                            'youtube_monitor',
                            f'Rate limit pause ({idx}/{total_channels})',
                            idx, total_channels,
                            {'total_channels': total_channels, 'videos_found': total_added, 'status': 'pausing'}
                        )
                    await asyncio.sleep(30)
            except Exception as e:
                # One bad channel must not abort the whole run
                logger.error(f"Error checking channel {channel['id']}: {e}")
        # Update global last_checked
        self._update_last_checked()
        # Checkpoint complete
        checkpoint.finish()
        # Stop background task tracking
        if self.activity_manager:
            self.activity_manager.stop_background_task('youtube_monitor')
        if total_added > 0:
            logger.info(f"YouTube channel monitor complete: added {total_added} videos to queue")
            # Send notification if enabled (only for scheduler runs)
            if from_scheduler and settings.get('notifications_enabled'):
                self._send_notification(total_added)
            # Auto-start the download queue if configured
            if settings.get('auto_start_queue'):
                await self._trigger_queue_start()
        else:
            logger.debug("YouTube channel monitor complete: no new matching videos")
        return total_added
def run_sync(self) -> int:
"""
Synchronous wrapper for run_check_cycle.
Used by scheduler which expects synchronous callbacks.
Returns:
Total number of new videos added to Internet Discovery
"""
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
return loop.run_until_complete(self.run_check_cycle())
# Convenience function for external use
def create_youtube_monitor(db_path: str, activity_manager=None) -> YouTubeChannelMonitor:
    """Factory helper returning a configured YouTubeChannelMonitor instance."""
    monitor = YouTubeChannelMonitor(db_path, activity_manager)
    return monitor