#!/usr/bin/env python3 """ YouTube Channel Monitor Module Monitors specified YouTube channels for new videos matching global phrases, then automatically adds matching videos to the download queue. Design: - Global settings (phrases, interval, quality) apply to ALL channels - Channels are just URLs to monitor - no per-channel configuration - All channels are checked together when the interval triggers """ import asyncio import json import random import re import sqlite3 from datetime import datetime from pathlib import Path from typing import Dict, List, Optional, Tuple from modules.universal_logger import get_logger logger = get_logger('YouTubeMonitor') class YouTubeChannelMonitor: """ Background monitor for YouTube channels. Uses global phrases and interval settings for all channels. """ def __init__(self, db_path: str, activity_manager=None): """ Initialize the YouTube Channel Monitor. Args: db_path: Path to the SQLite database activity_manager: Optional activity manager for status updates """ self.db_path = db_path self.activity_manager = activity_manager self.yt_dlp_path = '/opt/media-downloader/venv/bin/yt-dlp' self.default_output_path = '/opt/immich/md/youtube/' def _get_connection(self) -> sqlite3.Connection: """Get a database connection with row factory.""" conn = sqlite3.connect(self.db_path) conn.row_factory = sqlite3.Row return conn # ========================================================================= # GLOBAL SETTINGS METHODS # ========================================================================= def get_global_settings(self) -> Dict: """Get the global monitor settings.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' SELECT phrases, check_interval_hours, quality, enabled, last_checked, updated_at, auto_start_queue, notifications_enabled, auto_pause_threshold_months, paused_check_interval_days, max_results_per_phrase FROM youtube_monitor_settings WHERE id = 1 ''') row = cursor.fetchone() if row: settings = 
dict(row) try: settings['phrases'] = json.loads(settings['phrases']) except (json.JSONDecodeError, TypeError, ValueError): settings['phrases'] = [] # Ensure all fields are present with defaults if 'auto_start_queue' not in settings: settings['auto_start_queue'] = 0 if 'notifications_enabled' not in settings: settings['notifications_enabled'] = 1 if 'auto_pause_threshold_months' not in settings: settings['auto_pause_threshold_months'] = 24 if 'paused_check_interval_days' not in settings: settings['paused_check_interval_days'] = 14 if 'max_results_per_phrase' not in settings: settings['max_results_per_phrase'] = 100 return settings # Return defaults if no row exists return { 'phrases': [], 'check_interval_hours': 6, 'quality': 'best', 'enabled': 1, 'last_checked': None, 'updated_at': None, 'auto_start_queue': 0, 'notifications_enabled': 1, 'auto_pause_threshold_months': 24, 'paused_check_interval_days': 14, 'max_results_per_phrase': 100 } finally: conn.close() def update_global_settings(self, phrases: List[str] = None, check_interval_hours: int = None, quality: str = None, enabled: bool = None, auto_start_queue: bool = None, notifications_enabled: bool = None, auto_pause_threshold_months: int = None, paused_check_interval_days: int = None, max_results_per_phrase: int = None) -> bool: """ Update global monitor settings. 
Args: phrases: List of phrases to match in video titles/descriptions check_interval_hours: How often to check all channels quality: Video quality preference enabled: Whether monitoring is enabled globally auto_start_queue: Whether to auto-start the download queue after adding videos notifications_enabled: Whether to send notifications when videos are added auto_pause_threshold_months: Months of inactivity before auto-pausing channels paused_check_interval_days: Days between re-checking paused channels max_results_per_phrase: Maximum number of videos to process per search phrase Returns: True if update was successful """ conn = self._get_connection() try: cursor = conn.cursor() # Build update parts updates = [] values = [] if phrases is not None: updates.append('phrases = ?') values.append(json.dumps(phrases)) if check_interval_hours is not None: updates.append('check_interval_hours = ?') values.append(check_interval_hours) if quality is not None: updates.append('quality = ?') values.append(quality) if enabled is not None: updates.append('enabled = ?') values.append(1 if enabled else 0) if auto_start_queue is not None: updates.append('auto_start_queue = ?') values.append(1 if auto_start_queue else 0) if notifications_enabled is not None: updates.append('notifications_enabled = ?') values.append(1 if notifications_enabled else 0) if auto_pause_threshold_months is not None: updates.append('auto_pause_threshold_months = ?') values.append(auto_pause_threshold_months) if paused_check_interval_days is not None: updates.append('paused_check_interval_days = ?') values.append(paused_check_interval_days) if max_results_per_phrase is not None: updates.append('max_results_per_phrase = ?') values.append(max_results_per_phrase) if not updates: return False updates.append('updated_at = ?') values.append(datetime.now().isoformat()) cursor.execute(f''' UPDATE youtube_monitor_settings SET {', '.join(updates)} WHERE id = 1 ''', values) conn.commit() logger.info(f"Updated global 
YouTube monitor settings") return cursor.rowcount > 0 finally: conn.close() def _update_last_checked(self): """Update the last_checked timestamp in global settings.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' UPDATE youtube_monitor_settings SET last_checked = ? WHERE id = 1 ''', (datetime.now().isoformat(),)) conn.commit() finally: conn.close() # ========================================================================= # CHANNEL MANAGEMENT METHODS # ========================================================================= def get_all_channels(self) -> List[Dict]: """Get all YouTube channel monitors.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at, status, always_active, last_video_date, last_check_date, paused_date, paused_reason, total_videos_found FROM youtube_channel_monitors ORDER BY created_at DESC ''') return [dict(row) for row in cursor.fetchall()] finally: conn.close() def get_enabled_channels(self) -> List[Dict]: """Get all enabled YouTube channels.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at, status, always_active, last_video_date, last_check_date, paused_date, paused_reason, total_videos_found FROM youtube_channel_monitors WHERE status = 'active' ORDER BY channel_name, channel_url ''') return [dict(row) for row in cursor.fetchall()] finally: conn.close() def get_channel(self, channel_id: int) -> Optional[Dict]: """Get a specific channel by ID.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at FROM youtube_channel_monitors WHERE id = ? 
''', (channel_id,)) row = cursor.fetchone() return dict(row) if row else None finally: conn.close() def add_channel(self, channel_url: str, channel_name: str = None, enabled: bool = True) -> int: """ Add a new YouTube channel to monitor. Args: channel_url: YouTube channel URL channel_name: Optional display name for the channel enabled: Whether the channel is enabled Returns: The ID of the created channel """ conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' INSERT INTO youtube_channel_monitors (channel_url, channel_name, enabled) VALUES (?, ?, ?) ''', (channel_url, channel_name, 1 if enabled else 0)) conn.commit() channel_id = cursor.lastrowid logger.info(f"Added YouTube channel {channel_id}: {channel_name or channel_url}") return channel_id finally: conn.close() def update_channel(self, channel_id: int, **kwargs) -> bool: """ Update a YouTube channel. Args: channel_id: ID of the channel to update **kwargs: Fields to update (channel_url, channel_name, enabled) Returns: True if update was successful """ allowed_fields = {'channel_url', 'channel_name', 'enabled'} updates = {} for key, value in kwargs.items(): if key in allowed_fields: if key == 'enabled': updates[key] = 1 if value else 0 else: updates[key] = value if not updates: return False conn = self._get_connection() try: set_clause = ', '.join(f'{k} = ?' for k in updates.keys()) values = list(updates.values()) + [channel_id] cursor = conn.cursor() cursor.execute(f''' UPDATE youtube_channel_monitors SET {set_clause} WHERE id = ? ''', values) conn.commit() logger.info(f"Updated YouTube channel {channel_id}") return cursor.rowcount > 0 finally: conn.close() def delete_channel(self, channel_id: int) -> bool: """ Delete a YouTube channel and its history. 
Args: channel_id: ID of the channel to delete Returns: True if deletion was successful """ conn = self._get_connection() try: cursor = conn.cursor() # Delete history first cursor.execute('DELETE FROM youtube_monitor_history WHERE monitor_id = ?', (channel_id,)) # Delete channel cursor.execute('DELETE FROM youtube_channel_monitors WHERE id = ?', (channel_id,)) conn.commit() logger.info(f"Deleted YouTube channel {channel_id}") return cursor.rowcount > 0 finally: conn.close() async def fetch_channel_id(self, channel_url: str) -> Optional[str]: """ Fetch YouTube channel ID from URL using yt-dlp, with curl/grep fallback. Args: channel_url: YouTube channel URL Returns: Channel ID (UC...) or None if not found """ # Method 1: Try yt-dlp first try: cmd = [ self.yt_dlp_path, '--dump-json', '--playlist-end', '1', f'{channel_url}/videos' ] process = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, _ = await asyncio.wait_for(process.communicate(), timeout=10) if stdout: data = json.loads(stdout.decode('utf-8')) channel_id = data.get('channel_id') if channel_id and channel_id.startswith('UC'): logger.debug(f"Fetched channel ID via yt-dlp: {channel_id}") return channel_id except (asyncio.TimeoutError, json.JSONDecodeError, Exception) as e: logger.debug(f"yt-dlp method failed for {channel_url}: {e}") # Method 2: Fallback to curl/grep method try: cmd = [ 'curl', '-Ls', channel_url ] process = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, _ = await asyncio.wait_for(process.communicate(), timeout=10) if stdout: html = stdout.decode('utf-8') # Look for channel ID patterns in the HTML pattern = r'"(?:browseId|externalId|channelId)":"(UC[^"]+)"' match = re.search(pattern, html) if match: channel_id = match.group(1) logger.debug(f"Fetched channel ID via curl/grep: {channel_id}") return channel_id except (asyncio.TimeoutError, Exception) as e: 
logger.debug(f"curl/grep method failed for {channel_url}: {e}") logger.warning(f"Could not fetch channel ID for {channel_url}") return None # ========================================================================= # STATUS MANAGEMENT METHODS (v11.20.0) # ========================================================================= def get_active_channels(self) -> List[Dict]: """Get channels with status='active'.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at, status, always_active, last_video_date, last_check_date, paused_date, paused_reason, total_videos_found FROM youtube_channel_monitors WHERE status = 'active' ORDER BY channel_name, channel_url ''') return [dict(row) for row in cursor.fetchall()] finally: conn.close() def get_paused_channels(self) -> List[Dict]: """Get channels with status like 'paused_%'.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at, status, always_active, last_video_date, last_check_date, paused_date, paused_reason, total_videos_found FROM youtube_channel_monitors WHERE status LIKE 'paused_%' ORDER BY paused_date DESC ''') return [dict(row) for row in cursor.fetchall()] finally: conn.close() def get_channels_filtered(self, status_filter: str = None, always_active_filter: str = None, search: str = None, sort_field: str = 'name', sort_ascending: bool = True, limit: int = None, offset: int = 0) -> Dict: """ Get channels with server-side filtering, searching, sorting, and pagination. 
Args: status_filter: 'all', 'active', 'paused_manual', 'paused_auto', 'paused_all' always_active_filter: 'all', 'always_active', 'regular' search: Search term for channel name or URL sort_field: 'name', 'last_checked', 'last_video_date', 'videos_found', 'created_at' sort_ascending: Sort direction limit: Maximum number of results offset: Offset for pagination Returns: Dict with 'channels' list and 'total' count """ conn = self._get_connection() try: cursor = conn.cursor() # Build WHERE clause where_clauses = [] params = [] # Status filter if status_filter and status_filter != 'all': if status_filter == 'active': where_clauses.append("status = 'active'") elif status_filter == 'paused_manual': where_clauses.append("status = 'paused_manual'") elif status_filter == 'paused_auto': where_clauses.append("status = 'paused_auto'") elif status_filter == 'paused_all': where_clauses.append("status LIKE 'paused_%'") # Always active filter if always_active_filter and always_active_filter != 'all': if always_active_filter == 'always_active': where_clauses.append("always_active = 1") elif always_active_filter == 'regular': where_clauses.append("(always_active = 0 OR always_active IS NULL)") # Search filter if search: where_clauses.append("(channel_name LIKE ? 
OR channel_url LIKE ?)") search_param = f"%{search}%" params.extend([search_param, search_param]) where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else "" # Get total count count_query = f"SELECT COUNT(*) FROM youtube_channel_monitors {where_sql}" cursor.execute(count_query, params) total = cursor.fetchone()[0] # Build ORDER BY clause sort_columns = { 'name': 'LOWER(COALESCE(channel_name, channel_url))', 'last_checked': 'last_check_date', 'last_video_date': 'last_video_date', 'videos_found': 'total_videos_found', 'created_at': 'created_at' } sort_column = sort_columns.get(sort_field, 'LOWER(COALESCE(channel_name, channel_url))') sort_direction = 'ASC' if sort_ascending else 'DESC' order_by = f"ORDER BY {sort_column} {sort_direction}" # Build main query with pagination (using parameterized queries for security) limit_sql = "LIMIT ? OFFSET ?" if limit else "" query = f''' SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at, status, always_active, last_video_date, last_check_date, paused_date, paused_reason, total_videos_found, channel_id FROM youtube_channel_monitors {where_sql} {order_by} {limit_sql} ''' # Add limit/offset to params if pagination is used query_params = list(params) if limit: query_params.extend([limit, offset]) cursor.execute(query, query_params) channels = [dict(row) for row in cursor.fetchall()] return { 'channels': channels, 'total': total } finally: conn.close() def pause_channel(self, channel_id: int, reason: str = None, auto: bool = False) -> bool: """ Pause a channel manually or automatically. Args: channel_id: ID of the channel to pause reason: Optional reason for pausing auto: If True, set status to 'paused_auto', otherwise 'paused_manual' Returns: True if pause was successful """ status = 'paused_auto' if auto else 'paused_manual' conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' UPDATE youtube_channel_monitors SET status = ?, paused_date = ?, paused_reason = ? 
WHERE id = ? ''', (status, datetime.now().isoformat(), reason, channel_id)) conn.commit() logger.info(f"{'Auto-' if auto else ''}Paused channel {channel_id}: {reason}") return cursor.rowcount > 0 finally: conn.close() def resume_channel(self, channel_id: int) -> bool: """ Resume a paused channel. Args: channel_id: ID of the channel to resume Returns: True if resume was successful """ conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' UPDATE youtube_channel_monitors SET status = 'active', paused_date = NULL, paused_reason = NULL WHERE id = ? ''', (channel_id,)) conn.commit() logger.info(f"Resumed channel {channel_id}") return cursor.rowcount > 0 finally: conn.close() def toggle_always_active(self, channel_id: int, value: bool) -> bool: """ Toggle always_active flag for a channel. Args: channel_id: ID of the channel value: True to enable always_active, False to disable Returns: True if toggle was successful """ conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' UPDATE youtube_channel_monitors SET always_active = ? WHERE id = ? 
''', (1 if value else 0, channel_id)) conn.commit() logger.info(f"Set always_active={value} for channel {channel_id}") return cursor.rowcount > 0 finally: conn.close() def get_statistics(self) -> Dict: """Get monitor statistics.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' SELECT COUNT(*) as total, SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) as active, SUM(CASE WHEN status = 'paused_manual' THEN 1 ELSE 0 END) as paused_manual, SUM(CASE WHEN status = 'paused_auto' THEN 1 ELSE 0 END) as paused_auto, SUM(CASE WHEN always_active = 1 THEN 1 ELSE 0 END) as always_active_count, SUM(COALESCE(total_videos_found, 0)) as total_videos FROM youtube_channel_monitors ''') row = cursor.fetchone() return dict(row) if row else { 'total': 0, 'active': 0, 'paused_manual': 0, 'paused_auto': 0, 'always_active_count': 0, 'total_videos': 0 } finally: conn.close() # ========================================================================= # AUTO-PAUSE AND PAUSED-CHECK LOGIC (v11.20.0) # ========================================================================= async def _check_channel_for_auto_pause(self, channel_id: int) -> bool: """ Check if a single channel should be auto-paused based on inactivity or no matched videos. Called immediately after checking each channel. Args: channel_id: ID of the channel to check Returns: True if channel was auto-paused, False otherwise """ from datetime import timedelta settings = self.get_global_settings() threshold_months = settings.get('auto_pause_threshold_months', 24) # Calculate cutoff date (in YYYYMMDD format to match yt-dlp dates) cutoff_date = datetime.now() - timedelta(days=threshold_months * 30) cutoff_str = cutoff_date.strftime('%Y%m%d') conn = self._get_connection() try: cursor = conn.cursor() # Check if this specific channel should be auto-paused cursor.execute(''' SELECT id, channel_name, last_video_date, always_active, status, videos_found, last_check_date FROM youtube_channel_monitors WHERE id = ? 
''', (channel_id,)) row = cursor.fetchone() if not row: return False channel_name = row['channel_name'] last_video_date = row['last_video_date'] always_active = row['always_active'] status = row['status'] videos_found = row['videos_found'] last_check_date = row['last_check_date'] # Don't auto-pause if already paused or always_active if status != 'active' or always_active == 1: return False # Auto-pause if channel has been checked but has 0 matched videos if videos_found == 0 and last_check_date: reason = "No matching videos found" cursor.execute(''' UPDATE youtube_channel_monitors SET status = 'paused_auto', paused_date = ?, paused_reason = ? WHERE id = ? ''', (datetime.now().isoformat(), reason, channel_id)) conn.commit() logger.info(f"Auto-paused channel '{channel_name}': {reason}") return True # Auto-pause if channel is inactive (no uploads in threshold period) if last_video_date and last_video_date < cutoff_str: # Calculate days since last upload for the pause reason try: if len(last_video_date) == 8 and last_video_date.isdigit(): last_upload = datetime.strptime(last_video_date, '%Y%m%d') else: last_upload = datetime.fromisoformat(last_video_date) days_inactive = (datetime.now() - last_upload).days reason = f"No uploads in {days_inactive} days (threshold: {threshold_months * 30} days)" cursor.execute(''' UPDATE youtube_channel_monitors SET status = 'paused_auto', paused_date = ?, paused_reason = ? WHERE id = ? ''', (datetime.now().isoformat(), reason, channel_id)) conn.commit() logger.info(f"Auto-paused channel '{channel_name}': {reason}") return True except (ValueError, TypeError) as e: logger.error(f"Error parsing date for channel {channel_id}: {e}") return False return False finally: conn.close() async def check_for_inactive_channels(self) -> int: """ Check for channels that should be auto-paused based on inactivity. 
Returns: Number of channels auto-paused """ from datetime import timedelta settings = self.get_global_settings() threshold_months = settings.get('auto_pause_threshold_months', 24) # Calculate cutoff date (in YYYYMMDD format to match yt-dlp dates) cutoff_date = datetime.now() - timedelta(days=threshold_months * 30) cutoff_str = cutoff_date.strftime('%Y%m%d') conn = self._get_connection() try: cursor = conn.cursor() # Find active channels that haven't posted in threshold period # Note: Comparing YYYYMMDD strings works correctly (20231225 < 20241227) cursor.execute(''' SELECT id, channel_name, last_video_date FROM youtube_channel_monitors WHERE status = 'active' AND always_active = 0 AND last_video_date IS NOT NULL AND last_video_date < ? ''', (cutoff_str,)) inactive_channels = cursor.fetchall() paused_count = 0 for row in inactive_channels: channel_id = row['id'] channel_name = row['channel_name'] last_video_date = row['last_video_date'] # Calculate days since last upload try: # Handle both YYYYMMDD format (from yt-dlp) and ISO format if len(last_video_date) == 8 and last_video_date.isdigit(): # YYYYMMDD format from yt-dlp last_upload = datetime.strptime(last_video_date, '%Y%m%d') else: # ISO format last_upload = datetime.fromisoformat(last_video_date) days_inactive = (datetime.now() - last_upload).days reason = f"No uploads in {days_inactive} days (threshold: {threshold_months * 30} days)" cursor.execute(''' UPDATE youtube_channel_monitors SET status = 'paused_auto', paused_date = ?, paused_reason = ? WHERE id = ? ''', (datetime.now().isoformat(), reason, channel_id)) paused_count += 1 logger.info(f"Auto-paused channel '{channel_name}': {reason}") except (ValueError, TypeError) as e: logger.error(f"Error parsing date for channel {channel_id}: {e}") continue conn.commit() return paused_count finally: conn.close() async def check_paused_channels(self) -> int: """ Periodically check paused channels to see if they've resumed posting. 
Returns: Number of channels auto-resumed """ from datetime import timedelta settings = self.get_global_settings() check_interval_days = settings.get('paused_check_interval_days', 14) threshold_months = settings.get('auto_pause_threshold_months', 24) # Find paused channels that need checking cutoff_date = datetime.now() - timedelta(days=check_interval_days) conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' SELECT id, channel_url, channel_name, status FROM youtube_channel_monitors WHERE status LIKE 'paused_%' AND (last_check_date IS NULL OR last_check_date < ?) ''', (cutoff_date.isoformat(),)) paused_channels = [dict(row) for row in cursor.fetchall()] finally: conn.close() resumed_count = 0 for channel in paused_channels: try: # Check if channel has new videos (light check - just get latest 5) videos = await self.get_channel_videos(channel['channel_url'], max_results=5) most_recent_upload = None if videos: # Find most recent video for video in videos: upload_date_str = video.get('upload_date') if upload_date_str: if not most_recent_upload or upload_date_str > most_recent_upload: most_recent_upload = upload_date_str # Update last_check_date and last_video_date conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' UPDATE youtube_channel_monitors SET last_check_date = ?, last_video_date = ? WHERE id = ? 
''', (datetime.now().isoformat(), most_recent_upload, channel['id'])) conn.commit() finally: conn.close() # Check if auto-paused channel should be resumed if channel['status'] == 'paused_auto' and most_recent_upload: try: # Parse date (yt-dlp format: YYYYMMDD) if len(most_recent_upload) == 8: upload_datetime = datetime.strptime(most_recent_upload, '%Y%m%d') else: upload_datetime = datetime.fromisoformat(most_recent_upload) days_since_upload = (datetime.now() - upload_datetime).days # If upload is recent (within threshold), auto-resume if days_since_upload < (threshold_months * 30): self.resume_channel(channel['id']) resumed_count += 1 logger.info(f"Auto-resumed channel '{channel['channel_name']}' - new upload detected ({days_since_upload} days old)") except (ValueError, TypeError) as e: logger.error(f"Error parsing upload date for channel {channel['id']}: {e}") except Exception as e: logger.error(f"Error checking paused channel {channel['id']} ({channel['channel_name']}): {e}") continue return resumed_count def check_paused_channels_sync(self) -> int: """Synchronous wrapper for check_paused_channels.""" import asyncio loop = asyncio.new_event_loop() try: return loop.run_until_complete(self.check_paused_channels()) finally: loop.close() # ========================================================================= # HISTORY METHODS # ========================================================================= def get_channel_history(self, channel_id: int, limit: int = 50) -> List[Dict]: """Get history for a specific channel.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' SELECT id, monitor_id, video_id, video_title, matched_phrase, action, created_at FROM youtube_monitor_history WHERE monitor_id = ? ORDER BY created_at DESC LIMIT ? 
''', (channel_id, limit)) return [dict(row) for row in cursor.fetchall()] finally: conn.close() def get_all_history(self, limit: int = 100) -> List[Dict]: """Get combined history for all channels.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' SELECT h.id, h.monitor_id, h.video_id, h.video_title, h.matched_phrase, h.action, h.created_at, c.channel_name, c.channel_url FROM youtube_monitor_history h LEFT JOIN youtube_channel_monitors c ON h.monitor_id = c.id ORDER BY h.created_at DESC LIMIT ? ''', (limit,)) return [dict(row) for row in cursor.fetchall()] finally: conn.close() def _is_video_processed(self, channel_id: int, video_id: str) -> bool: """Check if a video has already been processed for a channel.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' SELECT 1 FROM youtube_monitor_history WHERE monitor_id = ? AND video_id = ? ''', (channel_id, video_id)) return cursor.fetchone() is not None finally: conn.close() def _record_video_processed(self, channel_id: int, video_id: str, video_title: str, matched_phrase: str, action: str): """Record that a video has been processed.""" conn = self._get_connection() try: cursor = conn.cursor() cursor.execute(''' INSERT OR IGNORE INTO youtube_monitor_history (monitor_id, video_id, video_title, matched_phrase, action) VALUES (?, ?, ?, ?, ?) 
''', (channel_id, video_id, video_title, matched_phrase, action)) conn.commit() except Exception as e: logger.error(f"Failed to record video processed: {e}") finally: conn.close() def _update_channel_stats(self, channel_id: int, videos_added: int, most_recent_upload: str = None): """Update channel statistics after a check.""" conn = self._get_connection() try: cursor = conn.cursor() # Get channel info for accurate matching cursor.execute('SELECT channel_id, channel_name FROM youtube_channel_monitors WHERE id = ?', (channel_id,)) row = cursor.fetchone() if not row: return yt_channel_id, channel_name = row[0], row[1] # Count videos from celebrity_discovered_videos (Internet Discovery database) # This is the shared database that shows on the Internet Discovery page if yt_channel_id: cursor.execute(''' SELECT COUNT(*) FROM celebrity_discovered_videos WHERE ( -- Match by channel_id channel_id = ? OR -- Fallback: match by name if video has no channel_id (channel_id IS NULL OR channel_id = '') AND REPLACE(LOWER(channel_name), ' ', '') = REPLACE(LOWER(?), ' ', '') ) AND platform = 'youtube' ''', (yt_channel_id, channel_name)) total_count = cursor.fetchone()[0] else: # Fallback to name-only matching if monitor has no channel_id cursor.execute(''' SELECT COUNT(*) FROM celebrity_discovered_videos WHERE REPLACE(LOWER(channel_name), ' ', '') = REPLACE(LOWER(?), ' ', '') AND platform = 'youtube' ''', (channel_name,)) total_count = cursor.fetchone()[0] # Update last_video_date if we have a new value, or keep existing if we don't cursor.execute(''' UPDATE youtube_channel_monitors SET last_checked = ?, last_check_date = ?, videos_found = ?, total_videos_found = ?, last_video_date = CASE WHEN ? IS NOT NULL THEN ? ELSE last_video_date END WHERE id = ? 
''', ( datetime.now().isoformat(), datetime.now().isoformat(), total_count, total_count, most_recent_upload, most_recent_upload, channel_id )) conn.commit() finally: conn.close() # ========================================================================= # VIDEO FETCHING AND MATCHING # ========================================================================= async def _get_channel_latest_upload_date(self, channel_url: str) -> str: """ Get the upload date of the most recent video on a channel. Uses full metadata fetch (not flat-playlist) to get accurate upload_date. Tries multiple URL formats if the first attempt fails. Args: channel_url: URL of the YouTube channel Returns: Upload date string in YYYYMMDD format, or None if not found """ # Try multiple URL formats urls_to_try = [] # First try: /videos suffix base_url = channel_url.rstrip('/') for suffix in ['/videos', '/streams', '/shorts', '/playlists', '/community', '/about']: if base_url.endswith(suffix): base_url = base_url[:-len(suffix)] break urls_to_try.append(f"{base_url}/videos") # Second try: base URL without suffix urls_to_try.append(base_url) # Third try: /streams suffix (for channels that primarily stream) urls_to_try.append(f"{base_url}/streams") for url_attempt in urls_to_try: cmd = [ self.yt_dlp_path, '--playlist-end', '1', # Only get the most recent video '--dump-json', '--no-warnings', '--ignore-errors', '--skip-download', url_attempt ] try: process = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=30) # Check stderr for specific errors stderr_text = stderr.decode().strip() if 'does not have a' in stderr_text.lower() or '404' in stderr_text: # Try next URL format continue for line in stdout.decode().strip().split('\n'): if line: try: data = json.loads(line) upload_date = data.get('upload_date') if upload_date: logger.debug(f"Successfully fetched upload date 
{upload_date} from {url_attempt}") return upload_date except json.JSONDecodeError: pass except asyncio.TimeoutError: logger.debug(f"Timeout fetching latest upload date from {url_attempt}") continue except Exception as e: logger.debug(f"Error fetching latest upload date from {url_attempt}: {e}") continue logger.warning(f"Could not fetch latest upload date from {base_url} after trying all URL formats") return None async def get_channel_videos(self, channel_url: str, max_results: int = 20, search_phrase: str = None) -> List[Dict]: """ Fetch videos from a YouTube channel using yt-dlp. Args: channel_url: URL of the YouTube channel max_results: Maximum number of videos to fetch search_phrase: Optional phrase to search within the channel Returns: List of video metadata dictionaries (basic info from flat-playlist) """ # Build the URL based on whether we're searching or fetching recent if search_phrase: # Use channel search URL to find videos matching the phrase # Remove any trailing path from channel URL base_url = channel_url.rstrip('/') for suffix in ['/videos', '/streams', '/shorts', '/playlists', '/community', '/about']: if base_url.endswith(suffix): base_url = base_url[:-len(suffix)] # URL encode the search phrase import urllib.parse encoded_phrase = urllib.parse.quote(search_phrase) channel_url = f"{base_url}/search?query={encoded_phrase}" else: # Ensure URL ends with /videos for recent uploads if not channel_url.endswith('/videos'): if channel_url.endswith('/'): channel_url = channel_url + 'videos' else: channel_url = channel_url + '/videos' cmd = [ self.yt_dlp_path, '--flat-playlist', '--dump-json', '--playlist-end', str(max_results), '--no-warnings', '--ignore-errors', channel_url ] try: process = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=90) videos = [] for line in stdout.decode().strip().split('\n'): if line: try: data = 
json.loads(line) videos.append({ 'video_id': data.get('id'), 'title': data.get('title', ''), 'channel_name': data.get('uploader', data.get('channel', '')), 'channel_id': data.get('channel_id', ''), 'upload_date': data.get('upload_date'), 'duration': data.get('duration', 0), 'view_count': data.get('view_count', 0), 'thumbnail': data.get('thumbnail', ''), 'description': data.get('description', ''), 'url': f"https://www.youtube.com/watch?v={data.get('id')}" }) except json.JSONDecodeError: pass logger.debug(f"Fetched {len(videos)} videos from {channel_url}") return videos except asyncio.TimeoutError: logger.error(f"Timeout fetching videos from {channel_url}") return [] except Exception as e: logger.error(f"Error fetching videos from {channel_url}: {e}") return [] async def fetch_video_metadata(self, video_id: str) -> Dict: """ Fetch full metadata for a single video including upload date, resolution, and thumbnail. Args: video_id: YouTube video ID Returns: Dictionary with full video metadata """ try: cmd = [ self.yt_dlp_path, f'https://www.youtube.com/watch?v={video_id}', '--dump-json', '--no-download', '--no-warnings', '--ignore-errors' ] process = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=30) if stdout: data = json.loads(stdout.decode().strip()) # Extract max resolution (height) and corresponding width from formats max_resolution = 0 max_width = 0 formats = data.get('formats', []) for fmt in formats: height = fmt.get('height') if height and isinstance(height, int) and height > max_resolution: # Only count video formats (not audio-only) if fmt.get('vcodec', 'none') != 'none': max_resolution = height # Get the width for this format width = fmt.get('width') if width and isinstance(width, int): max_width = width # Get best thumbnail - prefer jpg over webp for better compatibility thumbnail = 
f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg" thumbnails = data.get('thumbnails', []) if thumbnails: # Filter for jpg thumbnails only (webp may not exist for all videos) jpg_thumbs = [t for t in thumbnails if t.get('url', '').endswith('.jpg')] if jpg_thumbs: # Get highest quality jpg thumbnail best_thumb = max(jpg_thumbs, key=lambda t: t.get('height', 0) or 0) thumbnail = best_thumb.get('url', thumbnail) return { 'video_id': video_id, 'title': data.get('title', ''), 'channel_name': data.get('uploader', data.get('channel', '')), 'channel_id': data.get('channel_id', ''), 'upload_date': data.get('upload_date', ''), 'duration': data.get('duration', 0), 'view_count': data.get('view_count', 0), 'thumbnail': thumbnail, 'description': data.get('description', '')[:500] if data.get('description') else '', 'max_resolution': max_resolution if max_resolution > 0 else None, 'max_width': max_width if max_width > 0 else None, 'url': f"https://www.youtube.com/watch?v={video_id}" } except asyncio.TimeoutError: logger.warning(f"Timeout fetching metadata for {video_id}") except Exception as e: logger.warning(f"Failed to fetch metadata for {video_id}: {e}") return {} def _matches_phrase(self, title: str, description: str, phrases: List[str]) -> Optional[str]: """ Check if video matches any phrase. Also checks hashtag variations (e.g., "Eva Longoria" matches "#EvaLongoria"). 
Args: title: Video title description: Video description phrases: List of phrases to match Returns: The matched phrase, or None if no match """ text = f"{title} {description}".lower() for phrase in phrases: phrase_lower = phrase.lower() # Check direct match if phrase_lower in text: return phrase # Check hashtag variation (e.g., "Eva Longoria" -> "#evalongoria") # Remove spaces, hyphens, underscores from phrase for hashtag matching hashtag_phrase = '#' + phrase_lower.replace(' ', '').replace('-', '').replace('_', '') if hashtag_phrase in text: return phrase return None def _add_to_download_queue(self, video: Dict, channel: Dict, quality: str) -> bool: """ Add a matching video directly to the video_download_queue. Args: video: Video metadata dictionary (should be full metadata from fetch_video_metadata) channel: Channel dictionary quality: Video quality from global settings Returns: True if successfully added to queue """ conn = self._get_connection() try: cursor = conn.cursor() # Check if video already exists in queue cursor.execute(''' SELECT 1 FROM video_download_queue WHERE platform = 'youtube' AND video_id = ? ''', (video['video_id'],)) if cursor.fetchone(): logger.debug(f"Video {video['video_id']} already in queue") return False # Parse upload date if available (format: YYYYMMDD) upload_date = None if video.get('upload_date'): try: upload_date = datetime.strptime(video['upload_date'], '%Y%m%d').isoformat() except (ValueError, TypeError): pass # Skip invalid date formats # Insert into queue with all metadata fields matching celebrity discovery cursor.execute(''' INSERT INTO video_download_queue (platform, video_id, url, title, channel_name, thumbnail, duration, upload_date, view_count, max_resolution, max_width, description, source_type, source_name, priority, status, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
    def _add_to_download_queue(self, video: Dict, channel: Dict, quality: str) -> bool:
        """
        Add a matching video directly to the video_download_queue.

        Args:
            video: Video metadata dictionary (should be full metadata from fetch_video_metadata)
            channel: Channel dictionary
            quality: Video quality from global settings

        Returns:
            True if successfully added to queue; False if it was already
            queued or the insert failed.
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            # Check if video already exists in queue (cheap dedup before insert)
            cursor.execute('''
                SELECT 1 FROM video_download_queue
                WHERE platform = 'youtube' AND video_id = ?
            ''', (video['video_id'],))
            if cursor.fetchone():
                logger.debug(f"Video {video['video_id']} already in queue")
                return False
            # Parse upload date if available (format: YYYYMMDD)
            upload_date = None
            if video.get('upload_date'):
                try:
                    upload_date = datetime.strptime(video['upload_date'], '%Y%m%d').isoformat()
                except (ValueError, TypeError):
                    pass  # Skip invalid date formats
            # Insert into queue with all metadata fields matching celebrity discovery
            cursor.execute('''
                INSERT INTO video_download_queue
                (platform, video_id, url, title, channel_name, thumbnail, duration,
                 upload_date, view_count, max_resolution, max_width, description,
                 source_type, source_name, priority, status, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                'youtube',
                video['video_id'],
                video['url'],
                video['title'],
                video.get('channel_name', channel.get('channel_name', '')),
                video.get('thumbnail', ''),
                video.get('duration', 0),
                upload_date,
                video.get('view_count', 0),
                video.get('max_resolution'),  # Now included from full metadata
                video.get('max_width'),  # Video width for aspect ratio
                # Description is truncated to 500 chars to keep the row small
                video.get('description', '')[:500] if video.get('description') else None,
                'youtube_monitor',
                f"Monitor: {channel.get('channel_name', channel['channel_url'])}",
                5,  # Default priority
                'pending',
                # JSON metadata lets the downloader recover the monitor context
                json.dumps({
                    'channel_id': channel['id'],
                    'quality': quality,
                    'output_path': self.default_output_path,
                    'matched_from': 'youtube_channel_monitor'
                })
            ))
            conn.commit()
            logger.info(f"Added video '{video['title'][:50]}' to download queue (res: {video.get('max_resolution', 'N/A')}p)")
            return True
        except sqlite3.IntegrityError:
            # Race with another writer: a unique constraint caught the duplicate
            logger.debug(f"Video {video['video_id']} already exists in queue (integrity error)")
            return False
        except Exception as e:
            logger.error(f"Failed to add video to queue: {e}")
            return False
        finally:
            conn.close()
    def _get_or_create_monitor_preset(self, celebrity_id: int, channel_name: str) -> int:
        """
        Get or create a preset for YouTube Monitor videos.

        Args:
            celebrity_id: ID of the celebrity the preset belongs to
            channel_name: Monitored channel name (truncated to 30 chars in the preset name)

        Returns:
            The existing or newly created preset row ID.
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            preset_name = f"YT Monitor: {channel_name[:30]}"
            # Check if preset exists
            cursor.execute('''
                SELECT id FROM celebrity_search_presets
                WHERE celebrity_id = ? AND name = ?
            ''', (celebrity_id, preset_name))
            row = cursor.fetchone()
            if row:
                return row[0]
            # Create new preset
            cursor.execute('''
                INSERT INTO celebrity_search_presets
                (name, celebrity_id, source_type, source_value, platform, enabled, category)
                VALUES (?, ?, 'youtube_monitor', ?, 'youtube', 1, 'youtube_monitor')
            ''', (preset_name, celebrity_id, channel_name))
            conn.commit()
            return cursor.lastrowid
        finally:
            conn.close()

    def _find_celebrity_by_phrase(self, phrase: str) -> Optional[int]:
        """
        Find a celebrity ID that matches the phrase (by name).

        Tries a case-insensitive exact name match first, then a substring
        (LIKE) match.

        Args:
            phrase: Phrase to match against celebrity names

        Returns:
            The first matching celebrity ID, or None if no match.
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            # Try exact match first
            cursor.execute('''
                SELECT id FROM celebrity_profiles
                WHERE LOWER(name) = LOWER(?)
            ''', (phrase,))
            row = cursor.fetchone()
            if row:
                return row[0]
            # Try partial match
            # NOTE(review): no ORDER BY, so which row wins on multiple partial
            # matches is unspecified — confirm this is acceptable.
            cursor.execute('''
                SELECT id FROM celebrity_profiles
                WHERE LOWER(name) LIKE LOWER(?)
            ''', (f'%{phrase}%',))
            row = cursor.fetchone()
            return row[0] if row else None
        finally:
            conn.close()
    def _add_to_discovery(self, video: Dict, channel: Dict, matched_phrase: str) -> bool:
        """
        Add a matching video to the celebrity discovery page.

        Args:
            video: Video metadata dictionary
            channel: Channel dictionary
            matched_phrase: The phrase that matched (used to find celebrity)

        Returns:
            True if successfully added; False if no celebrity matched, the
            video was already in discovery, or the insert failed.
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            # Find celebrity by phrase
            celebrity_id = self._find_celebrity_by_phrase(matched_phrase)
            if not celebrity_id:
                logger.warning(f"No celebrity found for phrase '{matched_phrase}' - skipping")
                return False
            # Get or create preset for this channel
            # (falls back to the handle segment after '@' in the channel URL)
            channel_name = channel.get('channel_name', channel['channel_url'].split('@')[-1])
            preset_id = self._get_or_create_monitor_preset(celebrity_id, channel_name)
            # Check if video already exists in discovery
            cursor.execute('''
                SELECT 1 FROM celebrity_discovered_videos
                WHERE video_id = ? AND platform = 'youtube'
            ''', (video['video_id'],))
            if cursor.fetchone():
                logger.debug(f"Video {video['video_id']} already in discovery")
                return False
            # Parse upload date if available (format: YYYYMMDD)
            upload_date = None
            if video.get('upload_date'):
                try:
                    upload_date = datetime.strptime(video['upload_date'], '%Y%m%d').isoformat()
                except (ValueError, TypeError):
                    pass  # Skip invalid date formats
            # Insert into celebrity_discovered_videos
            cursor.execute('''
                INSERT INTO celebrity_discovered_videos
                (preset_id, celebrity_id, video_id, platform, url, title, channel_name,
                 channel_id, thumbnail, duration, upload_date, view_count, description,
                 content_type, status, max_resolution, max_width, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                preset_id,
                celebrity_id,
                video['video_id'],
                'youtube',
                video['url'],
                video['title'],
                video.get('channel_name', channel.get('channel_name', '')),
                video.get('channel_id', ''),
                video.get('thumbnail', ''),
                video.get('duration', 0),
                upload_date,
                video.get('view_count', 0),
                # Truncate description to keep the row small
                video.get('description', '')[:500] if video.get('description') else None,
                'youtube_monitor',
                'new',
                video.get('max_resolution'),
                video.get('max_width'),
                # JSON metadata records which monitor/phrase produced this row
                json.dumps({
                    'monitor_channel_id': channel['id'],
                    'monitor_channel_name': channel_name,
                    'matched_phrase': matched_phrase
                })
            ))
            conn.commit()
            # Pre-cache thumbnail for faster page loading
            thumbnail_url = video.get('thumbnail', '')
            if thumbnail_url:
                self._cache_thumbnail(video['video_id'], thumbnail_url, cursor, conn)
            # Update monitor's channel_id if not set (for accurate future matching)
            if video.get('channel_id'):
                cursor.execute('''
                    UPDATE youtube_channel_monitors
                    SET channel_id = ?
                    WHERE id = ? AND (channel_id IS NULL OR channel_id = '')
                ''', (video['channel_id'], channel['id']))
                conn.commit()
            logger.info(f"Added video '{video['title'][:50]}' to discovery (res: {video.get('max_resolution', 'N/A')}p)")
            return True
        except sqlite3.IntegrityError:
            # Race with another writer: a unique constraint caught the duplicate
            logger.debug(f"Video {video['video_id']} already exists in discovery (integrity error)")
            return False
        except Exception as e:
            logger.error(f"Failed to add video to discovery: {e}")
            return False
        finally:
            conn.close()

    def _cache_thumbnail(self, video_id: str, thumbnail_url: str, cursor, conn) -> None:
        """
        Pre-cache thumbnail by fetching from URL and storing in database.
        This speeds up Internet Discovery page loading.

        Best-effort: any failure is logged at debug level and ignored.
        Uses the caller's open cursor/connection so it participates in the
        same database session.
        """
        try:
            import requests
            response = requests.get(thumbnail_url, timeout=10, headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            })
            if response.status_code == 200 and response.content:
                # Store the raw image bytes alongside the discovery row
                cursor.execute('''
                    UPDATE celebrity_discovered_videos
                    SET thumbnail_data = ?
                    WHERE video_id = ?
                ''', (response.content, video_id))
                conn.commit()
                logger.debug(f"Cached thumbnail for {video_id}")
        except Exception as e:
            logger.debug(f"Failed to cache thumbnail for {video_id}: {e}")
    async def check_channel(self, channel: Dict, phrases: List[str], quality: str) -> int:
        """
        Check a single channel for matching videos by searching for each phrase.

        Args:
            channel: Channel dictionary
            phrases: Global phrases to search for
            quality: Global quality setting

        Returns:
            Number of new videos added to Internet Discovery
        """
        channel_name = channel.get('channel_name') or channel['channel_url']
        if not phrases:
            logger.warning(f"No global phrases configured - skipping check")
            return 0
        logger.info(f"Checking channel: {channel_name}")
        videos_added = 0
        seen_video_ids = set()
        most_recent_upload = None  # Track most recent video upload date
        # First, get the channel's latest videos (no search) to track last upload date
        # This is needed for auto-pause logic even if no videos match the search phrases
        try:
            # Get just the most recent video with full metadata to get upload_date
            most_recent_upload = await self._get_channel_latest_upload_date(channel['channel_url'])
            if most_recent_upload:
                logger.debug(f"Channel latest upload: {most_recent_upload}")
        except Exception as e:
            logger.debug(f"Could not fetch latest upload date for {channel_name}: {e}")
        # Search for each phrase on the channel
        settings = self.get_global_settings()
        max_results = settings.get('max_results_per_phrase', 100)
        for phrase in phrases:
            # Search the channel for this phrase
            videos = await self.get_channel_videos(channel['channel_url'], max_results=max_results, search_phrase=phrase)
            logger.debug(f"Found {len(videos)} videos searching for '{phrase}' on {channel_name}")
            for video in videos:
                video_id = video.get('video_id')
                if not video_id:
                    continue
                # Track most recent upload date
                # (YYYYMMDD strings compare correctly lexicographically)
                upload_date = video.get('upload_date')
                if upload_date:
                    if not most_recent_upload or upload_date > most_recent_upload:
                        most_recent_upload = upload_date
                # Skip duplicates within this check (same video found by multiple phrases)
                if video_id in seen_video_ids:
                    continue
                seen_video_ids.add(video_id)
                # Skip if already processed
                if self._is_video_processed(channel['id'], video_id):
                    continue
                # Verify the phrase actually appears in title (not description)
                # YouTube search can return related/recommended content
                matched_phrase = self._matches_phrase(
                    video.get('title', ''),
                    '',  # Only match on title, not description
                    [phrase]
                )
                if matched_phrase:
                    # Fetch full metadata for matching video (includes resolution, thumbnail, etc.)
                    logger.info(f"Matched phrase '{matched_phrase}' - fetching full metadata for: {video['title'][:60]}")
                    full_video = await self.fetch_video_metadata(video_id)
                    if full_video:
                        # IMPORTANT: Verify the video is actually from the monitored channel
                        # YouTube's channel search can return videos from other channels
                        video_channel_name = (full_video.get('channel_name') or '').lower().strip()
                        monitored_channel_name = (channel.get('channel_name') or '').lower().strip()
                        video_channel_id = (full_video.get('channel_id') or '').lower().strip()
                        # Check if channel matches (by name or by channel ID in URL)
                        channel_url_lower = (channel.get('channel_url') or '').lower()
                        channel_matches = (
                            video_channel_name == monitored_channel_name or
                            (video_channel_id and video_channel_id in channel_url_lower) or
                            (video_channel_name and video_channel_name in channel_url_lower)
                        )
                        if not channel_matches:
                            logger.debug(f"Skipping video from different channel: '{full_video.get('channel_name')}' (expected '{channel.get('channel_name')}')")
                            # Record so we don't re-examine this video next cycle
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'wrong_channel'
                            )
                            continue
                        # Use full metadata - add to discovery page
                        if self._add_to_discovery(full_video, channel, matched_phrase):
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'discovered'
                            )
                            videos_added += 1
                        else:
                            # Already in discovery or failed
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'skipped'
                            )
                    else:
                        # Fallback to basic info if full metadata fetch fails
                        logger.warning(f"Could not fetch full metadata for {video_id}, using basic info")
                        # Still verify channel matches using basic info
                        # (no channel_id available in flat-playlist fallback)
                        video_channel_name = (video.get('channel_name') or '').lower().strip()
                        monitored_channel_name = (channel.get('channel_name') or '').lower().strip()
                        channel_url_lower = (channel.get('channel_url') or '').lower()
                        channel_matches = (
                            video_channel_name == monitored_channel_name or
                            (video_channel_name and video_channel_name in channel_url_lower)
                        )
                        if not channel_matches:
                            logger.debug(f"Skipping video from different channel: '{video.get('channel_name')}' (expected '{channel.get('channel_name')}')")
                            self._record_video_processed(
                                channel['id'], video_id, video.get('title', ''),
                                matched_phrase, 'wrong_channel'
                            )
                            continue
                        if self._add_to_discovery(video, channel, matched_phrase):
                            self._record_video_processed(
                                channel['id'], video_id, video.get('title', ''),
                                matched_phrase, 'discovered'
                            )
                            videos_added += 1
                        else:
                            self._record_video_processed(
                                channel['id'], video_id, video.get('title', ''),
                                matched_phrase, 'skipped'
                            )
                    # Small delay between metadata fetches to avoid rate limiting
                    await asyncio.sleep(1)
            # Small delay between phrase searches
            if len(phrases) > 1:
                await asyncio.sleep(2)
        # Update channel stats
        self._update_channel_stats(channel['id'], videos_added, most_recent_upload)
        # Check if this channel should be auto-paused due to inactivity
        await self._check_channel_for_auto_pause(channel['id'])
        return videos_added
    async def check_single_channel(self, channel_id: int) -> int:
        """
        Check a single channel by ID (for manual trigger).

        Args:
            channel_id: ID of the channel to check

        Returns:
            Number of new videos added to Internet Discovery
        """
        channel = self.get_channel(channel_id)
        if not channel:
            logger.error(f"Channel {channel_id} not found")
            return 0
        settings = self.get_global_settings()
        phrases = settings.get('phrases', [])
        quality = settings.get('quality', 'best')
        if not phrases:
            logger.warning(f"No global phrases configured")
            return 0
        videos_added = await self.check_channel(channel, phrases, quality)
        # Auto-start queue if enabled and videos were added
        if videos_added > 0 and settings.get('auto_start_queue'):
            await self._trigger_queue_start()
        return videos_added

    async def run_check_cycle(self) -> int:
        """
        Main entry point - check all enabled channels using global settings.

        Returns:
            Total number of new videos added to Internet Discovery
        """
        settings = self.get_global_settings()
        if not settings.get('enabled'):
            logger.debug("YouTube channel monitoring is disabled globally")
            return 0
        phrases = settings.get('phrases', [])
        quality = settings.get('quality', 'best')
        if not phrases:
            logger.debug("No global phrases configured for YouTube monitoring")
            return 0
        channels = self.get_active_channels()
        if not channels:
            logger.debug("No active YouTube channels to monitor")
            return 0
        total_added = 0
        total_channels = len(channels)
        logger.info(f"Running YouTube channel monitor: {total_channels} channels, phrases: {phrases}")
        # Start background task tracking (separate from main scheduler activity)
        if self.activity_manager:
            self.activity_manager.start_background_task(
                'youtube_monitor',
                'youtube_channel_monitor',
                'YouTube Channel Monitor',
                'Running',
                {'total_channels': total_channels, 'videos_found': 0}
            )
        # Randomize order to avoid detection patterns
        random.shuffle(channels)
        for idx, channel in enumerate(channels, 1):
            try:
                channel_name = channel.get('channel_name') or channel['channel_url'].replace('https://www.youtube.com/@', '@')
                # Update background task status
                if self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Checking: {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added, 'current_channel': channel_name}
                    )
                videos_added = await self.check_channel(channel, phrases, quality)
                # Update status if we found new videos
                if videos_added > 0 and self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Found {videos_added} new in {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added + videos_added, 'current_channel': channel_name, 'last_found': videos_added}
                    )
                total_added += videos_added
                # Delay between channel checks (with jitter to avoid detection)
                base_delay = 4 + random.uniform(0, 2)  # 4-6 seconds
                await asyncio.sleep(base_delay)
                # Batch pause every 50 channels to reduce rate limiting
                if idx % 50 == 0 and idx < total_channels:
                    logger.info(f"YouTube monitor: Pausing 30s after {idx} channels to avoid rate limiting")
                    if self.activity_manager:
                        self.activity_manager.update_background_task(
                            'youtube_monitor',
                            f'Rate limit pause ({idx}/{total_channels})',
                            idx, total_channels,
                            {'total_channels': total_channels, 'videos_found': total_added, 'status': 'pausing'}
                        )
                    await asyncio.sleep(30)
            except Exception as e:
                # One failing channel must not abort the whole cycle
                logger.error(f"Error checking channel {channel['id']}: {e}")
        # Update global last_checked
        self._update_last_checked()
        # Note: Auto-pause now happens per-channel in check_channel() for real-time feedback
        # Stop background task tracking
        if self.activity_manager:
            self.activity_manager.stop_background_task('youtube_monitor')
        if total_added > 0:
            logger.info(f"YouTube channel monitor complete: added {total_added} videos to queue")
            # Send notification if enabled
            if settings.get('notifications_enabled'):
                self._send_notification(total_added)
            # Auto-start queue if enabled
            if settings.get('auto_start_queue'):
                await self._trigger_queue_start()
        else:
            logger.debug("YouTube channel monitor complete: no new matching videos")
        return total_added
    async def _trigger_queue_start(self):
        """
        Trigger the video download queue to start processing.

        Best-effort: when running inside the API process it starts or resumes
        the in-process queue processor directly; when running from the
        scheduler process (ImportError path) it logs and gives up.
        """
        try:
            # Try direct access first (works when running within API process)
            from web.backend.routers.video_queue import queue_processor, get_app_state
            import asyncio  # NOTE(review): redundant — asyncio is already imported at module level
            if queue_processor.is_running and not queue_processor.is_paused:
                logger.info("Auto-start: Queue processor already running")
                return
            if queue_processor.is_paused:
                queue_processor.resume()
                logger.info("Auto-start: Queue processor resumed")
                return
            app_state = get_app_state()
            # Check if app_state.db is available (may be None when running from scheduler)
            if app_state is None or app_state.db is None:
                logger.debug("Auto-start: app_state.db not available, skipping")
                return
            # Check if there are pending items
            with app_state.db.get_connection() as conn:
                cursor = conn.cursor()
                cursor.execute("SELECT COUNT(*) FROM video_download_queue WHERE status = 'pending'")
                pending_count = cursor.fetchone()[0]
            if pending_count == 0:
                logger.debug("Auto-start: No pending items in queue")
                return
            # Start the processor
            queue_processor.start(app_state.db)
            logger.info(f"Auto-started download queue: {pending_count} pending items")
        except ImportError:
            # Running from scheduler process - fall back to HTTP (won't work without auth)
            logger.debug("Auto-start: Running outside API process, queue must be started manually")
        except Exception as e:
            logger.warning(f"Could not auto-start download queue: {e}")
    def _send_notification(self, videos_added: int):
        """
        Send a Pushover notification about new videos added.

        Args:
            videos_added: Number of videos added to Internet Discovery

        Best-effort: any failure is caught and logged as a warning.
        """
        try:
            import random
            from modules.pushover_notifier import PushoverNotifier
            from modules.settings_manager import SettingsManager
            from modules.unified_database import UnifiedDatabase
            # Get pushover config from settings
            settings_manager = SettingsManager(self.db_path)
            pushover_config = settings_manager.get('pushover', {})
            if not pushover_config.get('enabled'):
                logger.debug("Pushover notifications disabled globally")
                return
            # Create unified_db for recording notification to database
            unified_db = UnifiedDatabase(self.db_path)
            # Create notifier with unified_db so notification is recorded
            notifier = PushoverNotifier(
                api_token=pushover_config.get('api_token'),
                user_key=pushover_config.get('user_key'),
                unified_db=unified_db
            )
            # Get thumbnail and channel summary from videos just added to Internet Discovery
            image_path = None
            channel_summary = ""
            channel_list = []
            conn = self._get_connection()
            try:
                cursor = conn.cursor()
                # Get only the videos from this batch (most recent N videos from celebrity_discovered_videos)
                cursor.execute('''
                    SELECT thumbnail, title, channel_name
                    FROM celebrity_discovered_videos
                    WHERE content_type = 'youtube_monitor'
                    AND thumbnail IS NOT NULL AND thumbnail != ''
                    ORDER BY discovered_at DESC
                    LIMIT ?
                ''', (videos_added,))
                rows = cursor.fetchall()
                if rows:
                    # Pick a random thumbnail from this batch
                    selected = random.choice(rows)
                    thumbnail_url = selected['thumbnail']
                    # Build channel summary from unique channels in THIS batch only
                    uploaders = {}
                    for row in rows:
                        uploader = row['channel_name'] or 'Unknown'
                        uploaders[uploader] = uploaders.get(uploader, 0) + 1
                        if uploader not in channel_list:
                            channel_list.append(uploader)
                    # Format: "Channel1 (3), Channel2 (2)" - only if count > 1
                    channel_parts = [f"{name} ({count})" if count > 1 else name
                                     for name, count in sorted(uploaders.items(), key=lambda x: -x[1])[:5]]
                    if channel_parts:
                        channel_summary = "\n\nFrom: " + ", ".join(channel_parts)
                    # Download thumbnail to temp file
                    if thumbnail_url:
                        import urllib.request
                        import tempfile
                        try:
                            temp_dir = tempfile.gettempdir()
                            # NOTE(review): predictable temp filename; consider
                            # tempfile.mkstemp() to avoid collisions/races.
                            temp_path = f"{temp_dir}/yt_thumb_{random.randint(1000, 9999)}.jpg"
                            urllib.request.urlretrieve(thumbnail_url, temp_path)
                            image_path = temp_path
                            logger.debug(f"Downloaded thumbnail for notification: {temp_path}")
                        except Exception as e:
                            logger.debug(f"Could not download thumbnail: {e}")
            finally:
                conn.close()
            # Build message
            title = "YouTube Monitor"
            message = f"Added {videos_added} new video{'s' if videos_added > 1 else ''} to Internet Discovery{channel_summary}"
            # Set notification context for database recording
            notifier._current_notification_context = {
                'platform': 'youtube',
                'source': 'youtube_monitor',
                'content_type': 'video',
                'download_count': videos_added,
                'metadata': {'channels': channel_list}
            }
            # Send notification
            success = notifier.send_notification(
                title=title,
                message=message,
                priority=0,  # Normal priority
                image_path=image_path
            )
            # Clean up temp file
            if image_path:
                try:
                    import os
                    os.unlink(image_path)
                except OSError:
                    pass  # Best effort cleanup of temp file
            if success:
                logger.info(f"Sent notification: {videos_added} videos added")
            else:
                logger.debug("Notification not sent (disabled or failed)")
        except Exception as e:
            logger.warning(f"Could not send notification: {e}")
    async def check_all_now(self, from_scheduler: bool = False) -> int:
        """
        Force check all channels immediately (ignoring interval).

        Args:
            from_scheduler: If True, send push notifications (scheduler runs only)

        Returns:
            Total number of new videos added to Internet Discovery
        """
        settings = self.get_global_settings()
        phrases = settings.get('phrases', [])
        quality = settings.get('quality', 'best')
        if not phrases:
            logger.warning("No global phrases configured")
            return 0
        channels = self.get_enabled_channels()
        if not channels:
            logger.warning("No enabled YouTube channels to monitor")
            return 0
        total_added = 0
        total_channels = len(channels)
        logger.info(f"Force checking all YouTube channels: {total_channels} channels")
        # Crash recovery checkpoint: lets a restarted run skip channels that
        # were already completed before the crash
        from modules.task_checkpoint import TaskCheckpoint
        checkpoint = TaskCheckpoint('youtube_channel_monitor', 'background')
        checkpoint.start(total_items=total_channels)
        if checkpoint.is_recovering():
            logger.info(f"YouTube monitor: recovering — skipping already-checked channels")
        # Start background task tracking (separate from main scheduler activity)
        if self.activity_manager:
            self.activity_manager.start_background_task(
                'youtube_monitor',
                'youtube_channel_monitor',
                'YouTube Channel Monitor',
                'Running',
                {'total_channels': total_channels, 'videos_found': 0}
            )
        # Randomize order to avoid detection patterns
        random.shuffle(channels)
        for idx, channel in enumerate(channels, 1):
            try:
                channel_id = str(channel.get('id', ''))
                channel_name = channel.get('channel_name') or channel['channel_url'].replace('https://www.youtube.com/@', '@')
                if checkpoint.is_completed(channel_id):
                    continue
                checkpoint.set_current(channel_id)
                if self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Checking: {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added, 'current_channel': channel_name}
                    )
                videos_added = await self.check_channel(channel, phrases, quality)
                # Update status if we found new videos
                if videos_added > 0 and self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Found {videos_added} new in {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added + videos_added, 'current_channel': channel_name, 'last_found': videos_added}
                    )
                total_added += videos_added
                checkpoint.mark_completed(channel_id)
                # Delay between channel checks (with jitter to avoid detection)
                base_delay = 4 + random.uniform(0, 2)  # 4-6 seconds
                await asyncio.sleep(base_delay)
                # Batch pause every 50 channels to reduce rate limiting
                if idx % 50 == 0 and idx < total_channels:
                    logger.info(f"YouTube monitor: Pausing 30s after {idx} channels to avoid rate limiting")
                    if self.activity_manager:
                        self.activity_manager.update_background_task(
                            'youtube_monitor',
                            f'Rate limit pause ({idx}/{total_channels})',
                            idx, total_channels,
                            {'total_channels': total_channels, 'videos_found': total_added, 'status': 'pausing'}
                        )
                    await asyncio.sleep(30)
            except Exception as e:
                # One failing channel must not abort the whole run
                logger.error(f"Error checking channel {channel['id']}: {e}")
        # Update global last_checked
        self._update_last_checked()
        # Checkpoint complete
        checkpoint.finish()
        # Stop background task tracking
        if self.activity_manager:
            self.activity_manager.stop_background_task('youtube_monitor')
        if total_added > 0:
            logger.info(f"YouTube channel monitor complete: added {total_added} videos to queue")
            # Send notification if enabled (only for scheduler runs)
            if from_scheduler and settings.get('notifications_enabled'):
                self._send_notification(total_added)
            # Auto-start the download queue if configured
            if settings.get('auto_start_queue'):
                await self._trigger_queue_start()
        else:
            logger.debug("YouTube channel monitor complete: no new matching videos")
        return total_added
Returns: Total number of new videos added to Internet Discovery """ try: loop = asyncio.get_event_loop() except RuntimeError: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) return loop.run_until_complete(self.run_check_cycle()) # Convenience function for external use def create_youtube_monitor(db_path: str, activity_manager=None) -> YouTubeChannelMonitor: """Create a YouTubeChannelMonitor instance.""" return YouTubeChannelMonitor(db_path, activity_manager)