Files
media-downloader/modules/youtube_channel_monitor.py
Todd 0d7b2b1aab Initial commit
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00

2180 lines
88 KiB
Python

#!/usr/bin/env python3
"""
YouTube Channel Monitor Module
Monitors specified YouTube channels for new videos matching global phrases,
then automatically adds matching videos to the download queue.
Design:
- Global settings (phrases, interval, quality) apply to ALL channels
- Channels are just URLs to monitor - no per-channel configuration
- All channels are checked together when the interval triggers
"""
import asyncio
import json
import random
import re
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from modules.universal_logger import get_logger
logger = get_logger('YouTubeMonitor')
class YouTubeChannelMonitor:
"""
Background monitor for YouTube channels.
Uses global phrases and interval settings for all channels.
"""
def __init__(self, db_path: str, activity_manager=None):
"""
Initialize the YouTube Channel Monitor.
Args:
db_path: Path to the SQLite database
activity_manager: Optional activity manager for status updates
"""
self.db_path = db_path
self.activity_manager = activity_manager
self.yt_dlp_path = '/opt/media-downloader/venv/bin/yt-dlp'
self.default_output_path = '/opt/immich/md/youtube/'
def _get_connection(self) -> sqlite3.Connection:
"""Get a database connection with row factory."""
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
return conn
# =========================================================================
# GLOBAL SETTINGS METHODS
# =========================================================================
def get_global_settings(self) -> Dict:
"""Get the global monitor settings."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT phrases, check_interval_hours, quality, enabled, last_checked, updated_at,
auto_start_queue, notifications_enabled, auto_pause_threshold_months,
paused_check_interval_days, max_results_per_phrase
FROM youtube_monitor_settings
WHERE id = 1
''')
row = cursor.fetchone()
if row:
settings = dict(row)
try:
settings['phrases'] = json.loads(settings['phrases'])
except (json.JSONDecodeError, TypeError, ValueError):
settings['phrases'] = []
# Ensure all fields are present with defaults
if 'auto_start_queue' not in settings:
settings['auto_start_queue'] = 0
if 'notifications_enabled' not in settings:
settings['notifications_enabled'] = 1
if 'auto_pause_threshold_months' not in settings:
settings['auto_pause_threshold_months'] = 24
if 'paused_check_interval_days' not in settings:
settings['paused_check_interval_days'] = 14
if 'max_results_per_phrase' not in settings:
settings['max_results_per_phrase'] = 100
return settings
# Return defaults if no row exists
return {
'phrases': [],
'check_interval_hours': 6,
'quality': 'best',
'enabled': 1,
'last_checked': None,
'updated_at': None,
'auto_start_queue': 0,
'notifications_enabled': 1,
'auto_pause_threshold_months': 24,
'paused_check_interval_days': 14,
'max_results_per_phrase': 100
}
finally:
conn.close()
def update_global_settings(self, phrases: List[str] = None,
                           check_interval_hours: int = None,
                           quality: str = None,
                           enabled: bool = None,
                           auto_start_queue: bool = None,
                           notifications_enabled: bool = None,
                           auto_pause_threshold_months: int = None,
                           paused_check_interval_days: int = None,
                           max_results_per_phrase: int = None) -> bool:
    """
    Update global monitor settings. Only arguments that are not None are written.

    Args:
        phrases: List of phrases to match in video titles/descriptions
        check_interval_hours: How often to check all channels
        quality: Video quality preference
        enabled: Whether monitoring is enabled globally
        auto_start_queue: Whether to auto-start the download queue after adding videos
        notifications_enabled: Whether to send notifications when videos are added
        auto_pause_threshold_months: Months of inactivity before auto-pausing channels
        paused_check_interval_days: Days between re-checking paused channels
        max_results_per_phrase: Maximum number of videos to process per search phrase
    Returns:
        True if update was successful
    """
    as_flag = lambda v: 1 if v else 0  # booleans are stored as 0/1 integers
    # (column, supplied value, converter-or-None) for every updatable field.
    candidates = [
        ('phrases', phrases, json.dumps),
        ('check_interval_hours', check_interval_hours, None),
        ('quality', quality, None),
        ('enabled', enabled, as_flag),
        ('auto_start_queue', auto_start_queue, as_flag),
        ('notifications_enabled', notifications_enabled, as_flag),
        ('auto_pause_threshold_months', auto_pause_threshold_months, None),
        ('paused_check_interval_days', paused_check_interval_days, None),
        ('max_results_per_phrase', max_results_per_phrase, None),
    ]
    assignments = []
    params = []
    for column, value, convert in candidates:
        if value is not None:
            assignments.append(f'{column} = ?')
            params.append(convert(value) if convert else value)
    if not assignments:
        return False
    assignments.append('updated_at = ?')
    params.append(datetime.now().isoformat())
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(f'''
            UPDATE youtube_monitor_settings
            SET {', '.join(assignments)}
            WHERE id = 1
        ''', params)
        conn.commit()
        logger.info("Updated global YouTube monitor settings")
        return cursor.rowcount > 0
    finally:
        conn.close()
def _update_last_checked(self):
"""Update the last_checked timestamp in global settings."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
UPDATE youtube_monitor_settings
SET last_checked = ?
WHERE id = 1
''', (datetime.now().isoformat(),))
conn.commit()
finally:
conn.close()
# =========================================================================
# CHANNEL MANAGEMENT METHODS
# =========================================================================
def get_all_channels(self) -> List[Dict]:
"""Get all YouTube channel monitors."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
status, always_active, last_video_date, last_check_date,
paused_date, paused_reason, total_videos_found
FROM youtube_channel_monitors
ORDER BY created_at DESC
''')
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def get_enabled_channels(self) -> List[Dict]:
"""Get all enabled YouTube channels."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
status, always_active, last_video_date, last_check_date,
paused_date, paused_reason, total_videos_found
FROM youtube_channel_monitors
WHERE status = 'active'
ORDER BY channel_name, channel_url
''')
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def get_channel(self, channel_id: int) -> Optional[Dict]:
"""Get a specific channel by ID."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at
FROM youtube_channel_monitors
WHERE id = ?
''', (channel_id,))
row = cursor.fetchone()
return dict(row) if row else None
finally:
conn.close()
def add_channel(self, channel_url: str, channel_name: str = None, enabled: bool = True) -> int:
    """
    Register a new YouTube channel for monitoring.

    Args:
        channel_url: YouTube channel URL
        channel_name: Optional display name for the channel
        enabled: Whether the channel starts out enabled
    Returns:
        The ID of the newly inserted channel row
    """
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            '''
            INSERT INTO youtube_channel_monitors (channel_url, channel_name, enabled)
            VALUES (?, ?, ?)
            ''',
            (channel_url, channel_name, 1 if enabled else 0),
        )
        conn.commit()
        new_id = cursor.lastrowid
        logger.info(f"Added YouTube channel {new_id}: {channel_name or channel_url}")
        return new_id
    finally:
        conn.close()
def update_channel(self, channel_id: int, **kwargs) -> bool:
    """
    Update a YouTube channel. Unknown keys are silently ignored.

    Args:
        channel_id: ID of the channel to update
        **kwargs: Fields to update (channel_url, channel_name, enabled)
    Returns:
        True if update was successful
    """
    allowed_fields = {'channel_url', 'channel_name', 'enabled'}
    # Keep only whitelisted fields; 'enabled' is normalized to a 0/1 flag.
    updates = {
        key: ((1 if value else 0) if key == 'enabled' else value)
        for key, value in kwargs.items()
        if key in allowed_fields
    }
    if not updates:
        return False
    conn = self._get_connection()
    try:
        set_clause = ', '.join(f'{column} = ?' for column in updates)
        params = [*updates.values(), channel_id]
        cursor = conn.cursor()
        cursor.execute(f'''
            UPDATE youtube_channel_monitors
            SET {set_clause}
            WHERE id = ?
        ''', params)
        conn.commit()
        logger.info(f"Updated YouTube channel {channel_id}")
        return cursor.rowcount > 0
    finally:
        conn.close()
def delete_channel(self, channel_id: int) -> bool:
    """
    Delete a YouTube channel and its history rows.

    Args:
        channel_id: ID of the channel to delete
    Returns:
        True if the channel row itself was deleted
    """
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        # History rows reference the monitor, so remove them first.
        cursor.execute('DELETE FROM youtube_monitor_history WHERE monitor_id = ?', (channel_id,))
        cursor.execute('DELETE FROM youtube_channel_monitors WHERE id = ?', (channel_id,))
        conn.commit()
        logger.info(f"Deleted YouTube channel {channel_id}")
        # rowcount reflects the most recent execute — the channel delete.
        return cursor.rowcount > 0
    finally:
        conn.close()
async def fetch_channel_id(self, channel_url: str) -> Optional[str]:
    """
    Fetch YouTube channel ID from URL using yt-dlp, with curl/grep fallback.

    Args:
        channel_url: YouTube channel URL
    Returns:
        Channel ID (UC...) or None if not found
    """
    async def _run(cmd):
        # Run a command capped at 10s. On timeout the child is killed before
        # re-raising, so a hung yt-dlp/curl is not left running (the previous
        # code leaked the process on asyncio.wait_for timeout).
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        try:
            stdout, _ = await asyncio.wait_for(process.communicate(), timeout=10)
        except asyncio.TimeoutError:
            process.kill()
            await process.communicate()  # reap the killed child
            raise
        return stdout

    # Method 1: yt-dlp — the first video's JSON metadata carries channel_id.
    try:
        stdout = await _run([
            self.yt_dlp_path,
            '--dump-json',
            '--playlist-end', '1',
            f'{channel_url}/videos'
        ])
        if stdout:
            data = json.loads(stdout.decode('utf-8'))
            channel_id = data.get('channel_id')
            if channel_id and channel_id.startswith('UC'):
                logger.debug(f"Fetched channel ID via yt-dlp: {channel_id}")
                return channel_id
    except Exception as e:
        # Exception already subsumes TimeoutError and JSONDecodeError; the old
        # (TimeoutError, JSONDecodeError, Exception) tuple was redundant.
        logger.debug(f"yt-dlp method failed for {channel_url}: {e}")

    # Method 2: fetch the channel page and scrape an embedded UC... id.
    try:
        stdout = await _run(['curl', '-Ls', channel_url])
        if stdout:
            html = stdout.decode('utf-8')
            # Look for channel ID patterns in the HTML
            pattern = r'"(?:browseId|externalId|channelId)":"(UC[^"]+)"'
            match = re.search(pattern, html)
            if match:
                channel_id = match.group(1)
                logger.debug(f"Fetched channel ID via curl/grep: {channel_id}")
                return channel_id
    except Exception as e:
        logger.debug(f"curl/grep method failed for {channel_url}: {e}")

    logger.warning(f"Could not fetch channel ID for {channel_url}")
    return None
# =========================================================================
# STATUS MANAGEMENT METHODS (v11.20.0)
# =========================================================================
def get_active_channels(self) -> List[Dict]:
"""Get channels with status='active'."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
status, always_active, last_video_date, last_check_date,
paused_date, paused_reason, total_videos_found
FROM youtube_channel_monitors
WHERE status = 'active'
ORDER BY channel_name, channel_url
''')
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def get_paused_channels(self) -> List[Dict]:
"""Get channels with status like 'paused_%'."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
status, always_active, last_video_date, last_check_date,
paused_date, paused_reason, total_videos_found
FROM youtube_channel_monitors
WHERE status LIKE 'paused_%'
ORDER BY paused_date DESC
''')
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def get_channels_filtered(self,
status_filter: str = None,
always_active_filter: str = None,
search: str = None,
sort_field: str = 'name',
sort_ascending: bool = True,
limit: int = None,
offset: int = 0) -> Dict:
"""
Get channels with server-side filtering, searching, sorting, and pagination.
Args:
status_filter: 'all', 'active', 'paused_manual', 'paused_auto', 'paused_all'
always_active_filter: 'all', 'always_active', 'regular'
search: Search term for channel name or URL
sort_field: 'name', 'last_checked', 'last_video_date', 'videos_found', 'created_at'
sort_ascending: Sort direction
limit: Maximum number of results
offset: Offset for pagination
Returns:
Dict with 'channels' list and 'total' count
"""
conn = self._get_connection()
try:
cursor = conn.cursor()
# Build WHERE clause
where_clauses = []
params = []
# Status filter
if status_filter and status_filter != 'all':
if status_filter == 'active':
where_clauses.append("status = 'active'")
elif status_filter == 'paused_manual':
where_clauses.append("status = 'paused_manual'")
elif status_filter == 'paused_auto':
where_clauses.append("status = 'paused_auto'")
elif status_filter == 'paused_all':
where_clauses.append("status LIKE 'paused_%'")
# Always active filter
if always_active_filter and always_active_filter != 'all':
if always_active_filter == 'always_active':
where_clauses.append("always_active = 1")
elif always_active_filter == 'regular':
where_clauses.append("(always_active = 0 OR always_active IS NULL)")
# Search filter
if search:
where_clauses.append("(channel_name LIKE ? OR channel_url LIKE ?)")
search_param = f"%{search}%"
params.extend([search_param, search_param])
where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
# Get total count
count_query = f"SELECT COUNT(*) FROM youtube_channel_monitors {where_sql}"
cursor.execute(count_query, params)
total = cursor.fetchone()[0]
# Build ORDER BY clause
sort_columns = {
'name': 'LOWER(COALESCE(channel_name, channel_url))',
'last_checked': 'last_check_date',
'last_video_date': 'last_video_date',
'videos_found': 'total_videos_found',
'created_at': 'created_at'
}
sort_column = sort_columns.get(sort_field, 'LOWER(COALESCE(channel_name, channel_url))')
sort_direction = 'ASC' if sort_ascending else 'DESC'
order_by = f"ORDER BY {sort_column} {sort_direction}"
# Build main query with pagination (using parameterized queries for security)
limit_sql = "LIMIT ? OFFSET ?" if limit else ""
query = f'''
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
status, always_active, last_video_date, last_check_date,
paused_date, paused_reason, total_videos_found, channel_id
FROM youtube_channel_monitors
{where_sql}
{order_by}
{limit_sql}
'''
# Add limit/offset to params if pagination is used
query_params = list(params)
if limit:
query_params.extend([limit, offset])
cursor.execute(query, query_params)
channels = [dict(row) for row in cursor.fetchall()]
return {
'channels': channels,
'total': total
}
finally:
conn.close()
def pause_channel(self, channel_id: int, reason: str = None, auto: bool = False) -> bool:
    """
    Pause a channel manually or automatically.

    Args:
        channel_id: ID of the channel to pause
        reason: Optional reason for pausing
        auto: If True, set status to 'paused_auto', otherwise 'paused_manual'
    Returns:
        True if pause was successful
    """
    new_status = 'paused_auto' if auto else 'paused_manual'
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            '''
            UPDATE youtube_channel_monitors
            SET status = ?, paused_date = ?, paused_reason = ?
            WHERE id = ?
            ''',
            (new_status, datetime.now().isoformat(), reason, channel_id),
        )
        conn.commit()
        logger.info(f"{'Auto-' if auto else ''}Paused channel {channel_id}: {reason}")
        return cursor.rowcount > 0
    finally:
        conn.close()
def resume_channel(self, channel_id: int) -> bool:
    """
    Resume a paused channel: set it back to 'active' and clear pause metadata.

    Args:
        channel_id: ID of the channel to resume
    Returns:
        True if resume was successful
    """
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            '''
            UPDATE youtube_channel_monitors
            SET status = 'active', paused_date = NULL, paused_reason = NULL
            WHERE id = ?
            ''',
            (channel_id,),
        )
        conn.commit()
        logger.info(f"Resumed channel {channel_id}")
        return cursor.rowcount > 0
    finally:
        conn.close()
def toggle_always_active(self, channel_id: int, value: bool) -> bool:
    """
    Set the always_active flag for a channel (exempts it from auto-pausing).

    Args:
        channel_id: ID of the channel
        value: True to enable always_active, False to disable
    Returns:
        True if toggle was successful
    """
    flag = 1 if value else 0
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            '''
            UPDATE youtube_channel_monitors
            SET always_active = ?
            WHERE id = ?
            ''',
            (flag, channel_id),
        )
        conn.commit()
        logger.info(f"Set always_active={value} for channel {channel_id}")
        return cursor.rowcount > 0
    finally:
        conn.close()
def get_statistics(self) -> Dict:
"""Get monitor statistics."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT
COUNT(*) as total,
SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) as active,
SUM(CASE WHEN status = 'paused_manual' THEN 1 ELSE 0 END) as paused_manual,
SUM(CASE WHEN status = 'paused_auto' THEN 1 ELSE 0 END) as paused_auto,
SUM(CASE WHEN always_active = 1 THEN 1 ELSE 0 END) as always_active_count,
SUM(COALESCE(total_videos_found, 0)) as total_videos
FROM youtube_channel_monitors
''')
row = cursor.fetchone()
return dict(row) if row else {
'total': 0,
'active': 0,
'paused_manual': 0,
'paused_auto': 0,
'always_active_count': 0,
'total_videos': 0
}
finally:
conn.close()
# =========================================================================
# AUTO-PAUSE AND PAUSED-CHECK LOGIC (v11.20.0)
# =========================================================================
async def _check_channel_for_auto_pause(self, channel_id: int) -> bool:
    """
    Check if a single channel should be auto-paused based on inactivity or no matched videos.
    Called immediately after checking each channel.

    A channel is auto-paused only when it is currently 'active' and not flagged
    always_active, and either (a) it has been checked at least once but has
    zero matched videos, or (b) its most recent upload is older than the
    configured threshold.

    Args:
        channel_id: ID of the channel to check
    Returns:
        True if channel was auto-paused, False otherwise
    """
    from datetime import timedelta
    settings = self.get_global_settings()
    threshold_months = settings.get('auto_pause_threshold_months', 24)
    # Calculate cutoff date (in YYYYMMDD format to match yt-dlp dates).
    # A "month" is approximated as 30 days here and in the reason text below.
    cutoff_date = datetime.now() - timedelta(days=threshold_months * 30)
    cutoff_str = cutoff_date.strftime('%Y%m%d')
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        # Check if this specific channel should be auto-paused
        cursor.execute('''
            SELECT id, channel_name, last_video_date, always_active, status,
                   videos_found, last_check_date
            FROM youtube_channel_monitors
            WHERE id = ?
        ''', (channel_id,))
        row = cursor.fetchone()
        if not row:
            # Unknown channel id — nothing to do.
            return False
        channel_name = row['channel_name']
        last_video_date = row['last_video_date']
        always_active = row['always_active']
        status = row['status']
        videos_found = row['videos_found']
        last_check_date = row['last_check_date']
        # Don't auto-pause if already paused or always_active
        if status != 'active' or always_active == 1:
            return False
        # Auto-pause if channel has been checked but has 0 matched videos
        if videos_found == 0 and last_check_date:
            reason = "No matching videos found"
            cursor.execute('''
                UPDATE youtube_channel_monitors
                SET status = 'paused_auto',
                    paused_date = ?,
                    paused_reason = ?
                WHERE id = ?
            ''', (datetime.now().isoformat(), reason, channel_id))
            conn.commit()
            logger.info(f"Auto-paused channel '{channel_name}': {reason}")
            return True
        # Auto-pause if channel is inactive (no uploads in threshold period).
        # NOTE(review): this lexicographic comparison is only guaranteed for
        # YYYYMMDD values; the parser below also accepts ISO dates, so confirm
        # upstream always stores last_video_date as YYYYMMDD.
        if last_video_date and last_video_date < cutoff_str:
            # Calculate days since last upload for the pause reason
            try:
                if len(last_video_date) == 8 and last_video_date.isdigit():
                    # YYYYMMDD format from yt-dlp
                    last_upload = datetime.strptime(last_video_date, '%Y%m%d')
                else:
                    # ISO format
                    last_upload = datetime.fromisoformat(last_video_date)
                days_inactive = (datetime.now() - last_upload).days
                reason = f"No uploads in {days_inactive} days (threshold: {threshold_months * 30} days)"
                cursor.execute('''
                    UPDATE youtube_channel_monitors
                    SET status = 'paused_auto',
                        paused_date = ?,
                        paused_reason = ?
                    WHERE id = ?
                ''', (datetime.now().isoformat(), reason, channel_id))
                conn.commit()
                logger.info(f"Auto-paused channel '{channel_name}': {reason}")
                return True
            except (ValueError, TypeError) as e:
                # Unparseable date: log and leave the channel untouched.
                logger.error(f"Error parsing date for channel {channel_id}: {e}")
                return False
        return False
    finally:
        conn.close()
async def check_for_inactive_channels(self) -> int:
    """
    Check for channels that should be auto-paused based on inactivity.

    Declared async for interface consistency with the other check methods,
    although this body performs no awaits (all work is synchronous SQLite).

    Returns:
        Number of channels auto-paused
    """
    from datetime import timedelta
    settings = self.get_global_settings()
    threshold_months = settings.get('auto_pause_threshold_months', 24)
    # Calculate cutoff date (in YYYYMMDD format to match yt-dlp dates);
    # a "month" is approximated as 30 days.
    cutoff_date = datetime.now() - timedelta(days=threshold_months * 30)
    cutoff_str = cutoff_date.strftime('%Y%m%d')
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        # Find active channels that haven't posted in threshold period
        # Note: Comparing YYYYMMDD strings works correctly (20231225 < 20241227)
        cursor.execute('''
            SELECT id, channel_name, last_video_date
            FROM youtube_channel_monitors
            WHERE status = 'active'
            AND always_active = 0
            AND last_video_date IS NOT NULL
            AND last_video_date < ?
        ''', (cutoff_str,))
        inactive_channels = cursor.fetchall()
        paused_count = 0
        for row in inactive_channels:
            channel_id = row['id']
            channel_name = row['channel_name']
            last_video_date = row['last_video_date']
            # Calculate days since last upload
            try:
                # Handle both YYYYMMDD format (from yt-dlp) and ISO format
                if len(last_video_date) == 8 and last_video_date.isdigit():
                    # YYYYMMDD format from yt-dlp
                    last_upload = datetime.strptime(last_video_date, '%Y%m%d')
                else:
                    # ISO format
                    last_upload = datetime.fromisoformat(last_video_date)
                days_inactive = (datetime.now() - last_upload).days
                reason = f"No uploads in {days_inactive} days (threshold: {threshold_months * 30} days)"
                cursor.execute('''
                    UPDATE youtube_channel_monitors
                    SET status = 'paused_auto',
                        paused_date = ?,
                        paused_reason = ?
                    WHERE id = ?
                ''', (datetime.now().isoformat(), reason, channel_id))
                paused_count += 1
                logger.info(f"Auto-paused channel '{channel_name}': {reason}")
            except (ValueError, TypeError) as e:
                # Skip channels whose stored date cannot be parsed.
                logger.error(f"Error parsing date for channel {channel_id}: {e}")
                continue
        # Single commit covers all UPDATEs issued in the loop above.
        conn.commit()
        return paused_count
    finally:
        conn.close()
async def check_paused_channels(self) -> int:
    """
    Periodically check paused channels to see if they've resumed posting.

    For each paused channel that hasn't been checked within
    paused_check_interval_days, fetch its latest few videos, record the check
    time and newest upload date, and auto-resume 'paused_auto' channels whose
    newest upload falls inside the auto-pause threshold. Manually paused
    channels are re-checked but never auto-resumed.

    Returns:
        Number of channels auto-resumed
    """
    from datetime import timedelta
    settings = self.get_global_settings()
    check_interval_days = settings.get('paused_check_interval_days', 14)
    threshold_months = settings.get('auto_pause_threshold_months', 24)
    # Find paused channels that need checking (last check older than interval)
    cutoff_date = datetime.now() - timedelta(days=check_interval_days)
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT id, channel_url, channel_name, status
            FROM youtube_channel_monitors
            WHERE status LIKE 'paused_%'
            AND (last_check_date IS NULL OR last_check_date < ?)
        ''', (cutoff_date.isoformat(),))
        paused_channels = [dict(row) for row in cursor.fetchall()]
    finally:
        # Connection is closed before the (slow) network checks below; each
        # per-channel update opens a short-lived connection of its own.
        conn.close()
    resumed_count = 0
    for channel in paused_channels:
        try:
            # Check if channel has new videos (light check - just get latest 5)
            videos = await self.get_channel_videos(channel['channel_url'], max_results=5)
            most_recent_upload = None
            if videos:
                # Find most recent video by upload_date (string-comparable)
                for video in videos:
                    upload_date_str = video.get('upload_date')
                    if upload_date_str:
                        if not most_recent_upload or upload_date_str > most_recent_upload:
                            most_recent_upload = upload_date_str
            # Update last_check_date and last_video_date
            conn = self._get_connection()
            try:
                cursor = conn.cursor()
                cursor.execute('''
                    UPDATE youtube_channel_monitors
                    SET last_check_date = ?,
                        last_video_date = ?
                    WHERE id = ?
                ''', (datetime.now().isoformat(), most_recent_upload, channel['id']))
                conn.commit()
            finally:
                conn.close()
            # Check if auto-paused channel should be resumed
            if channel['status'] == 'paused_auto' and most_recent_upload:
                try:
                    # Parse date (yt-dlp format: YYYYMMDD), else ISO
                    if len(most_recent_upload) == 8:
                        upload_datetime = datetime.strptime(most_recent_upload, '%Y%m%d')
                    else:
                        upload_datetime = datetime.fromisoformat(most_recent_upload)
                    days_since_upload = (datetime.now() - upload_datetime).days
                    # If upload is recent (within threshold), auto-resume
                    if days_since_upload < (threshold_months * 30):
                        self.resume_channel(channel['id'])
                        resumed_count += 1
                        logger.info(f"Auto-resumed channel '{channel['channel_name']}' - new upload detected ({days_since_upload} days old)")
                except (ValueError, TypeError) as e:
                    logger.error(f"Error parsing upload date for channel {channel['id']}: {e}")
        except Exception as e:
            # One failing channel must not abort the whole sweep.
            logger.error(f"Error checking paused channel {channel['id']} ({channel['channel_name']}): {e}")
            continue
    return resumed_count
def check_paused_channels_sync(self) -> int:
    """
    Synchronous wrapper for check_paused_channels.

    Returns:
        Number of channels auto-resumed
    """
    # asyncio.run creates, installs, runs and tears down a fresh event loop.
    # The previous new_event_loop()/run_until_complete pair never called
    # set_event_loop() and skipped async-generator shutdown on close.
    # Like the old code, this must not be called from a running event loop.
    return asyncio.run(self.check_paused_channels())
# =========================================================================
# HISTORY METHODS
# =========================================================================
def get_channel_history(self, channel_id: int, limit: int = 50) -> List[Dict]:
"""Get history for a specific channel."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT id, monitor_id, video_id, video_title, matched_phrase, action, created_at
FROM youtube_monitor_history
WHERE monitor_id = ?
ORDER BY created_at DESC
LIMIT ?
''', (channel_id, limit))
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def get_all_history(self, limit: int = 100) -> List[Dict]:
"""Get combined history for all channels."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT h.id, h.monitor_id, h.video_id, h.video_title, h.matched_phrase,
h.action, h.created_at, c.channel_name, c.channel_url
FROM youtube_monitor_history h
LEFT JOIN youtube_channel_monitors c ON h.monitor_id = c.id
ORDER BY h.created_at DESC
LIMIT ?
''', (limit,))
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def _is_video_processed(self, channel_id: int, video_id: str) -> bool:
"""Check if a video has already been processed for a channel."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT 1 FROM youtube_monitor_history
WHERE monitor_id = ? AND video_id = ?
''', (channel_id, video_id))
return cursor.fetchone() is not None
finally:
conn.close()
def _record_video_processed(self, channel_id: int, video_id: str,
video_title: str, matched_phrase: str, action: str):
"""Record that a video has been processed."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
INSERT OR IGNORE INTO youtube_monitor_history
(monitor_id, video_id, video_title, matched_phrase, action)
VALUES (?, ?, ?, ?, ?)
''', (channel_id, video_id, video_title, matched_phrase, action))
conn.commit()
except Exception as e:
logger.error(f"Failed to record video processed: {e}")
finally:
conn.close()
def _update_channel_stats(self, channel_id: int, videos_added: int, most_recent_upload: str = None):
    """
    Update channel statistics after a check.

    Recomputes the channel's video totals from the shared
    celebrity_discovered_videos table and stamps check/upload dates.

    Args:
        channel_id: Row id of the monitor (youtube_channel_monitors.id)
        videos_added: Currently unused by this body — the stored counts are
            recomputed from the database instead. Kept for caller compatibility.
        most_recent_upload: Newest upload date seen this check; when None the
            existing last_video_date is preserved.
    """
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        # Get channel info for accurate matching
        cursor.execute('SELECT channel_id, channel_name FROM youtube_channel_monitors WHERE id = ?', (channel_id,))
        row = cursor.fetchone()
        if not row:
            # Monitor row vanished (e.g. deleted mid-check) — nothing to update.
            return
        yt_channel_id, channel_name = row[0], row[1]
        # Count videos from celebrity_discovered_videos (Internet Discovery database)
        # This is the shared database that shows on the Internet Discovery page
        if yt_channel_id:
            cursor.execute('''
                SELECT COUNT(*)
                FROM celebrity_discovered_videos
                WHERE (
                    -- Match by channel_id
                    channel_id = ?
                    OR
                    -- Fallback: match by name if video has no channel_id
                    (channel_id IS NULL OR channel_id = '')
                    AND REPLACE(LOWER(channel_name), ' ', '') = REPLACE(LOWER(?), ' ', '')
                )
                AND platform = 'youtube'
            ''', (yt_channel_id, channel_name))
            total_count = cursor.fetchone()[0]
        else:
            # Fallback to name-only matching if monitor has no channel_id.
            # Names are compared case- and whitespace-insensitively.
            cursor.execute('''
                SELECT COUNT(*)
                FROM celebrity_discovered_videos
                WHERE REPLACE(LOWER(channel_name), ' ', '') = REPLACE(LOWER(?), ' ', '')
                AND platform = 'youtube'
            ''', (channel_name,))
            total_count = cursor.fetchone()[0]
        # Update last_video_date if we have a new value, or keep existing if we don't
        # (the CASE expression keeps the old date when most_recent_upload is NULL).
        cursor.execute('''
            UPDATE youtube_channel_monitors
            SET last_checked = ?,
                last_check_date = ?,
                videos_found = ?,
                total_videos_found = ?,
                last_video_date = CASE
                    WHEN ? IS NOT NULL THEN ?
                    ELSE last_video_date
                END
            WHERE id = ?
        ''', (
            datetime.now().isoformat(),
            datetime.now().isoformat(),
            total_count,
            total_count,
            most_recent_upload,
            most_recent_upload,
            channel_id
        ))
        conn.commit()
    finally:
        conn.close()
# =========================================================================
# VIDEO FETCHING AND MATCHING
# =========================================================================
async def _get_channel_latest_upload_date(self, channel_url: str) -> Optional[str]:
    """
    Get the upload date of the most recent video on a channel.
    Uses full metadata fetch (not flat-playlist) to get accurate upload_date.
    Tries multiple URL formats if the first attempt fails.

    Args:
        channel_url: URL of the YouTube channel
    Returns:
        Upload date string in YYYYMMDD format, or None if not found
    """
    # Normalize: strip a known tab suffix so we can append our own.
    base_url = channel_url.rstrip('/')
    for suffix in ['/videos', '/streams', '/shorts', '/playlists', '/community', '/about']:
        if base_url.endswith(suffix):
            base_url = base_url[:-len(suffix)]
            break
    # Try /videos first, then the bare channel URL, then /streams for
    # channels that primarily live-stream.
    urls_to_try = [f"{base_url}/videos", base_url, f"{base_url}/streams"]
    for url_attempt in urls_to_try:
        cmd = [
            self.yt_dlp_path,
            '--playlist-end', '1',  # Only get the most recent video
            '--dump-json',
            '--no-warnings',
            '--ignore-errors',
            '--skip-download',
            url_attempt
        ]
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            try:
                stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=30)
            except asyncio.TimeoutError:
                # Kill the hung yt-dlp so it isn't left running, then treat
                # this URL as failed (previously the process leaked here).
                process.kill()
                await process.communicate()
                raise
            # A missing tab ("does not have a ...") or 404 means this URL
            # format doesn't exist for the channel — try the next one.
            stderr_text = stderr.decode().strip()
            if 'does not have a' in stderr_text.lower() or '404' in stderr_text:
                continue
            for line in stdout.decode().strip().split('\n'):
                if line:
                    try:
                        data = json.loads(line)
                        upload_date = data.get('upload_date')
                        if upload_date:
                            logger.debug(f"Successfully fetched upload date {upload_date} from {url_attempt}")
                            return upload_date
                    except json.JSONDecodeError:
                        # Ignore non-JSON output lines and keep scanning.
                        pass
        except asyncio.TimeoutError:
            logger.debug(f"Timeout fetching latest upload date from {url_attempt}")
            continue
        except Exception as e:
            logger.debug(f"Error fetching latest upload date from {url_attempt}: {e}")
            continue
    logger.warning(f"Could not fetch latest upload date from {base_url} after trying all URL formats")
    return None
async def get_channel_videos(self, channel_url: str, max_results: int = 20, search_phrase: str = None) -> List[Dict]:
"""
Fetch videos from a YouTube channel using yt-dlp.
Args:
channel_url: URL of the YouTube channel
max_results: Maximum number of videos to fetch
search_phrase: Optional phrase to search within the channel
Returns:
List of video metadata dictionaries (basic info from flat-playlist)
"""
# Build the URL based on whether we're searching or fetching recent
if search_phrase:
# Use channel search URL to find videos matching the phrase
# Remove any trailing path from channel URL
base_url = channel_url.rstrip('/')
for suffix in ['/videos', '/streams', '/shorts', '/playlists', '/community', '/about']:
if base_url.endswith(suffix):
base_url = base_url[:-len(suffix)]
# URL encode the search phrase
import urllib.parse
encoded_phrase = urllib.parse.quote(search_phrase)
channel_url = f"{base_url}/search?query={encoded_phrase}"
else:
# Ensure URL ends with /videos for recent uploads
if not channel_url.endswith('/videos'):
if channel_url.endswith('/'):
channel_url = channel_url + 'videos'
else:
channel_url = channel_url + '/videos'
cmd = [
self.yt_dlp_path,
'--flat-playlist',
'--dump-json',
'--playlist-end', str(max_results),
'--no-warnings',
'--ignore-errors',
channel_url
]
try:
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=90)
videos = []
for line in stdout.decode().strip().split('\n'):
if line:
try:
data = json.loads(line)
videos.append({
'video_id': data.get('id'),
'title': data.get('title', ''),
'channel_name': data.get('uploader', data.get('channel', '')),
'channel_id': data.get('channel_id', ''),
'upload_date': data.get('upload_date'),
'duration': data.get('duration', 0),
'view_count': data.get('view_count', 0),
'thumbnail': data.get('thumbnail', ''),
'description': data.get('description', ''),
'url': f"https://www.youtube.com/watch?v={data.get('id')}"
})
except json.JSONDecodeError:
pass
logger.debug(f"Fetched {len(videos)} videos from {channel_url}")
return videos
except asyncio.TimeoutError:
logger.error(f"Timeout fetching videos from {channel_url}")
return []
except Exception as e:
logger.error(f"Error fetching videos from {channel_url}: {e}")
return []
async def fetch_video_metadata(self, video_id: str) -> Dict:
"""
Fetch full metadata for a single video including upload date, resolution, and thumbnail.
Args:
video_id: YouTube video ID
Returns:
Dictionary with full video metadata
"""
try:
cmd = [
self.yt_dlp_path,
f'https://www.youtube.com/watch?v={video_id}',
'--dump-json',
'--no-download',
'--no-warnings',
'--ignore-errors'
]
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=30)
if stdout:
data = json.loads(stdout.decode().strip())
# Extract max resolution (height) and corresponding width from formats
max_resolution = 0
max_width = 0
formats = data.get('formats', [])
for fmt in formats:
height = fmt.get('height')
if height and isinstance(height, int) and height > max_resolution:
# Only count video formats (not audio-only)
if fmt.get('vcodec', 'none') != 'none':
max_resolution = height
# Get the width for this format
width = fmt.get('width')
if width and isinstance(width, int):
max_width = width
# Get best thumbnail - prefer jpg over webp for better compatibility
thumbnail = f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
thumbnails = data.get('thumbnails', [])
if thumbnails:
# Filter for jpg thumbnails only (webp may not exist for all videos)
jpg_thumbs = [t for t in thumbnails if t.get('url', '').endswith('.jpg')]
if jpg_thumbs:
# Get highest quality jpg thumbnail
best_thumb = max(jpg_thumbs, key=lambda t: t.get('height', 0) or 0)
thumbnail = best_thumb.get('url', thumbnail)
return {
'video_id': video_id,
'title': data.get('title', ''),
'channel_name': data.get('uploader', data.get('channel', '')),
'channel_id': data.get('channel_id', ''),
'upload_date': data.get('upload_date', ''),
'duration': data.get('duration', 0),
'view_count': data.get('view_count', 0),
'thumbnail': thumbnail,
'description': data.get('description', '')[:500] if data.get('description') else '',
'max_resolution': max_resolution if max_resolution > 0 else None,
'max_width': max_width if max_width > 0 else None,
'url': f"https://www.youtube.com/watch?v={video_id}"
}
except asyncio.TimeoutError:
logger.warning(f"Timeout fetching metadata for {video_id}")
except Exception as e:
logger.warning(f"Failed to fetch metadata for {video_id}: {e}")
return {}
def _matches_phrase(self, title: str, description: str, phrases: List[str]) -> Optional[str]:
"""
Check if video matches any phrase.
Also checks hashtag variations (e.g., "Eva Longoria" matches "#EvaLongoria").
Args:
title: Video title
description: Video description
phrases: List of phrases to match
Returns:
The matched phrase, or None if no match
"""
text = f"{title} {description}".lower()
for phrase in phrases:
phrase_lower = phrase.lower()
# Check direct match
if phrase_lower in text:
return phrase
# Check hashtag variation (e.g., "Eva Longoria" -> "#evalongoria")
# Remove spaces, hyphens, underscores from phrase for hashtag matching
hashtag_phrase = '#' + phrase_lower.replace(' ', '').replace('-', '').replace('_', '')
if hashtag_phrase in text:
return phrase
return None
def _add_to_download_queue(self, video: Dict, channel: Dict, quality: str) -> bool:
"""
Add a matching video directly to the video_download_queue.
Args:
video: Video metadata dictionary (should be full metadata from fetch_video_metadata)
channel: Channel dictionary
quality: Video quality from global settings
Returns:
True if successfully added to queue
"""
conn = self._get_connection()
try:
cursor = conn.cursor()
# Check if video already exists in queue
cursor.execute('''
SELECT 1 FROM video_download_queue
WHERE platform = 'youtube' AND video_id = ?
''', (video['video_id'],))
if cursor.fetchone():
logger.debug(f"Video {video['video_id']} already in queue")
return False
# Parse upload date if available (format: YYYYMMDD)
upload_date = None
if video.get('upload_date'):
try:
upload_date = datetime.strptime(video['upload_date'], '%Y%m%d').isoformat()
except (ValueError, TypeError):
pass # Skip invalid date formats
# Insert into queue with all metadata fields matching celebrity discovery
cursor.execute('''
INSERT INTO video_download_queue
(platform, video_id, url, title, channel_name, thumbnail, duration,
upload_date, view_count, max_resolution, max_width, description, source_type,
source_name, priority, status, metadata)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', (
'youtube',
video['video_id'],
video['url'],
video['title'],
video.get('channel_name', channel.get('channel_name', '')),
video.get('thumbnail', ''),
video.get('duration', 0),
upload_date,
video.get('view_count', 0),
video.get('max_resolution'), # Now included from full metadata
video.get('max_width'), # Video width for aspect ratio
video.get('description', '')[:500] if video.get('description') else None,
'youtube_monitor',
f"Monitor: {channel.get('channel_name', channel['channel_url'])}",
5, # Default priority
'pending',
json.dumps({
'channel_id': channel['id'],
'quality': quality,
'output_path': self.default_output_path,
'matched_from': 'youtube_channel_monitor'
})
))
conn.commit()
logger.info(f"Added video '{video['title'][:50]}' to download queue (res: {video.get('max_resolution', 'N/A')}p)")
return True
except sqlite3.IntegrityError:
logger.debug(f"Video {video['video_id']} already exists in queue (integrity error)")
return False
except Exception as e:
logger.error(f"Failed to add video to queue: {e}")
return False
finally:
conn.close()
def _get_or_create_monitor_preset(self, celebrity_id: int, channel_name: str) -> int:
"""Get or create a preset for YouTube Monitor videos."""
conn = self._get_connection()
try:
cursor = conn.cursor()
preset_name = f"YT Monitor: {channel_name[:30]}"
# Check if preset exists
cursor.execute('''
SELECT id FROM celebrity_search_presets
WHERE celebrity_id = ? AND name = ?
''', (celebrity_id, preset_name))
row = cursor.fetchone()
if row:
return row[0]
# Create new preset
cursor.execute('''
INSERT INTO celebrity_search_presets
(name, celebrity_id, source_type, source_value, platform, enabled, category)
VALUES (?, ?, 'youtube_monitor', ?, 'youtube', 1, 'youtube_monitor')
''', (preset_name, celebrity_id, channel_name))
conn.commit()
return cursor.lastrowid
finally:
conn.close()
def _find_celebrity_by_phrase(self, phrase: str) -> Optional[int]:
"""Find a celebrity ID that matches the phrase (by name)."""
conn = self._get_connection()
try:
cursor = conn.cursor()
# Try exact match first
cursor.execute('''
SELECT id FROM celebrity_profiles
WHERE LOWER(name) = LOWER(?)
''', (phrase,))
row = cursor.fetchone()
if row:
return row[0]
# Try partial match
cursor.execute('''
SELECT id FROM celebrity_profiles
WHERE LOWER(name) LIKE LOWER(?)
''', (f'%{phrase}%',))
row = cursor.fetchone()
return row[0] if row else None
finally:
conn.close()
    def _add_to_discovery(self, video: Dict, channel: Dict, matched_phrase: str) -> bool:
        """
        Add a matching video to the celebrity discovery page.

        Resolves the matched phrase to a celebrity, attaches the video to a
        per-channel "YT Monitor" preset, inserts the discovery row, then
        best-effort caches the thumbnail and back-fills the monitor's
        channel_id for more accurate future matching.

        Args:
            video: Video metadata dictionary
            channel: Channel dictionary
            matched_phrase: The phrase that matched (used to find celebrity)

        Returns:
            True if successfully added
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            # Find celebrity by phrase; without an owning celebrity the row
            # cannot be attributed, so the video is skipped entirely
            celebrity_id = self._find_celebrity_by_phrase(matched_phrase)
            if not celebrity_id:
                logger.warning(f"No celebrity found for phrase '{matched_phrase}' - skipping")
                return False
            # Get or create preset for this channel
            # (fallback name: the handle portion after '@' in the URL)
            channel_name = channel.get('channel_name', channel['channel_url'].split('@')[-1])
            preset_id = self._get_or_create_monitor_preset(celebrity_id, channel_name)
            # Check if video already exists in discovery
            cursor.execute('''
                SELECT 1 FROM celebrity_discovered_videos
                WHERE video_id = ? AND platform = 'youtube'
            ''', (video['video_id'],))
            if cursor.fetchone():
                logger.debug(f"Video {video['video_id']} already in discovery")
                return False
            # Parse upload date if available (format: YYYYMMDD) into ISO-8601
            upload_date = None
            if video.get('upload_date'):
                try:
                    upload_date = datetime.strptime(video['upload_date'], '%Y%m%d').isoformat()
                except (ValueError, TypeError):
                    pass  # Skip invalid date formats
            # Insert into celebrity_discovered_videos with status 'new'
            cursor.execute('''
                INSERT INTO celebrity_discovered_videos
                (preset_id, celebrity_id, video_id, platform, url, title, channel_name,
                 channel_id, thumbnail, duration, upload_date, view_count, description,
                 content_type, status, max_resolution, max_width, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                preset_id,
                celebrity_id,
                video['video_id'],
                'youtube',
                video['url'],
                video['title'],
                video.get('channel_name', channel.get('channel_name', '')),
                video.get('channel_id', ''),
                video.get('thumbnail', ''),
                video.get('duration', 0),
                upload_date,
                video.get('view_count', 0),
                video.get('description', '')[:500] if video.get('description') else None,
                'youtube_monitor',
                'new',
                video.get('max_resolution'),
                video.get('max_width'),
                json.dumps({
                    'monitor_channel_id': channel['id'],
                    'monitor_channel_name': channel_name,
                    'matched_phrase': matched_phrase
                })
            ))
            conn.commit()
            # Pre-cache thumbnail for faster page loading (best effort)
            thumbnail_url = video.get('thumbnail', '')
            if thumbnail_url:
                self._cache_thumbnail(video['video_id'], thumbnail_url, cursor, conn)
            # Update monitor's channel_id if not set (for accurate future matching)
            if video.get('channel_id'):
                cursor.execute('''
                    UPDATE youtube_channel_monitors
                    SET channel_id = ?
                    WHERE id = ? AND (channel_id IS NULL OR channel_id = '')
                ''', (video['channel_id'], channel['id']))
                conn.commit()
            logger.info(f"Added video '{video['title'][:50]}' to discovery (res: {video.get('max_resolution', 'N/A')}p)")
            return True
        except sqlite3.IntegrityError:
            # Unique constraint hit despite the SELECT check (race or replay)
            logger.debug(f"Video {video['video_id']} already exists in discovery (integrity error)")
            return False
        except Exception as e:
            logger.error(f"Failed to add video to discovery: {e}")
            return False
        finally:
            conn.close()
def _cache_thumbnail(self, video_id: str, thumbnail_url: str, cursor, conn) -> None:
"""
Pre-cache thumbnail by fetching from URL and storing in database.
This speeds up Internet Discovery page loading.
"""
try:
import requests
response = requests.get(thumbnail_url, timeout=10, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
})
if response.status_code == 200 and response.content:
cursor.execute('''
UPDATE celebrity_discovered_videos
SET thumbnail_data = ?
WHERE video_id = ?
''', (response.content, video_id))
conn.commit()
logger.debug(f"Cached thumbnail for {video_id}")
except Exception as e:
logger.debug(f"Failed to cache thumbnail for {video_id}: {e}")
# =========================================================================
# MAIN CHECK METHODS
# =========================================================================
    async def check_channel(self, channel: Dict, phrases: List[str], quality: str) -> int:
        """
        Check a single channel for matching videos by searching for each phrase.

        Flow: fetch the channel's latest upload date (for auto-pause), then for
        every phrase run a channel search, title-match candidates, fetch full
        metadata, verify the video really belongs to the monitored channel
        (YouTube channel search can return other channels' videos), and add
        matches to Internet Discovery. Finally, update channel stats and run
        the per-channel auto-pause check.

        Args:
            channel: Channel dictionary
            phrases: Global phrases to search for
            quality: Global quality setting

        Returns:
            Number of new videos added to Internet Discovery
        """
        channel_name = channel.get('channel_name') or channel['channel_url']
        if not phrases:
            logger.warning(f"No global phrases configured - skipping check")
            return 0
        logger.info(f"Checking channel: {channel_name}")
        videos_added = 0
        seen_video_ids = set()
        most_recent_upload = None  # Track most recent video upload date
        # First, get the channel's latest videos (no search) to track last upload date
        # This is needed for auto-pause logic even if no videos match the search phrases
        try:
            # Get just the most recent video with full metadata to get upload_date
            most_recent_upload = await self._get_channel_latest_upload_date(channel['channel_url'])
            if most_recent_upload:
                logger.debug(f"Channel latest upload: {most_recent_upload}")
        except Exception as e:
            logger.debug(f"Could not fetch latest upload date for {channel_name}: {e}")
        # Search for each phrase on the channel
        settings = self.get_global_settings()
        max_results = settings.get('max_results_per_phrase', 100)
        for phrase in phrases:
            # Search the channel for this phrase
            videos = await self.get_channel_videos(channel['channel_url'], max_results=max_results, search_phrase=phrase)
            logger.debug(f"Found {len(videos)} videos searching for '{phrase}' on {channel_name}")
            for video in videos:
                video_id = video.get('video_id')
                if not video_id:
                    continue
                # Track most recent upload date (YYYYMMDD strings compare lexically)
                upload_date = video.get('upload_date')
                if upload_date:
                    if not most_recent_upload or upload_date > most_recent_upload:
                        most_recent_upload = upload_date
                # Skip duplicates within this check (same video found by multiple phrases)
                if video_id in seen_video_ids:
                    continue
                seen_video_ids.add(video_id)
                # Skip if already processed
                if self._is_video_processed(channel['id'], video_id):
                    continue
                # Verify the phrase actually appears in title (not description)
                # YouTube search can return related/recommended content
                matched_phrase = self._matches_phrase(
                    video.get('title', ''),
                    '',  # Only match on title, not description
                    [phrase]
                )
                if matched_phrase:
                    # Fetch full metadata for matching video (includes resolution, thumbnail, etc.)
                    logger.info(f"Matched phrase '{matched_phrase}' - fetching full metadata for: {video['title'][:60]}")
                    full_video = await self.fetch_video_metadata(video_id)
                    if full_video:
                        # IMPORTANT: Verify the video is actually from the monitored channel
                        # YouTube's channel search can return videos from other channels
                        video_channel_name = (full_video.get('channel_name') or '').lower().strip()
                        monitored_channel_name = (channel.get('channel_name') or '').lower().strip()
                        video_channel_id = (full_video.get('channel_id') or '').lower().strip()
                        # Check if channel matches (by name or by channel ID in URL)
                        channel_url_lower = (channel.get('channel_url') or '').lower()
                        channel_matches = (
                            video_channel_name == monitored_channel_name or
                            (video_channel_id and video_channel_id in channel_url_lower) or
                            (video_channel_name and video_channel_name in channel_url_lower)
                        )
                        if not channel_matches:
                            # Record as 'wrong_channel' so it isn't re-fetched next cycle
                            logger.debug(f"Skipping video from different channel: '{full_video.get('channel_name')}' (expected '{channel.get('channel_name')}')")
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'wrong_channel'
                            )
                            continue
                        # Use full metadata - add to discovery page
                        if self._add_to_discovery(full_video, channel, matched_phrase):
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'discovered'
                            )
                            videos_added += 1
                        else:
                            # Already in discovery or failed
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'skipped'
                            )
                    else:
                        # Fallback to basic info if full metadata fetch fails
                        logger.warning(f"Could not fetch full metadata for {video_id}, using basic info")
                        # Still verify channel matches using basic info
                        # (no channel_id available in flat-playlist data)
                        video_channel_name = (video.get('channel_name') or '').lower().strip()
                        monitored_channel_name = (channel.get('channel_name') or '').lower().strip()
                        channel_url_lower = (channel.get('channel_url') or '').lower()
                        channel_matches = (
                            video_channel_name == monitored_channel_name or
                            (video_channel_name and video_channel_name in channel_url_lower)
                        )
                        if not channel_matches:
                            logger.debug(f"Skipping video from different channel: '{video.get('channel_name')}' (expected '{channel.get('channel_name')}')")
                            self._record_video_processed(
                                channel['id'], video_id,
                                video.get('title', ''), matched_phrase, 'wrong_channel'
                            )
                            continue
                        if self._add_to_discovery(video, channel, matched_phrase):
                            self._record_video_processed(
                                channel['id'], video_id,
                                video.get('title', ''), matched_phrase, 'discovered'
                            )
                            videos_added += 1
                        else:
                            self._record_video_processed(
                                channel['id'], video_id,
                                video.get('title', ''), matched_phrase, 'skipped'
                            )
                    # Small delay between metadata fetches to avoid rate limiting
                    await asyncio.sleep(1)
            # Small delay between phrase searches
            if len(phrases) > 1:
                await asyncio.sleep(2)
        # Update channel stats
        self._update_channel_stats(channel['id'], videos_added, most_recent_upload)
        # Check if this channel should be auto-paused due to inactivity
        await self._check_channel_for_auto_pause(channel['id'])
        return videos_added
async def check_single_channel(self, channel_id: int) -> int:
"""
Check a single channel by ID (for manual trigger).
Args:
channel_id: ID of the channel to check
Returns:
Number of new videos added to Internet Discovery
"""
channel = self.get_channel(channel_id)
if not channel:
logger.error(f"Channel {channel_id} not found")
return 0
settings = self.get_global_settings()
phrases = settings.get('phrases', [])
quality = settings.get('quality', 'best')
if not phrases:
logger.warning(f"No global phrases configured")
return 0
videos_added = await self.check_channel(channel, phrases, quality)
# Auto-start queue if enabled and videos were added
if videos_added > 0 and settings.get('auto_start_queue'):
await self._trigger_queue_start()
return videos_added
    async def run_check_cycle(self) -> int:
        """
        Main entry point - check all enabled channels using global settings.

        Returns 0 immediately when monitoring is disabled, no phrases are
        configured, or no channels are active. Channel order is shuffled and
        delays/pauses are inserted between checks to avoid rate limiting.

        Returns:
            Total number of new videos added to Internet Discovery
        """
        settings = self.get_global_settings()
        if not settings.get('enabled'):
            logger.debug("YouTube channel monitoring is disabled globally")
            return 0
        phrases = settings.get('phrases', [])
        quality = settings.get('quality', 'best')
        if not phrases:
            logger.debug("No global phrases configured for YouTube monitoring")
            return 0
        channels = self.get_active_channels()
        if not channels:
            logger.debug("No active YouTube channels to monitor")
            return 0
        total_added = 0
        total_channels = len(channels)
        logger.info(f"Running YouTube channel monitor: {total_channels} channels, phrases: {phrases}")
        # Start background task tracking (separate from main scheduler activity)
        if self.activity_manager:
            self.activity_manager.start_background_task(
                'youtube_monitor',
                'youtube_channel_monitor',
                'YouTube Channel Monitor',
                'Running',
                {'total_channels': total_channels, 'videos_found': 0}
            )
        # Randomize order to avoid detection patterns
        random.shuffle(channels)
        for idx, channel in enumerate(channels, 1):
            try:
                channel_name = channel.get('channel_name') or channel['channel_url'].replace('https://www.youtube.com/@', '@')
                # Update background task status
                if self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Checking: {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added, 'current_channel': channel_name}
                    )
                videos_added = await self.check_channel(channel, phrases, quality)
                # Update status if we found new videos
                if videos_added > 0 and self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Found {videos_added} new in {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added + videos_added, 'current_channel': channel_name, 'last_found': videos_added}
                    )
                total_added += videos_added
                # Delay between channel checks (with jitter to avoid detection)
                base_delay = 4 + random.uniform(0, 2)  # 4-6 seconds
                await asyncio.sleep(base_delay)
                # Batch pause every 50 channels to reduce rate limiting
                if idx % 50 == 0 and idx < total_channels:
                    logger.info(f"YouTube monitor: Pausing 30s after {idx} channels to avoid rate limiting")
                    if self.activity_manager:
                        self.activity_manager.update_background_task(
                            'youtube_monitor',
                            f'Rate limit pause ({idx}/{total_channels})',
                            idx, total_channels,
                            {'total_channels': total_channels, 'videos_found': total_added, 'status': 'pausing'}
                        )
                    await asyncio.sleep(30)
            except Exception as e:
                # One bad channel must not abort the whole cycle
                logger.error(f"Error checking channel {channel['id']}: {e}")
        # Update global last_checked
        self._update_last_checked()
        # Note: Auto-pause now happens per-channel in check_channel() for real-time feedback
        # Stop background task tracking
        if self.activity_manager:
            self.activity_manager.stop_background_task('youtube_monitor')
        if total_added > 0:
            logger.info(f"YouTube channel monitor complete: added {total_added} videos to queue")
            # Send notification if enabled
            if settings.get('notifications_enabled'):
                self._send_notification(total_added)
            # Auto-start queue if enabled
            if settings.get('auto_start_queue'):
                await self._trigger_queue_start()
        else:
            logger.debug("YouTube channel monitor complete: no new matching videos")
        return total_added
async def _trigger_queue_start(self):
"""Trigger the video download queue to start processing."""
try:
# Try direct access first (works when running within API process)
from web.backend.routers.video_queue import queue_processor, get_app_state
import asyncio
if queue_processor.is_running and not queue_processor.is_paused:
logger.info("Auto-start: Queue processor already running")
return
if queue_processor.is_paused:
queue_processor.resume()
logger.info("Auto-start: Queue processor resumed")
return
app_state = get_app_state()
# Check if app_state.db is available (may be None when running from scheduler)
if app_state is None or app_state.db is None:
logger.debug("Auto-start: app_state.db not available, skipping")
return
# Check if there are pending items
with app_state.db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT COUNT(*) FROM video_download_queue WHERE status = 'pending'")
pending_count = cursor.fetchone()[0]
if pending_count == 0:
logger.debug("Auto-start: No pending items in queue")
return
# Start the processor
queue_processor.start(app_state.db)
logger.info(f"Auto-started download queue: {pending_count} pending items")
except ImportError:
# Running from scheduler process - fall back to HTTP (won't work without auth)
logger.debug("Auto-start: Running outside API process, queue must be started manually")
except Exception as e:
logger.warning(f"Could not auto-start download queue: {e}")
def _send_notification(self, videos_added: int):
"""
Send a Pushover notification about new videos added.
Args:
videos_added: Number of videos added to Internet Discovery
"""
try:
import random
from modules.pushover_notifier import PushoverNotifier
from modules.settings_manager import SettingsManager
from modules.unified_database import UnifiedDatabase
# Get pushover config from settings
settings_manager = SettingsManager(self.db_path)
pushover_config = settings_manager.get('pushover', {})
if not pushover_config.get('enabled'):
logger.debug("Pushover notifications disabled globally")
return
# Create unified_db for recording notification to database
unified_db = UnifiedDatabase(self.db_path)
# Create notifier with unified_db so notification is recorded
notifier = PushoverNotifier(
api_token=pushover_config.get('api_token'),
user_key=pushover_config.get('user_key'),
unified_db=unified_db
)
# Get thumbnail and channel summary from videos just added to Internet Discovery
image_path = None
channel_summary = ""
channel_list = []
conn = self._get_connection()
try:
cursor = conn.cursor()
# Get only the videos from this batch (most recent N videos from celebrity_discovered_videos)
cursor.execute('''
SELECT thumbnail, title, channel_name FROM celebrity_discovered_videos
WHERE content_type = 'youtube_monitor' AND thumbnail IS NOT NULL AND thumbnail != ''
ORDER BY discovered_at DESC
LIMIT ?
''', (videos_added,))
rows = cursor.fetchall()
if rows:
# Pick a random thumbnail from this batch
selected = random.choice(rows)
thumbnail_url = selected['thumbnail']
# Build channel summary from unique channels in THIS batch only
uploaders = {}
for row in rows:
uploader = row['channel_name'] or 'Unknown'
uploaders[uploader] = uploaders.get(uploader, 0) + 1
if uploader not in channel_list:
channel_list.append(uploader)
# Format: "Channel1 (3), Channel2 (2)" - only if count > 1
channel_parts = [f"{name} ({count})" if count > 1 else name
for name, count in sorted(uploaders.items(), key=lambda x: -x[1])[:5]]
if channel_parts:
channel_summary = "\n\nFrom: " + ", ".join(channel_parts)
# Download thumbnail to temp file
if thumbnail_url:
import urllib.request
import tempfile
try:
temp_dir = tempfile.gettempdir()
temp_path = f"{temp_dir}/yt_thumb_{random.randint(1000, 9999)}.jpg"
urllib.request.urlretrieve(thumbnail_url, temp_path)
image_path = temp_path
logger.debug(f"Downloaded thumbnail for notification: {temp_path}")
except Exception as e:
logger.debug(f"Could not download thumbnail: {e}")
finally:
conn.close()
# Build message
title = "YouTube Monitor"
message = f"Added {videos_added} new video{'s' if videos_added > 1 else ''} to Internet Discovery{channel_summary}"
# Set notification context for database recording
notifier._current_notification_context = {
'platform': 'youtube',
'source': 'youtube_monitor',
'content_type': 'video',
'download_count': videos_added,
'metadata': {'channels': channel_list}
}
# Send notification
success = notifier.send_notification(
title=title,
message=message,
priority=0, # Normal priority
image_path=image_path
)
# Clean up temp file
if image_path:
try:
import os
os.unlink(image_path)
except OSError:
pass # Best effort cleanup of temp file
if success:
logger.info(f"Sent notification: {videos_added} videos added")
else:
logger.debug("Notification not sent (disabled or failed)")
except Exception as e:
logger.warning(f"Could not send notification: {e}")
    async def check_all_now(self, from_scheduler: bool = False) -> int:
        """
        Force check all channels immediately (ignoring interval).

        Unlike run_check_cycle, this ignores the global 'enabled' flag and the
        check interval, and wraps the loop in a TaskCheckpoint so a crash can
        resume without re-checking completed channels.

        Args:
            from_scheduler: If True, send push notifications (scheduler runs only)

        Returns:
            Total number of new videos added to Internet Discovery
        """
        settings = self.get_global_settings()
        phrases = settings.get('phrases', [])
        quality = settings.get('quality', 'best')
        if not phrases:
            logger.warning("No global phrases configured")
            return 0
        channels = self.get_enabled_channels()
        if not channels:
            logger.warning("No enabled YouTube channels to monitor")
            return 0
        total_added = 0
        total_channels = len(channels)
        logger.info(f"Force checking all YouTube channels: {total_channels} channels")
        # Crash recovery checkpoint
        from modules.task_checkpoint import TaskCheckpoint
        checkpoint = TaskCheckpoint('youtube_channel_monitor', 'background')
        checkpoint.start(total_items=total_channels)
        if checkpoint.is_recovering():
            logger.info(f"YouTube monitor: recovering — skipping already-checked channels")
        # Start background task tracking (separate from main scheduler activity)
        if self.activity_manager:
            self.activity_manager.start_background_task(
                'youtube_monitor',
                'youtube_channel_monitor',
                'YouTube Channel Monitor',
                'Running',
                {'total_channels': total_channels, 'videos_found': 0}
            )
        # Randomize order to avoid detection patterns
        random.shuffle(channels)
        for idx, channel in enumerate(channels, 1):
            try:
                channel_id = str(channel.get('id', ''))
                channel_name = channel.get('channel_name') or channel['channel_url'].replace('https://www.youtube.com/@', '@')
                # Skip channels already finished before a crash/restart
                if checkpoint.is_completed(channel_id):
                    continue
                checkpoint.set_current(channel_id)
                if self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Checking: {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added, 'current_channel': channel_name}
                    )
                videos_added = await self.check_channel(channel, phrases, quality)
                # Update status if we found new videos
                if videos_added > 0 and self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Found {videos_added} new in {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added + videos_added, 'current_channel': channel_name, 'last_found': videos_added}
                    )
                total_added += videos_added
                checkpoint.mark_completed(channel_id)
                # Delay between channel checks (with jitter to avoid detection)
                base_delay = 4 + random.uniform(0, 2)  # 4-6 seconds
                await asyncio.sleep(base_delay)
                # Batch pause every 50 channels to reduce rate limiting
                if idx % 50 == 0 and idx < total_channels:
                    logger.info(f"YouTube monitor: Pausing 30s after {idx} channels to avoid rate limiting")
                    if self.activity_manager:
                        self.activity_manager.update_background_task(
                            'youtube_monitor',
                            f'Rate limit pause ({idx}/{total_channels})',
                            idx, total_channels,
                            {'total_channels': total_channels, 'videos_found': total_added, 'status': 'pausing'}
                        )
                    await asyncio.sleep(30)
            except Exception as e:
                # One bad channel must not abort the whole run
                logger.error(f"Error checking channel {channel['id']}: {e}")
        # Update global last_checked
        self._update_last_checked()
        # Checkpoint complete
        checkpoint.finish()
        # Stop background task tracking
        if self.activity_manager:
            self.activity_manager.stop_background_task('youtube_monitor')
        if total_added > 0:
            logger.info(f"YouTube channel monitor complete: added {total_added} videos to queue")
            # Send notification if enabled (only for scheduler runs)
            if from_scheduler and settings.get('notifications_enabled'):
                self._send_notification(total_added)
            # Auto-start the download queue if configured
            if settings.get('auto_start_queue'):
                await self._trigger_queue_start()
        else:
            logger.debug("YouTube channel monitor complete: no new matching videos")
        return total_added
def run_sync(self) -> int:
"""
Synchronous wrapper for run_check_cycle.
Used by scheduler which expects synchronous callbacks.
Returns:
Total number of new videos added to Internet Discovery
"""
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
return loop.run_until_complete(self.run_check_cycle())
# Convenience function for external use
def create_youtube_monitor(db_path: str, activity_manager=None) -> YouTubeChannelMonitor:
    """Factory helper returning a configured YouTubeChannelMonitor instance."""
    monitor = YouTubeChannelMonitor(db_path, activity_manager)
    return monitor