#!/usr/bin/env python3
|
|
"""
|
|
YouTube Channel Monitor Module
|
|
|
|
Monitors specified YouTube channels for new videos matching global phrases,
|
|
then automatically adds matching videos to the download queue.
|
|
|
|
Design:
|
|
- Global settings (phrases, interval, quality) apply to ALL channels
|
|
- Channels are just URLs to monitor - no per-channel configuration
|
|
- All channels are checked together when the interval triggers
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import random
|
|
import re
|
|
import sqlite3
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
from modules.universal_logger import get_logger
|
|
|
|
logger = get_logger('YouTubeMonitor')
|
|
|
|
|
|
class YouTubeChannelMonitor:
|
|
"""
|
|
Background monitor for YouTube channels.
|
|
Uses global phrases and interval settings for all channels.
|
|
"""
|
|
|
|
    def __init__(self, db_path: str, activity_manager=None):
        """
        Initialize the YouTube Channel Monitor.

        Args:
            db_path: Path to the SQLite database
            activity_manager: Optional activity manager for status updates
        """
        # Database file used by every method; a fresh connection is opened
        # per call via _get_connection().
        self.db_path = db_path
        # Optional collaborator for surfacing status updates; may be None.
        self.activity_manager = activity_manager
        # Hard-coded deployment paths -- assumes the standard
        # /opt/media-downloader install layout.  TODO(review): consider
        # making these configurable.
        self.yt_dlp_path = '/opt/media-downloader/venv/bin/yt-dlp'
        self.default_output_path = '/opt/immich/md/youtube/'
|
|
|
|
def _get_connection(self) -> sqlite3.Connection:
|
|
"""Get a database connection with row factory."""
|
|
conn = sqlite3.connect(self.db_path)
|
|
conn.row_factory = sqlite3.Row
|
|
return conn
|
|
|
|
# =========================================================================
|
|
# GLOBAL SETTINGS METHODS
|
|
# =========================================================================
|
|
|
|
def get_global_settings(self) -> Dict:
|
|
"""Get the global monitor settings."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT phrases, check_interval_hours, quality, enabled, last_checked, updated_at,
|
|
auto_start_queue, notifications_enabled, auto_pause_threshold_months,
|
|
paused_check_interval_days, max_results_per_phrase
|
|
FROM youtube_monitor_settings
|
|
WHERE id = 1
|
|
''')
|
|
row = cursor.fetchone()
|
|
if row:
|
|
settings = dict(row)
|
|
try:
|
|
settings['phrases'] = json.loads(settings['phrases'])
|
|
except (json.JSONDecodeError, TypeError, ValueError):
|
|
settings['phrases'] = []
|
|
# Ensure all fields are present with defaults
|
|
if 'auto_start_queue' not in settings:
|
|
settings['auto_start_queue'] = 0
|
|
if 'notifications_enabled' not in settings:
|
|
settings['notifications_enabled'] = 1
|
|
if 'auto_pause_threshold_months' not in settings:
|
|
settings['auto_pause_threshold_months'] = 24
|
|
if 'paused_check_interval_days' not in settings:
|
|
settings['paused_check_interval_days'] = 14
|
|
if 'max_results_per_phrase' not in settings:
|
|
settings['max_results_per_phrase'] = 100
|
|
return settings
|
|
# Return defaults if no row exists
|
|
return {
|
|
'phrases': [],
|
|
'check_interval_hours': 6,
|
|
'quality': 'best',
|
|
'enabled': 1,
|
|
'last_checked': None,
|
|
'updated_at': None,
|
|
'auto_start_queue': 0,
|
|
'notifications_enabled': 1,
|
|
'auto_pause_threshold_months': 24,
|
|
'paused_check_interval_days': 14,
|
|
'max_results_per_phrase': 100
|
|
}
|
|
finally:
|
|
conn.close()
|
|
|
|
def update_global_settings(self, phrases: List[str] = None,
|
|
check_interval_hours: int = None,
|
|
quality: str = None,
|
|
enabled: bool = None,
|
|
auto_start_queue: bool = None,
|
|
notifications_enabled: bool = None,
|
|
auto_pause_threshold_months: int = None,
|
|
paused_check_interval_days: int = None,
|
|
max_results_per_phrase: int = None) -> bool:
|
|
"""
|
|
Update global monitor settings.
|
|
|
|
Args:
|
|
phrases: List of phrases to match in video titles/descriptions
|
|
check_interval_hours: How often to check all channels
|
|
quality: Video quality preference
|
|
enabled: Whether monitoring is enabled globally
|
|
auto_start_queue: Whether to auto-start the download queue after adding videos
|
|
notifications_enabled: Whether to send notifications when videos are added
|
|
auto_pause_threshold_months: Months of inactivity before auto-pausing channels
|
|
paused_check_interval_days: Days between re-checking paused channels
|
|
max_results_per_phrase: Maximum number of videos to process per search phrase
|
|
|
|
Returns:
|
|
True if update was successful
|
|
"""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
|
|
# Build update parts
|
|
updates = []
|
|
values = []
|
|
|
|
if phrases is not None:
|
|
updates.append('phrases = ?')
|
|
values.append(json.dumps(phrases))
|
|
if check_interval_hours is not None:
|
|
updates.append('check_interval_hours = ?')
|
|
values.append(check_interval_hours)
|
|
if quality is not None:
|
|
updates.append('quality = ?')
|
|
values.append(quality)
|
|
if enabled is not None:
|
|
updates.append('enabled = ?')
|
|
values.append(1 if enabled else 0)
|
|
if auto_start_queue is not None:
|
|
updates.append('auto_start_queue = ?')
|
|
values.append(1 if auto_start_queue else 0)
|
|
if notifications_enabled is not None:
|
|
updates.append('notifications_enabled = ?')
|
|
values.append(1 if notifications_enabled else 0)
|
|
if auto_pause_threshold_months is not None:
|
|
updates.append('auto_pause_threshold_months = ?')
|
|
values.append(auto_pause_threshold_months)
|
|
if paused_check_interval_days is not None:
|
|
updates.append('paused_check_interval_days = ?')
|
|
values.append(paused_check_interval_days)
|
|
if max_results_per_phrase is not None:
|
|
updates.append('max_results_per_phrase = ?')
|
|
values.append(max_results_per_phrase)
|
|
|
|
if not updates:
|
|
return False
|
|
|
|
updates.append('updated_at = ?')
|
|
values.append(datetime.now().isoformat())
|
|
|
|
cursor.execute(f'''
|
|
UPDATE youtube_monitor_settings
|
|
SET {', '.join(updates)}
|
|
WHERE id = 1
|
|
''', values)
|
|
conn.commit()
|
|
|
|
logger.info(f"Updated global YouTube monitor settings")
|
|
return cursor.rowcount > 0
|
|
finally:
|
|
conn.close()
|
|
|
|
def _update_last_checked(self):
|
|
"""Update the last_checked timestamp in global settings."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
UPDATE youtube_monitor_settings
|
|
SET last_checked = ?
|
|
WHERE id = 1
|
|
''', (datetime.now().isoformat(),))
|
|
conn.commit()
|
|
finally:
|
|
conn.close()
|
|
|
|
# =========================================================================
|
|
# CHANNEL MANAGEMENT METHODS
|
|
# =========================================================================
|
|
|
|
def get_all_channels(self) -> List[Dict]:
|
|
"""Get all YouTube channel monitors."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
|
|
status, always_active, last_video_date, last_check_date,
|
|
paused_date, paused_reason, total_videos_found
|
|
FROM youtube_channel_monitors
|
|
ORDER BY created_at DESC
|
|
''')
|
|
return [dict(row) for row in cursor.fetchall()]
|
|
finally:
|
|
conn.close()
|
|
|
|
def get_enabled_channels(self) -> List[Dict]:
|
|
"""Get all enabled YouTube channels."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
|
|
status, always_active, last_video_date, last_check_date,
|
|
paused_date, paused_reason, total_videos_found
|
|
FROM youtube_channel_monitors
|
|
WHERE status = 'active'
|
|
ORDER BY channel_name, channel_url
|
|
''')
|
|
return [dict(row) for row in cursor.fetchall()]
|
|
finally:
|
|
conn.close()
|
|
|
|
def get_channel(self, channel_id: int) -> Optional[Dict]:
|
|
"""Get a specific channel by ID."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at
|
|
FROM youtube_channel_monitors
|
|
WHERE id = ?
|
|
''', (channel_id,))
|
|
row = cursor.fetchone()
|
|
return dict(row) if row else None
|
|
finally:
|
|
conn.close()
|
|
|
|
def add_channel(self, channel_url: str, channel_name: str = None, enabled: bool = True) -> int:
|
|
"""
|
|
Add a new YouTube channel to monitor.
|
|
|
|
Args:
|
|
channel_url: YouTube channel URL
|
|
channel_name: Optional display name for the channel
|
|
enabled: Whether the channel is enabled
|
|
|
|
Returns:
|
|
The ID of the created channel
|
|
"""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
INSERT INTO youtube_channel_monitors (channel_url, channel_name, enabled)
|
|
VALUES (?, ?, ?)
|
|
''', (channel_url, channel_name, 1 if enabled else 0))
|
|
conn.commit()
|
|
channel_id = cursor.lastrowid
|
|
logger.info(f"Added YouTube channel {channel_id}: {channel_name or channel_url}")
|
|
return channel_id
|
|
finally:
|
|
conn.close()
|
|
|
|
def update_channel(self, channel_id: int, **kwargs) -> bool:
|
|
"""
|
|
Update a YouTube channel.
|
|
|
|
Args:
|
|
channel_id: ID of the channel to update
|
|
**kwargs: Fields to update (channel_url, channel_name, enabled)
|
|
|
|
Returns:
|
|
True if update was successful
|
|
"""
|
|
allowed_fields = {'channel_url', 'channel_name', 'enabled'}
|
|
|
|
updates = {}
|
|
for key, value in kwargs.items():
|
|
if key in allowed_fields:
|
|
if key == 'enabled':
|
|
updates[key] = 1 if value else 0
|
|
else:
|
|
updates[key] = value
|
|
|
|
if not updates:
|
|
return False
|
|
|
|
conn = self._get_connection()
|
|
try:
|
|
set_clause = ', '.join(f'{k} = ?' for k in updates.keys())
|
|
values = list(updates.values()) + [channel_id]
|
|
|
|
cursor = conn.cursor()
|
|
cursor.execute(f'''
|
|
UPDATE youtube_channel_monitors
|
|
SET {set_clause}
|
|
WHERE id = ?
|
|
''', values)
|
|
conn.commit()
|
|
logger.info(f"Updated YouTube channel {channel_id}")
|
|
return cursor.rowcount > 0
|
|
finally:
|
|
conn.close()
|
|
|
|
def delete_channel(self, channel_id: int) -> bool:
|
|
"""
|
|
Delete a YouTube channel and its history.
|
|
|
|
Args:
|
|
channel_id: ID of the channel to delete
|
|
|
|
Returns:
|
|
True if deletion was successful
|
|
"""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
# Delete history first
|
|
cursor.execute('DELETE FROM youtube_monitor_history WHERE monitor_id = ?', (channel_id,))
|
|
# Delete channel
|
|
cursor.execute('DELETE FROM youtube_channel_monitors WHERE id = ?', (channel_id,))
|
|
conn.commit()
|
|
logger.info(f"Deleted YouTube channel {channel_id}")
|
|
return cursor.rowcount > 0
|
|
finally:
|
|
conn.close()
|
|
|
|
async def fetch_channel_id(self, channel_url: str) -> Optional[str]:
|
|
"""
|
|
Fetch YouTube channel ID from URL using yt-dlp, with curl/grep fallback.
|
|
|
|
Args:
|
|
channel_url: YouTube channel URL
|
|
|
|
Returns:
|
|
Channel ID (UC...) or None if not found
|
|
"""
|
|
# Method 1: Try yt-dlp first
|
|
try:
|
|
cmd = [
|
|
self.yt_dlp_path,
|
|
'--dump-json',
|
|
'--playlist-end', '1',
|
|
f'{channel_url}/videos'
|
|
]
|
|
|
|
process = await asyncio.create_subprocess_exec(
|
|
*cmd,
|
|
stdout=asyncio.subprocess.PIPE,
|
|
stderr=asyncio.subprocess.PIPE
|
|
)
|
|
|
|
stdout, _ = await asyncio.wait_for(process.communicate(), timeout=10)
|
|
|
|
if stdout:
|
|
data = json.loads(stdout.decode('utf-8'))
|
|
channel_id = data.get('channel_id')
|
|
if channel_id and channel_id.startswith('UC'):
|
|
logger.debug(f"Fetched channel ID via yt-dlp: {channel_id}")
|
|
return channel_id
|
|
except (asyncio.TimeoutError, json.JSONDecodeError, Exception) as e:
|
|
logger.debug(f"yt-dlp method failed for {channel_url}: {e}")
|
|
|
|
# Method 2: Fallback to curl/grep method
|
|
try:
|
|
cmd = [
|
|
'curl', '-Ls', channel_url
|
|
]
|
|
|
|
process = await asyncio.create_subprocess_exec(
|
|
*cmd,
|
|
stdout=asyncio.subprocess.PIPE,
|
|
stderr=asyncio.subprocess.PIPE
|
|
)
|
|
|
|
stdout, _ = await asyncio.wait_for(process.communicate(), timeout=10)
|
|
|
|
if stdout:
|
|
html = stdout.decode('utf-8')
|
|
# Look for channel ID patterns in the HTML
|
|
pattern = r'"(?:browseId|externalId|channelId)":"(UC[^"]+)"'
|
|
match = re.search(pattern, html)
|
|
if match:
|
|
channel_id = match.group(1)
|
|
logger.debug(f"Fetched channel ID via curl/grep: {channel_id}")
|
|
return channel_id
|
|
except (asyncio.TimeoutError, Exception) as e:
|
|
logger.debug(f"curl/grep method failed for {channel_url}: {e}")
|
|
|
|
logger.warning(f"Could not fetch channel ID for {channel_url}")
|
|
return None
|
|
|
|
# =========================================================================
|
|
# STATUS MANAGEMENT METHODS (v11.20.0)
|
|
# =========================================================================
|
|
|
|
def get_active_channels(self) -> List[Dict]:
|
|
"""Get channels with status='active'."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
|
|
status, always_active, last_video_date, last_check_date,
|
|
paused_date, paused_reason, total_videos_found
|
|
FROM youtube_channel_monitors
|
|
WHERE status = 'active'
|
|
ORDER BY channel_name, channel_url
|
|
''')
|
|
return [dict(row) for row in cursor.fetchall()]
|
|
finally:
|
|
conn.close()
|
|
|
|
def get_paused_channels(self) -> List[Dict]:
|
|
"""Get channels with status like 'paused_%'."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
|
|
status, always_active, last_video_date, last_check_date,
|
|
paused_date, paused_reason, total_videos_found
|
|
FROM youtube_channel_monitors
|
|
WHERE status LIKE 'paused_%'
|
|
ORDER BY paused_date DESC
|
|
''')
|
|
return [dict(row) for row in cursor.fetchall()]
|
|
finally:
|
|
conn.close()
|
|
|
|
    def get_channels_filtered(self,
                              status_filter: str = None,
                              always_active_filter: str = None,
                              search: str = None,
                              sort_field: str = 'name',
                              sort_ascending: bool = True,
                              limit: int = None,
                              offset: int = 0) -> Dict:
        """
        Get channels with server-side filtering, searching, sorting, and pagination.

        Args:
            status_filter: 'all', 'active', 'paused_manual', 'paused_auto', 'paused_all'
            always_active_filter: 'all', 'always_active', 'regular'
            search: Search term for channel name or URL
            sort_field: 'name', 'last_checked', 'last_video_date', 'videos_found', 'created_at'
            sort_ascending: Sort direction
            limit: Maximum number of results
            offset: Offset for pagination

        Returns:
            Dict with 'channels' list and 'total' count
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()

            # Build WHERE clause.  Filter values are compared against fixed
            # literals only, so user input never reaches the SQL text;
            # the search term is parameterized.
            where_clauses = []
            params = []

            # Status filter -- unknown values silently mean "no filter".
            if status_filter and status_filter != 'all':
                if status_filter == 'active':
                    where_clauses.append("status = 'active'")
                elif status_filter == 'paused_manual':
                    where_clauses.append("status = 'paused_manual'")
                elif status_filter == 'paused_auto':
                    where_clauses.append("status = 'paused_auto'")
                elif status_filter == 'paused_all':
                    where_clauses.append("status LIKE 'paused_%'")

            # Always active filter -- NULL counts as "regular" for rows
            # created before the always_active column existed.
            if always_active_filter and always_active_filter != 'all':
                if always_active_filter == 'always_active':
                    where_clauses.append("always_active = 1")
                elif always_active_filter == 'regular':
                    where_clauses.append("(always_active = 0 OR always_active IS NULL)")

            # Search filter (substring match on name or URL)
            if search:
                where_clauses.append("(channel_name LIKE ? OR channel_url LIKE ?)")
                search_param = f"%{search}%"
                params.extend([search_param, search_param])

            where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""

            # Get total count before pagination so the caller can page.
            count_query = f"SELECT COUNT(*) FROM youtube_channel_monitors {where_sql}"
            cursor.execute(count_query, params)
            total = cursor.fetchone()[0]

            # Build ORDER BY clause via a whitelist map -- sort_field never
            # reaches the SQL text directly; unknown values fall back to
            # the case-insensitive name sort.
            sort_columns = {
                'name': 'LOWER(COALESCE(channel_name, channel_url))',
                'last_checked': 'last_check_date',
                'last_video_date': 'last_video_date',
                'videos_found': 'total_videos_found',
                'created_at': 'created_at'
            }
            sort_column = sort_columns.get(sort_field, 'LOWER(COALESCE(channel_name, channel_url))')
            sort_direction = 'ASC' if sort_ascending else 'DESC'
            order_by = f"ORDER BY {sort_column} {sort_direction}"

            # Build main query with pagination (using parameterized queries for security)
            # NOTE: `if limit` treats limit=0 the same as "no limit".
            limit_sql = "LIMIT ? OFFSET ?" if limit else ""

            query = f'''
                SELECT id, channel_url, channel_name, enabled, last_checked, videos_found, created_at,
                       status, always_active, last_video_date, last_check_date,
                       paused_date, paused_reason, total_videos_found, channel_id
                FROM youtube_channel_monitors
                {where_sql}
                {order_by}
                {limit_sql}
            '''

            # Add limit/offset to params if pagination is used
            query_params = list(params)
            if limit:
                query_params.extend([limit, offset])

            cursor.execute(query, query_params)
            channels = [dict(row) for row in cursor.fetchall()]

            return {
                'channels': channels,
                'total': total
            }
        finally:
            conn.close()
|
|
|
|
def pause_channel(self, channel_id: int, reason: str = None, auto: bool = False) -> bool:
|
|
"""
|
|
Pause a channel manually or automatically.
|
|
|
|
Args:
|
|
channel_id: ID of the channel to pause
|
|
reason: Optional reason for pausing
|
|
auto: If True, set status to 'paused_auto', otherwise 'paused_manual'
|
|
|
|
Returns:
|
|
True if pause was successful
|
|
"""
|
|
status = 'paused_auto' if auto else 'paused_manual'
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
UPDATE youtube_channel_monitors
|
|
SET status = ?, paused_date = ?, paused_reason = ?
|
|
WHERE id = ?
|
|
''', (status, datetime.now().isoformat(), reason, channel_id))
|
|
conn.commit()
|
|
logger.info(f"{'Auto-' if auto else ''}Paused channel {channel_id}: {reason}")
|
|
return cursor.rowcount > 0
|
|
finally:
|
|
conn.close()
|
|
|
|
def resume_channel(self, channel_id: int) -> bool:
|
|
"""
|
|
Resume a paused channel.
|
|
|
|
Args:
|
|
channel_id: ID of the channel to resume
|
|
|
|
Returns:
|
|
True if resume was successful
|
|
"""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
UPDATE youtube_channel_monitors
|
|
SET status = 'active', paused_date = NULL, paused_reason = NULL
|
|
WHERE id = ?
|
|
''', (channel_id,))
|
|
conn.commit()
|
|
logger.info(f"Resumed channel {channel_id}")
|
|
return cursor.rowcount > 0
|
|
finally:
|
|
conn.close()
|
|
|
|
def toggle_always_active(self, channel_id: int, value: bool) -> bool:
|
|
"""
|
|
Toggle always_active flag for a channel.
|
|
|
|
Args:
|
|
channel_id: ID of the channel
|
|
value: True to enable always_active, False to disable
|
|
|
|
Returns:
|
|
True if toggle was successful
|
|
"""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
UPDATE youtube_channel_monitors
|
|
SET always_active = ?
|
|
WHERE id = ?
|
|
''', (1 if value else 0, channel_id))
|
|
conn.commit()
|
|
logger.info(f"Set always_active={value} for channel {channel_id}")
|
|
return cursor.rowcount > 0
|
|
finally:
|
|
conn.close()
|
|
|
|
def get_statistics(self) -> Dict:
|
|
"""Get monitor statistics."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT
|
|
COUNT(*) as total,
|
|
SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) as active,
|
|
SUM(CASE WHEN status = 'paused_manual' THEN 1 ELSE 0 END) as paused_manual,
|
|
SUM(CASE WHEN status = 'paused_auto' THEN 1 ELSE 0 END) as paused_auto,
|
|
SUM(CASE WHEN always_active = 1 THEN 1 ELSE 0 END) as always_active_count,
|
|
SUM(COALESCE(total_videos_found, 0)) as total_videos
|
|
FROM youtube_channel_monitors
|
|
''')
|
|
row = cursor.fetchone()
|
|
return dict(row) if row else {
|
|
'total': 0,
|
|
'active': 0,
|
|
'paused_manual': 0,
|
|
'paused_auto': 0,
|
|
'always_active_count': 0,
|
|
'total_videos': 0
|
|
}
|
|
finally:
|
|
conn.close()
|
|
|
|
# =========================================================================
|
|
# AUTO-PAUSE AND PAUSED-CHECK LOGIC (v11.20.0)
|
|
# =========================================================================
|
|
|
|
    async def _check_channel_for_auto_pause(self, channel_id: int) -> bool:
        """
        Check if a single channel should be auto-paused based on inactivity or no matched videos.
        Called immediately after checking each channel.

        Args:
            channel_id: ID of the channel to check

        Returns:
            True if channel was auto-paused, False otherwise
        """
        from datetime import timedelta

        settings = self.get_global_settings()
        threshold_months = settings.get('auto_pause_threshold_months', 24)

        # Calculate cutoff date (in YYYYMMDD format to match yt-dlp dates)
        # NOTE: months are approximated as 30 days throughout this method.
        cutoff_date = datetime.now() - timedelta(days=threshold_months * 30)
        cutoff_str = cutoff_date.strftime('%Y%m%d')

        conn = self._get_connection()
        try:
            cursor = conn.cursor()

            # Check if this specific channel should be auto-paused
            cursor.execute('''
                SELECT id, channel_name, last_video_date, always_active, status,
                       videos_found, last_check_date
                FROM youtube_channel_monitors
                WHERE id = ?
            ''', (channel_id,))

            row = cursor.fetchone()
            if not row:
                return False

            channel_name = row['channel_name']
            last_video_date = row['last_video_date']
            always_active = row['always_active']
            status = row['status']
            videos_found = row['videos_found']
            last_check_date = row['last_check_date']

            # Don't auto-pause if already paused or always_active
            if status != 'active' or always_active == 1:
                return False

            # Rule 1: auto-pause if channel has been checked but has 0
            # matched videos.  This takes precedence over the inactivity
            # rule below -- the method returns as soon as it fires.
            if videos_found == 0 and last_check_date:
                reason = "No matching videos found"

                cursor.execute('''
                    UPDATE youtube_channel_monitors
                    SET status = 'paused_auto',
                        paused_date = ?,
                        paused_reason = ?
                    WHERE id = ?
                ''', (datetime.now().isoformat(), reason, channel_id))

                conn.commit()
                logger.info(f"Auto-paused channel '{channel_name}': {reason}")
                return True

            # Rule 2: auto-pause if channel is inactive (no uploads in
            # threshold period).  Lexicographic comparison of YYYYMMDD
            # strings is chronologically correct; NOTE(review): if
            # last_video_date is stored in ISO format instead, this string
            # comparison against a YYYYMMDD cutoff is unreliable -- confirm
            # which formats actually reach the database.
            if last_video_date and last_video_date < cutoff_str:
                # Calculate days since last upload for the pause reason
                try:
                    # Accept both YYYYMMDD (yt-dlp) and ISO formats.
                    if len(last_video_date) == 8 and last_video_date.isdigit():
                        last_upload = datetime.strptime(last_video_date, '%Y%m%d')
                    else:
                        last_upload = datetime.fromisoformat(last_video_date)

                    days_inactive = (datetime.now() - last_upload).days
                    reason = f"No uploads in {days_inactive} days (threshold: {threshold_months * 30} days)"

                    cursor.execute('''
                        UPDATE youtube_channel_monitors
                        SET status = 'paused_auto',
                            paused_date = ?,
                            paused_reason = ?
                        WHERE id = ?
                    ''', (datetime.now().isoformat(), reason, channel_id))

                    conn.commit()
                    logger.info(f"Auto-paused channel '{channel_name}': {reason}")
                    return True
                except (ValueError, TypeError) as e:
                    # Unparseable date: log and leave the channel untouched.
                    logger.error(f"Error parsing date for channel {channel_id}: {e}")
                    return False

            return False
        finally:
            conn.close()
|
|
|
|
    async def check_for_inactive_channels(self) -> int:
        """
        Check for channels that should be auto-paused based on inactivity.

        Bulk counterpart of _check_channel_for_auto_pause (inactivity rule
        only -- the "no matching videos" rule is not applied here).

        Returns:
            Number of channels auto-paused
        """
        from datetime import timedelta

        settings = self.get_global_settings()
        threshold_months = settings.get('auto_pause_threshold_months', 24)

        # Calculate cutoff date (in YYYYMMDD format to match yt-dlp dates)
        # NOTE: months are approximated as 30 days.
        cutoff_date = datetime.now() - timedelta(days=threshold_months * 30)
        cutoff_str = cutoff_date.strftime('%Y%m%d')

        conn = self._get_connection()
        try:
            cursor = conn.cursor()

            # Find active channels that haven't posted in threshold period
            # Note: Comparing YYYYMMDD strings works correctly (20231225 < 20241227)
            cursor.execute('''
                SELECT id, channel_name, last_video_date
                FROM youtube_channel_monitors
                WHERE status = 'active'
                AND always_active = 0
                AND last_video_date IS NOT NULL
                AND last_video_date < ?
            ''', (cutoff_str,))

            inactive_channels = cursor.fetchall()
            paused_count = 0

            for row in inactive_channels:
                channel_id = row['id']
                channel_name = row['channel_name']
                last_video_date = row['last_video_date']

                # Calculate days since last upload
                try:
                    # Handle both YYYYMMDD format (from yt-dlp) and ISO format
                    if len(last_video_date) == 8 and last_video_date.isdigit():
                        # YYYYMMDD format from yt-dlp
                        last_upload = datetime.strptime(last_video_date, '%Y%m%d')
                    else:
                        # ISO format
                        last_upload = datetime.fromisoformat(last_video_date)

                    days_inactive = (datetime.now() - last_upload).days

                    reason = f"No uploads in {days_inactive} days (threshold: {threshold_months * 30} days)"

                    cursor.execute('''
                        UPDATE youtube_channel_monitors
                        SET status = 'paused_auto',
                            paused_date = ?,
                            paused_reason = ?
                        WHERE id = ?
                    ''', (datetime.now().isoformat(), reason, channel_id))

                    paused_count += 1
                    logger.info(f"Auto-paused channel '{channel_name}': {reason}")
                except (ValueError, TypeError) as e:
                    # Skip rows whose date cannot be parsed; other channels
                    # in the batch are still processed.
                    logger.error(f"Error parsing date for channel {channel_id}: {e}")
                    continue

            # Single commit for the whole batch of updates.
            conn.commit()
            return paused_count
        finally:
            conn.close()
|
|
|
|
    async def check_paused_channels(self) -> int:
        """
        Periodically check paused channels to see if they've resumed posting.

        Only channels not checked within ``paused_check_interval_days`` are
        examined.  Manually paused channels get their dates refreshed but
        are never auto-resumed; only 'paused_auto' channels can resume.

        Returns:
            Number of channels auto-resumed
        """
        from datetime import timedelta

        settings = self.get_global_settings()
        check_interval_days = settings.get('paused_check_interval_days', 14)
        threshold_months = settings.get('auto_pause_threshold_months', 24)

        # Find paused channels that need checking
        cutoff_date = datetime.now() - timedelta(days=check_interval_days)

        conn = self._get_connection()
        try:
            cursor = conn.cursor()

            cursor.execute('''
                SELECT id, channel_url, channel_name, status
                FROM youtube_channel_monitors
                WHERE status LIKE 'paused_%'
                AND (last_check_date IS NULL OR last_check_date < ?)
            ''', (cutoff_date.isoformat(),))

            paused_channels = [dict(row) for row in cursor.fetchall()]
        finally:
            # Connection is closed before the slow network loop below;
            # each update re-opens its own short-lived connection.
            conn.close()

        resumed_count = 0
        for channel in paused_channels:
            try:
                # Check if channel has new videos (light check - just get latest 5)
                # get_channel_videos is defined elsewhere in this class.
                videos = await self.get_channel_videos(channel['channel_url'], max_results=5)

                most_recent_upload = None
                if videos:
                    # Find most recent video (string max works for YYYYMMDD dates)
                    for video in videos:
                        upload_date_str = video.get('upload_date')
                        if upload_date_str:
                            if not most_recent_upload or upload_date_str > most_recent_upload:
                                most_recent_upload = upload_date_str

                # Update last_check_date and last_video_date
                # NOTE(review): if the light check returned no videos,
                # most_recent_upload is None and this overwrites
                # last_video_date with NULL -- confirm that is intended.
                conn = self._get_connection()
                try:
                    cursor = conn.cursor()
                    cursor.execute('''
                        UPDATE youtube_channel_monitors
                        SET last_check_date = ?,
                            last_video_date = ?
                        WHERE id = ?
                    ''', (datetime.now().isoformat(), most_recent_upload, channel['id']))
                    conn.commit()
                finally:
                    conn.close()

                # Check if auto-paused channel should be resumed
                if channel['status'] == 'paused_auto' and most_recent_upload:
                    try:
                        # Parse date (yt-dlp format: YYYYMMDD)
                        if len(most_recent_upload) == 8:
                            upload_datetime = datetime.strptime(most_recent_upload, '%Y%m%d')
                        else:
                            upload_datetime = datetime.fromisoformat(most_recent_upload)

                        days_since_upload = (datetime.now() - upload_datetime).days

                        # If upload is recent (within threshold), auto-resume
                        if days_since_upload < (threshold_months * 30):
                            self.resume_channel(channel['id'])
                            resumed_count += 1
                            logger.info(f"Auto-resumed channel '{channel['channel_name']}' - new upload detected ({days_since_upload} days old)")
                    except (ValueError, TypeError) as e:
                        logger.error(f"Error parsing upload date for channel {channel['id']}: {e}")

            except Exception as e:
                # One failing channel must not abort the whole sweep.
                logger.error(f"Error checking paused channel {channel['id']} ({channel['channel_name']}): {e}")
                continue

        return resumed_count
|
|
|
|
def check_paused_channels_sync(self) -> int:
|
|
"""Synchronous wrapper for check_paused_channels."""
|
|
import asyncio
|
|
loop = asyncio.new_event_loop()
|
|
try:
|
|
return loop.run_until_complete(self.check_paused_channels())
|
|
finally:
|
|
loop.close()
|
|
|
|
# =========================================================================
|
|
# HISTORY METHODS
|
|
# =========================================================================
|
|
|
|
def get_channel_history(self, channel_id: int, limit: int = 50) -> List[Dict]:
|
|
"""Get history for a specific channel."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT id, monitor_id, video_id, video_title, matched_phrase, action, created_at
|
|
FROM youtube_monitor_history
|
|
WHERE monitor_id = ?
|
|
ORDER BY created_at DESC
|
|
LIMIT ?
|
|
''', (channel_id, limit))
|
|
return [dict(row) for row in cursor.fetchall()]
|
|
finally:
|
|
conn.close()
|
|
|
|
def get_all_history(self, limit: int = 100) -> List[Dict]:
|
|
"""Get combined history for all channels."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT h.id, h.monitor_id, h.video_id, h.video_title, h.matched_phrase,
|
|
h.action, h.created_at, c.channel_name, c.channel_url
|
|
FROM youtube_monitor_history h
|
|
LEFT JOIN youtube_channel_monitors c ON h.monitor_id = c.id
|
|
ORDER BY h.created_at DESC
|
|
LIMIT ?
|
|
''', (limit,))
|
|
return [dict(row) for row in cursor.fetchall()]
|
|
finally:
|
|
conn.close()
|
|
|
|
def _is_video_processed(self, channel_id: int, video_id: str) -> bool:
|
|
"""Check if a video has already been processed for a channel."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT 1 FROM youtube_monitor_history
|
|
WHERE monitor_id = ? AND video_id = ?
|
|
''', (channel_id, video_id))
|
|
return cursor.fetchone() is not None
|
|
finally:
|
|
conn.close()
|
|
|
|
def _record_video_processed(self, channel_id: int, video_id: str,
|
|
video_title: str, matched_phrase: str, action: str):
|
|
"""Record that a video has been processed."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
INSERT OR IGNORE INTO youtube_monitor_history
|
|
(monitor_id, video_id, video_title, matched_phrase, action)
|
|
VALUES (?, ?, ?, ?, ?)
|
|
''', (channel_id, video_id, video_title, matched_phrase, action))
|
|
conn.commit()
|
|
except Exception as e:
|
|
logger.error(f"Failed to record video processed: {e}")
|
|
finally:
|
|
conn.close()
|
|
|
|
    def _update_channel_stats(self, channel_id: int, videos_added: int, most_recent_upload: Optional[str] = None):
        """Update channel statistics after a check.

        Recomputes the monitor's video counts from the shared
        celebrity_discovered_videos table (rather than incrementing), stamps
        the check timestamps, and conditionally refreshes last_video_date.

        Args:
            channel_id: Primary key of the youtube_channel_monitors row.
            videos_added: Videos added during this check.
                # NOTE(review): this parameter is not referenced in the body —
                # counts are recomputed from the discovery table instead.
            most_recent_upload: Latest upload date seen (YYYYMMDD string), or
                None to leave the stored last_video_date untouched.
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()

            # Get channel info for accurate matching
            cursor.execute('SELECT channel_id, channel_name FROM youtube_channel_monitors WHERE id = ?', (channel_id,))
            row = cursor.fetchone()
            if not row:
                # Monitor row was deleted; nothing to update.
                return
            yt_channel_id, channel_name = row[0], row[1]

            # Count videos from celebrity_discovered_videos (Internet Discovery database)
            # This is the shared database that shows on the Internet Discovery page
            if yt_channel_id:
                # NOTE: SQL AND binds tighter than OR, so this WHERE matches
                # either an exact channel_id, or (row has no channel_id AND a
                # space-insensitive, case-insensitive name match).
                cursor.execute('''
                    SELECT COUNT(*)
                    FROM celebrity_discovered_videos
                    WHERE (
                        -- Match by channel_id
                        channel_id = ?
                        OR
                        -- Fallback: match by name if video has no channel_id
                        (channel_id IS NULL OR channel_id = '')
                        AND REPLACE(LOWER(channel_name), ' ', '') = REPLACE(LOWER(?), ' ', '')
                    )
                    AND platform = 'youtube'
                ''', (yt_channel_id, channel_name))
                total_count = cursor.fetchone()[0]
            else:
                # Fallback to name-only matching if monitor has no channel_id
                cursor.execute('''
                    SELECT COUNT(*)
                    FROM celebrity_discovered_videos
                    WHERE REPLACE(LOWER(channel_name), ' ', '') = REPLACE(LOWER(?), ' ', '')
                    AND platform = 'youtube'
                ''', (channel_name,))
                total_count = cursor.fetchone()[0]

            # Update last_video_date if we have a new value, or keep existing if we don't
            # (the CASE keeps the old value when most_recent_upload is NULL).
            cursor.execute('''
                UPDATE youtube_channel_monitors
                SET last_checked = ?,
                    last_check_date = ?,
                    videos_found = ?,
                    total_videos_found = ?,
                    last_video_date = CASE
                        WHEN ? IS NOT NULL THEN ?
                        ELSE last_video_date
                    END
                WHERE id = ?
            ''', (
                datetime.now().isoformat(),
                datetime.now().isoformat(),
                total_count,
                total_count,
                most_recent_upload,
                most_recent_upload,
                channel_id
            ))
            conn.commit()
        finally:
            conn.close()
|
|
|
|
# =========================================================================
|
|
# VIDEO FETCHING AND MATCHING
|
|
# =========================================================================
|
|
|
|
    async def _get_channel_latest_upload_date(self, channel_url: str) -> Optional[str]:
        """
        Get the upload date of the most recent video on a channel.

        Uses full metadata fetch (not flat-playlist) to get accurate upload_date.
        Tries multiple URL formats if the first attempt fails.

        Args:
            channel_url: URL of the YouTube channel

        Returns:
            Upload date string in YYYYMMDD format, or None if not found
        """
        # Try multiple URL formats
        urls_to_try = []

        # First try: /videos suffix — strip any known tab suffix first so we
        # don't end up with e.g. ".../shorts/videos".
        base_url = channel_url.rstrip('/')
        for suffix in ['/videos', '/streams', '/shorts', '/playlists', '/community', '/about']:
            if base_url.endswith(suffix):
                base_url = base_url[:-len(suffix)]
                break
        urls_to_try.append(f"{base_url}/videos")

        # Second try: base URL without suffix
        urls_to_try.append(base_url)

        # Third try: /streams suffix (for channels that primarily stream)
        urls_to_try.append(f"{base_url}/streams")

        for url_attempt in urls_to_try:
            cmd = [
                self.yt_dlp_path,
                '--playlist-end', '1',  # Only get the most recent video
                '--dump-json',
                '--no-warnings',
                '--ignore-errors',
                '--skip-download',
                url_attempt
            ]

            try:
                process = await asyncio.create_subprocess_exec(
                    *cmd,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE
                )
                stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=30)

                # Check stderr for specific errors (missing tab / 404) and
                # fall through to the next URL format if found.
                stderr_text = stderr.decode().strip()
                if 'does not have a' in stderr_text.lower() or '404' in stderr_text:
                    # Try next URL format
                    continue

                # yt-dlp emits one JSON object per line; the first parseable
                # line with an upload_date wins.
                for line in stdout.decode().strip().split('\n'):
                    if line:
                        try:
                            data = json.loads(line)
                            upload_date = data.get('upload_date')
                            if upload_date:
                                logger.debug(f"Successfully fetched upload date {upload_date} from {url_attempt}")
                                return upload_date
                        except json.JSONDecodeError:
                            pass

            except asyncio.TimeoutError:
                logger.debug(f"Timeout fetching latest upload date from {url_attempt}")
                continue
            except Exception as e:
                logger.debug(f"Error fetching latest upload date from {url_attempt}: {e}")
                continue

        logger.warning(f"Could not fetch latest upload date from {base_url} after trying all URL formats")
        return None
|
|
|
|
    async def get_channel_videos(self, channel_url: str, max_results: int = 20, search_phrase: Optional[str] = None) -> List[Dict]:
        """
        Fetch videos from a YouTube channel using yt-dlp.

        With a search_phrase, queries the channel's /search endpoint; otherwise
        lists the channel's /videos tab. Uses --flat-playlist, so per-video
        fields such as upload_date/description may be missing — callers needing
        full metadata should follow up with fetch_video_metadata().

        Args:
            channel_url: URL of the YouTube channel
            max_results: Maximum number of videos to fetch
            search_phrase: Optional phrase to search within the channel

        Returns:
            List of video metadata dictionaries (basic info from flat-playlist);
            empty list on timeout or error.
        """
        # Build the URL based on whether we're searching or fetching recent
        if search_phrase:
            # Use channel search URL to find videos matching the phrase
            # Remove any trailing path from channel URL
            base_url = channel_url.rstrip('/')
            for suffix in ['/videos', '/streams', '/shorts', '/playlists', '/community', '/about']:
                if base_url.endswith(suffix):
                    base_url = base_url[:-len(suffix)]
                    # NOTE(review): no `break` here, unlike the equivalent loop
                    # in _get_channel_latest_upload_date — presumably harmless
                    # since at most one suffix matches; confirm intent.
            # URL encode the search phrase
            import urllib.parse
            encoded_phrase = urllib.parse.quote(search_phrase)
            channel_url = f"{base_url}/search?query={encoded_phrase}"
        else:
            # Ensure URL ends with /videos for recent uploads
            if not channel_url.endswith('/videos'):
                if channel_url.endswith('/'):
                    channel_url = channel_url + 'videos'
                else:
                    channel_url = channel_url + '/videos'

        cmd = [
            self.yt_dlp_path,
            '--flat-playlist',
            '--dump-json',
            '--playlist-end', str(max_results),
            '--no-warnings',
            '--ignore-errors',
            channel_url
        ]

        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=90)

            # One JSON object per output line; skip unparseable lines.
            videos = []
            for line in stdout.decode().strip().split('\n'):
                if line:
                    try:
                        data = json.loads(line)
                        videos.append({
                            'video_id': data.get('id'),
                            'title': data.get('title', ''),
                            'channel_name': data.get('uploader', data.get('channel', '')),
                            'channel_id': data.get('channel_id', ''),
                            'upload_date': data.get('upload_date'),
                            'duration': data.get('duration', 0),
                            'view_count': data.get('view_count', 0),
                            'thumbnail': data.get('thumbnail', ''),
                            'description': data.get('description', ''),
                            'url': f"https://www.youtube.com/watch?v={data.get('id')}"
                        })
                    except json.JSONDecodeError:
                        pass

            logger.debug(f"Fetched {len(videos)} videos from {channel_url}")
            return videos
        except asyncio.TimeoutError:
            logger.error(f"Timeout fetching videos from {channel_url}")
            return []
        except Exception as e:
            logger.error(f"Error fetching videos from {channel_url}: {e}")
            return []
|
|
|
|
async def fetch_video_metadata(self, video_id: str) -> Dict:
|
|
"""
|
|
Fetch full metadata for a single video including upload date, resolution, and thumbnail.
|
|
|
|
Args:
|
|
video_id: YouTube video ID
|
|
|
|
Returns:
|
|
Dictionary with full video metadata
|
|
"""
|
|
try:
|
|
cmd = [
|
|
self.yt_dlp_path,
|
|
f'https://www.youtube.com/watch?v={video_id}',
|
|
'--dump-json',
|
|
'--no-download',
|
|
'--no-warnings',
|
|
'--ignore-errors'
|
|
]
|
|
|
|
process = await asyncio.create_subprocess_exec(
|
|
*cmd,
|
|
stdout=asyncio.subprocess.PIPE,
|
|
stderr=asyncio.subprocess.PIPE
|
|
)
|
|
stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=30)
|
|
|
|
if stdout:
|
|
data = json.loads(stdout.decode().strip())
|
|
|
|
# Extract max resolution (height) and corresponding width from formats
|
|
max_resolution = 0
|
|
max_width = 0
|
|
formats = data.get('formats', [])
|
|
for fmt in formats:
|
|
height = fmt.get('height')
|
|
if height and isinstance(height, int) and height > max_resolution:
|
|
# Only count video formats (not audio-only)
|
|
if fmt.get('vcodec', 'none') != 'none':
|
|
max_resolution = height
|
|
# Get the width for this format
|
|
width = fmt.get('width')
|
|
if width and isinstance(width, int):
|
|
max_width = width
|
|
|
|
# Get best thumbnail - prefer jpg over webp for better compatibility
|
|
thumbnail = f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
|
|
thumbnails = data.get('thumbnails', [])
|
|
if thumbnails:
|
|
# Filter for jpg thumbnails only (webp may not exist for all videos)
|
|
jpg_thumbs = [t for t in thumbnails if t.get('url', '').endswith('.jpg')]
|
|
if jpg_thumbs:
|
|
# Get highest quality jpg thumbnail
|
|
best_thumb = max(jpg_thumbs, key=lambda t: t.get('height', 0) or 0)
|
|
thumbnail = best_thumb.get('url', thumbnail)
|
|
|
|
return {
|
|
'video_id': video_id,
|
|
'title': data.get('title', ''),
|
|
'channel_name': data.get('uploader', data.get('channel', '')),
|
|
'channel_id': data.get('channel_id', ''),
|
|
'upload_date': data.get('upload_date', ''),
|
|
'duration': data.get('duration', 0),
|
|
'view_count': data.get('view_count', 0),
|
|
'thumbnail': thumbnail,
|
|
'description': data.get('description', '')[:500] if data.get('description') else '',
|
|
'max_resolution': max_resolution if max_resolution > 0 else None,
|
|
'max_width': max_width if max_width > 0 else None,
|
|
'url': f"https://www.youtube.com/watch?v={video_id}"
|
|
}
|
|
except asyncio.TimeoutError:
|
|
logger.warning(f"Timeout fetching metadata for {video_id}")
|
|
except Exception as e:
|
|
logger.warning(f"Failed to fetch metadata for {video_id}: {e}")
|
|
return {}
|
|
|
|
def _matches_phrase(self, title: str, description: str, phrases: List[str]) -> Optional[str]:
|
|
"""
|
|
Check if video matches any phrase.
|
|
Also checks hashtag variations (e.g., "Eva Longoria" matches "#EvaLongoria").
|
|
|
|
Args:
|
|
title: Video title
|
|
description: Video description
|
|
phrases: List of phrases to match
|
|
|
|
Returns:
|
|
The matched phrase, or None if no match
|
|
"""
|
|
text = f"{title} {description}".lower()
|
|
|
|
for phrase in phrases:
|
|
phrase_lower = phrase.lower()
|
|
|
|
# Check direct match
|
|
if phrase_lower in text:
|
|
return phrase
|
|
|
|
# Check hashtag variation (e.g., "Eva Longoria" -> "#evalongoria")
|
|
# Remove spaces, hyphens, underscores from phrase for hashtag matching
|
|
hashtag_phrase = '#' + phrase_lower.replace(' ', '').replace('-', '').replace('_', '')
|
|
if hashtag_phrase in text:
|
|
return phrase
|
|
|
|
return None
|
|
|
|
    def _add_to_download_queue(self, video: Dict, channel: Dict, quality: str) -> bool:
        """
        Add a matching video directly to the video_download_queue.

        Dedupe is two-layered: an explicit SELECT first, and INSERT races are
        absorbed by catching sqlite3.IntegrityError.

        # NOTE(review): check_channel() currently routes matches through
        # _add_to_discovery() instead of this method; presumably this path is
        # used by another caller outside this chunk — confirm.

        Args:
            video: Video metadata dictionary (should be full metadata from fetch_video_metadata)
            channel: Channel dictionary
            quality: Video quality from global settings (stored in the row's metadata JSON)

        Returns:
            True if successfully added to queue
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()

            # Check if video already exists in queue
            cursor.execute('''
                SELECT 1 FROM video_download_queue
                WHERE platform = 'youtube' AND video_id = ?
            ''', (video['video_id'],))

            if cursor.fetchone():
                logger.debug(f"Video {video['video_id']} already in queue")
                return False

            # Parse upload date if available (format: YYYYMMDD) into ISO form.
            upload_date = None
            if video.get('upload_date'):
                try:
                    upload_date = datetime.strptime(video['upload_date'], '%Y%m%d').isoformat()
                except (ValueError, TypeError):
                    pass  # Skip invalid date formats

            # Insert into queue with all metadata fields matching celebrity discovery.
            # Column order and the 17 positional values below must stay in sync.
            cursor.execute('''
                INSERT INTO video_download_queue
                (platform, video_id, url, title, channel_name, thumbnail, duration,
                 upload_date, view_count, max_resolution, max_width, description, source_type,
                 source_name, priority, status, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                'youtube',
                video['video_id'],
                video['url'],
                video['title'],
                video.get('channel_name', channel.get('channel_name', '')),
                video.get('thumbnail', ''),
                video.get('duration', 0),
                upload_date,
                video.get('view_count', 0),
                video.get('max_resolution'),  # Now included from full metadata
                video.get('max_width'),  # Video width for aspect ratio
                video.get('description', '')[:500] if video.get('description') else None,
                'youtube_monitor',
                f"Monitor: {channel.get('channel_name', channel['channel_url'])}",
                5,  # Default priority
                'pending',
                json.dumps({
                    'channel_id': channel['id'],
                    'quality': quality,
                    'output_path': self.default_output_path,
                    'matched_from': 'youtube_channel_monitor'
                })
            ))
            conn.commit()
            logger.info(f"Added video '{video['title'][:50]}' to download queue (res: {video.get('max_resolution', 'N/A')}p)")
            return True
        except sqlite3.IntegrityError:
            # Lost a race with a concurrent insert — treat like the SELECT hit.
            logger.debug(f"Video {video['video_id']} already exists in queue (integrity error)")
            return False
        except Exception as e:
            logger.error(f"Failed to add video to queue: {e}")
            return False
        finally:
            conn.close()
|
|
|
|
def _get_or_create_monitor_preset(self, celebrity_id: int, channel_name: str) -> int:
|
|
"""Get or create a preset for YouTube Monitor videos."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
preset_name = f"YT Monitor: {channel_name[:30]}"
|
|
|
|
# Check if preset exists
|
|
cursor.execute('''
|
|
SELECT id FROM celebrity_search_presets
|
|
WHERE celebrity_id = ? AND name = ?
|
|
''', (celebrity_id, preset_name))
|
|
row = cursor.fetchone()
|
|
if row:
|
|
return row[0]
|
|
|
|
# Create new preset
|
|
cursor.execute('''
|
|
INSERT INTO celebrity_search_presets
|
|
(name, celebrity_id, source_type, source_value, platform, enabled, category)
|
|
VALUES (?, ?, 'youtube_monitor', ?, 'youtube', 1, 'youtube_monitor')
|
|
''', (preset_name, celebrity_id, channel_name))
|
|
conn.commit()
|
|
return cursor.lastrowid
|
|
finally:
|
|
conn.close()
|
|
|
|
def _find_celebrity_by_phrase(self, phrase: str) -> Optional[int]:
|
|
"""Find a celebrity ID that matches the phrase (by name)."""
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
# Try exact match first
|
|
cursor.execute('''
|
|
SELECT id FROM celebrity_profiles
|
|
WHERE LOWER(name) = LOWER(?)
|
|
''', (phrase,))
|
|
row = cursor.fetchone()
|
|
if row:
|
|
return row[0]
|
|
|
|
# Try partial match
|
|
cursor.execute('''
|
|
SELECT id FROM celebrity_profiles
|
|
WHERE LOWER(name) LIKE LOWER(?)
|
|
''', (f'%{phrase}%',))
|
|
row = cursor.fetchone()
|
|
return row[0] if row else None
|
|
finally:
|
|
conn.close()
|
|
|
|
    def _add_to_discovery(self, video: Dict, channel: Dict, matched_phrase: str) -> bool:
        """
        Add a matching video to the celebrity discovery page.

        Flow: resolve the celebrity from the matched phrase, get/create a
        monitor preset, dedupe against existing discovery rows, insert the
        row, then (best effort, after commit) cache the thumbnail bytes and
        backfill the monitor's channel_id for more accurate future matching.

        Args:
            video: Video metadata dictionary
            channel: Channel dictionary
            matched_phrase: The phrase that matched (used to find celebrity)

        Returns:
            True if successfully added
        """
        conn = self._get_connection()
        try:
            cursor = conn.cursor()

            # Find celebrity by phrase — without one we cannot attribute the video.
            celebrity_id = self._find_celebrity_by_phrase(matched_phrase)
            if not celebrity_id:
                logger.warning(f"No celebrity found for phrase '{matched_phrase}' - skipping")
                return False

            # Get or create preset for this channel
            channel_name = channel.get('channel_name', channel['channel_url'].split('@')[-1])
            preset_id = self._get_or_create_monitor_preset(celebrity_id, channel_name)

            # Check if video already exists in discovery
            cursor.execute('''
                SELECT 1 FROM celebrity_discovered_videos
                WHERE video_id = ? AND platform = 'youtube'
            ''', (video['video_id'],))

            if cursor.fetchone():
                logger.debug(f"Video {video['video_id']} already in discovery")
                return False

            # Parse upload date if available (format: YYYYMMDD) into ISO form.
            upload_date = None
            if video.get('upload_date'):
                try:
                    upload_date = datetime.strptime(video['upload_date'], '%Y%m%d').isoformat()
                except (ValueError, TypeError):
                    pass  # Skip invalid date formats

            # Insert into celebrity_discovered_videos.
            # Column order and the 18 positional values below must stay in sync.
            cursor.execute('''
                INSERT INTO celebrity_discovered_videos
                (preset_id, celebrity_id, video_id, platform, url, title, channel_name,
                 channel_id, thumbnail, duration, upload_date, view_count, description,
                 content_type, status, max_resolution, max_width, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                preset_id,
                celebrity_id,
                video['video_id'],
                'youtube',
                video['url'],
                video['title'],
                video.get('channel_name', channel.get('channel_name', '')),
                video.get('channel_id', ''),
                video.get('thumbnail', ''),
                video.get('duration', 0),
                upload_date,
                video.get('view_count', 0),
                video.get('description', '')[:500] if video.get('description') else None,
                'youtube_monitor',
                'new',
                video.get('max_resolution'),
                video.get('max_width'),
                json.dumps({
                    'monitor_channel_id': channel['id'],
                    'monitor_channel_name': channel_name,
                    'matched_phrase': matched_phrase
                })
            ))
            conn.commit()

            # Pre-cache thumbnail for faster page loading (best effort; reuses
            # this connection/cursor and commits internally).
            thumbnail_url = video.get('thumbnail', '')
            if thumbnail_url:
                self._cache_thumbnail(video['video_id'], thumbnail_url, cursor, conn)

            # Update monitor's channel_id if not set (for accurate future matching)
            if video.get('channel_id'):
                cursor.execute('''
                    UPDATE youtube_channel_monitors
                    SET channel_id = ?
                    WHERE id = ? AND (channel_id IS NULL OR channel_id = '')
                ''', (video['channel_id'], channel['id']))
                conn.commit()

            logger.info(f"Added video '{video['title'][:50]}' to discovery (res: {video.get('max_resolution', 'N/A')}p)")
            return True
        except sqlite3.IntegrityError:
            # Lost a race with a concurrent insert — treat like the SELECT hit.
            logger.debug(f"Video {video['video_id']} already exists in discovery (integrity error)")
            return False
        except Exception as e:
            logger.error(f"Failed to add video to discovery: {e}")
            return False
        finally:
            conn.close()
|
|
|
|
def _cache_thumbnail(self, video_id: str, thumbnail_url: str, cursor, conn) -> None:
|
|
"""
|
|
Pre-cache thumbnail by fetching from URL and storing in database.
|
|
This speeds up Internet Discovery page loading.
|
|
"""
|
|
try:
|
|
import requests
|
|
response = requests.get(thumbnail_url, timeout=10, headers={
|
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
|
})
|
|
if response.status_code == 200 and response.content:
|
|
cursor.execute('''
|
|
UPDATE celebrity_discovered_videos
|
|
SET thumbnail_data = ?
|
|
WHERE video_id = ?
|
|
''', (response.content, video_id))
|
|
conn.commit()
|
|
logger.debug(f"Cached thumbnail for {video_id}")
|
|
except Exception as e:
|
|
logger.debug(f"Failed to cache thumbnail for {video_id}: {e}")
|
|
|
|
# =========================================================================
|
|
# MAIN CHECK METHODS
|
|
# =========================================================================
|
|
|
|
    async def check_channel(self, channel: Dict, phrases: List[str], quality: str) -> int:
        """
        Check a single channel for matching videos by searching for each phrase.

        For each global phrase, runs a yt-dlp channel search, filters results
        to videos whose TITLE contains the phrase and that genuinely belong to
        the monitored channel (YouTube channel search can return unrelated
        videos), fetches full metadata, and adds matches to Internet Discovery.
        Every examined video is recorded in history so it is never reprocessed.
        Finishes by updating channel stats and running the auto-pause check.

        Args:
            channel: Channel dictionary
            phrases: Global phrases to search for
            quality: Global quality setting
                # NOTE(review): `quality` is not referenced in this body —
                # presumably consumed by the download path; confirm.

        Returns:
            Number of new videos added to Internet Discovery
        """
        channel_name = channel.get('channel_name') or channel['channel_url']

        if not phrases:
            logger.warning(f"No global phrases configured - skipping check")
            return 0

        logger.info(f"Checking channel: {channel_name}")

        videos_added = 0
        seen_video_ids = set()
        most_recent_upload = None  # Track most recent video upload date (YYYYMMDD)

        # First, get the channel's latest videos (no search) to track last upload date
        # This is needed for auto-pause logic even if no videos match the search phrases
        try:
            # Get just the most recent video with full metadata to get upload_date
            most_recent_upload = await self._get_channel_latest_upload_date(channel['channel_url'])
            if most_recent_upload:
                logger.debug(f"Channel latest upload: {most_recent_upload}")
        except Exception as e:
            logger.debug(f"Could not fetch latest upload date for {channel_name}: {e}")

        # Search for each phrase on the channel
        settings = self.get_global_settings()
        max_results = settings.get('max_results_per_phrase', 100)

        for phrase in phrases:
            # Search the channel for this phrase
            videos = await self.get_channel_videos(channel['channel_url'], max_results=max_results, search_phrase=phrase)

            logger.debug(f"Found {len(videos)} videos searching for '{phrase}' on {channel_name}")

            for video in videos:
                video_id = video.get('video_id')
                if not video_id:
                    continue

                # Track most recent upload date (YYYYMMDD strings compare lexically)
                upload_date = video.get('upload_date')
                if upload_date:
                    if not most_recent_upload or upload_date > most_recent_upload:
                        most_recent_upload = upload_date

                # Skip duplicates within this check (same video found by multiple phrases)
                if video_id in seen_video_ids:
                    continue
                seen_video_ids.add(video_id)

                # Skip if already processed
                if self._is_video_processed(channel['id'], video_id):
                    continue

                # Verify the phrase actually appears in title (not description)
                # YouTube search can return related/recommended content
                matched_phrase = self._matches_phrase(
                    video.get('title', ''),
                    '',  # Only match on title, not description
                    [phrase]
                )

                if matched_phrase:
                    # Fetch full metadata for matching video (includes resolution, thumbnail, etc.)
                    logger.info(f"Matched phrase '{matched_phrase}' - fetching full metadata for: {video['title'][:60]}")
                    full_video = await self.fetch_video_metadata(video_id)

                    if full_video:
                        # IMPORTANT: Verify the video is actually from the monitored channel
                        # YouTube's channel search can return videos from other channels
                        video_channel_name = (full_video.get('channel_name') or '').lower().strip()
                        monitored_channel_name = (channel.get('channel_name') or '').lower().strip()
                        video_channel_id = (full_video.get('channel_id') or '').lower().strip()

                        # Check if channel matches (by name or by channel ID in URL)
                        channel_url_lower = (channel.get('channel_url') or '').lower()
                        channel_matches = (
                            video_channel_name == monitored_channel_name or
                            (video_channel_id and video_channel_id in channel_url_lower) or
                            (video_channel_name and video_channel_name in channel_url_lower)
                        )

                        if not channel_matches:
                            # Record as 'wrong_channel' so it's never re-examined.
                            logger.debug(f"Skipping video from different channel: '{full_video.get('channel_name')}' (expected '{channel.get('channel_name')}')")
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'wrong_channel'
                            )
                            continue

                        # Use full metadata - add to discovery page
                        if self._add_to_discovery(full_video, channel, matched_phrase):
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'discovered'
                            )
                            videos_added += 1
                        else:
                            # Already in discovery or failed
                            self._record_video_processed(
                                channel['id'], video_id,
                                full_video.get('title', video.get('title', '')),
                                matched_phrase, 'skipped'
                            )
                    else:
                        # Fallback to basic info if full metadata fetch fails
                        logger.warning(f"Could not fetch full metadata for {video_id}, using basic info")

                        # Still verify channel matches using basic info
                        # (no channel_id available in flat-playlist fallback)
                        video_channel_name = (video.get('channel_name') or '').lower().strip()
                        monitored_channel_name = (channel.get('channel_name') or '').lower().strip()
                        channel_url_lower = (channel.get('channel_url') or '').lower()

                        channel_matches = (
                            video_channel_name == monitored_channel_name or
                            (video_channel_name and video_channel_name in channel_url_lower)
                        )

                        if not channel_matches:
                            logger.debug(f"Skipping video from different channel: '{video.get('channel_name')}' (expected '{channel.get('channel_name')}')")
                            self._record_video_processed(
                                channel['id'], video_id,
                                video.get('title', ''), matched_phrase, 'wrong_channel'
                            )
                            continue

                        if self._add_to_discovery(video, channel, matched_phrase):
                            self._record_video_processed(
                                channel['id'], video_id,
                                video.get('title', ''), matched_phrase, 'discovered'
                            )
                            videos_added += 1
                        else:
                            self._record_video_processed(
                                channel['id'], video_id,
                                video.get('title', ''), matched_phrase, 'skipped'
                            )

                    # Small delay between metadata fetches to avoid rate limiting
                    await asyncio.sleep(1)

            # Small delay between phrase searches
            if len(phrases) > 1:
                await asyncio.sleep(2)

        # Update channel stats
        self._update_channel_stats(channel['id'], videos_added, most_recent_upload)

        # Check if this channel should be auto-paused due to inactivity
        await self._check_channel_for_auto_pause(channel['id'])

        return videos_added
|
|
|
|
async def check_single_channel(self, channel_id: int) -> int:
|
|
"""
|
|
Check a single channel by ID (for manual trigger).
|
|
|
|
Args:
|
|
channel_id: ID of the channel to check
|
|
|
|
Returns:
|
|
Number of new videos added to Internet Discovery
|
|
"""
|
|
channel = self.get_channel(channel_id)
|
|
if not channel:
|
|
logger.error(f"Channel {channel_id} not found")
|
|
return 0
|
|
|
|
settings = self.get_global_settings()
|
|
phrases = settings.get('phrases', [])
|
|
quality = settings.get('quality', 'best')
|
|
|
|
if not phrases:
|
|
logger.warning(f"No global phrases configured")
|
|
return 0
|
|
|
|
videos_added = await self.check_channel(channel, phrases, quality)
|
|
|
|
# Auto-start queue if enabled and videos were added
|
|
if videos_added > 0 and settings.get('auto_start_queue'):
|
|
await self._trigger_queue_start()
|
|
|
|
return videos_added
|
|
|
|
    async def run_check_cycle(self) -> int:
        """
        Main entry point - check all enabled channels using global settings.

        Bails out early when monitoring is disabled, no phrases are set, or no
        channels are active. Otherwise checks channels in shuffled order with
        jittered delays (and a 30s pause every 50 channels) to avoid rate
        limiting, reporting progress via the activity manager throughout.
        On completion, optionally notifies and auto-starts the download queue.

        Returns:
            Total number of new videos added to Internet Discovery
        """
        settings = self.get_global_settings()

        if not settings.get('enabled'):
            logger.debug("YouTube channel monitoring is disabled globally")
            return 0

        phrases = settings.get('phrases', [])
        quality = settings.get('quality', 'best')

        if not phrases:
            logger.debug("No global phrases configured for YouTube monitoring")
            return 0

        channels = self.get_active_channels()

        if not channels:
            logger.debug("No active YouTube channels to monitor")
            return 0

        total_added = 0
        total_channels = len(channels)

        logger.info(f"Running YouTube channel monitor: {total_channels} channels, phrases: {phrases}")

        # Start background task tracking (separate from main scheduler activity)
        if self.activity_manager:
            self.activity_manager.start_background_task(
                'youtube_monitor',
                'youtube_channel_monitor',
                'YouTube Channel Monitor',
                'Running',
                {'total_channels': total_channels, 'videos_found': 0}
            )

        # Randomize order to avoid detection patterns
        random.shuffle(channels)

        for idx, channel in enumerate(channels, 1):
            try:
                channel_name = channel.get('channel_name') or channel['channel_url'].replace('https://www.youtube.com/@', '@')

                # Update background task status
                if self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Checking: {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added, 'current_channel': channel_name}
                    )

                videos_added = await self.check_channel(channel, phrases, quality)

                # Update status if we found new videos
                if videos_added > 0 and self.activity_manager:
                    self.activity_manager.update_background_task(
                        'youtube_monitor',
                        f'Found {videos_added} new in {channel_name}',
                        idx, total_channels,
                        {'total_channels': total_channels, 'videos_found': total_added + videos_added, 'current_channel': channel_name, 'last_found': videos_added}
                    )

                total_added += videos_added

                # Delay between channel checks (with jitter to avoid detection)
                base_delay = 4 + random.uniform(0, 2)  # 4-6 seconds
                await asyncio.sleep(base_delay)

                # Batch pause every 50 channels to reduce rate limiting
                if idx % 50 == 0 and idx < total_channels:
                    logger.info(f"YouTube monitor: Pausing 30s after {idx} channels to avoid rate limiting")
                    if self.activity_manager:
                        self.activity_manager.update_background_task(
                            'youtube_monitor',
                            f'Rate limit pause ({idx}/{total_channels})',
                            idx, total_channels,
                            {'total_channels': total_channels, 'videos_found': total_added, 'status': 'pausing'}
                        )
                    await asyncio.sleep(30)

            except Exception as e:
                # One failing channel must not abort the whole cycle.
                logger.error(f"Error checking channel {channel['id']}: {e}")

        # Update global last_checked
        self._update_last_checked()

        # Note: Auto-pause now happens per-channel in check_channel() for real-time feedback

        # Stop background task tracking
        if self.activity_manager:
            self.activity_manager.stop_background_task('youtube_monitor')

        if total_added > 0:
            logger.info(f"YouTube channel monitor complete: added {total_added} videos to queue")

            # Send notification if enabled
            if settings.get('notifications_enabled'):
                self._send_notification(total_added)

            # Auto-start queue if enabled
            if settings.get('auto_start_queue'):
                await self._trigger_queue_start()
        else:
            logger.debug("YouTube channel monitor complete: no new matching videos")

        return total_added
|
|
|
|
async def _trigger_queue_start(self):
|
|
"""Trigger the video download queue to start processing."""
|
|
try:
|
|
# Try direct access first (works when running within API process)
|
|
from web.backend.routers.video_queue import queue_processor, get_app_state
|
|
import asyncio
|
|
|
|
if queue_processor.is_running and not queue_processor.is_paused:
|
|
logger.info("Auto-start: Queue processor already running")
|
|
return
|
|
|
|
if queue_processor.is_paused:
|
|
queue_processor.resume()
|
|
logger.info("Auto-start: Queue processor resumed")
|
|
return
|
|
|
|
app_state = get_app_state()
|
|
|
|
# Check if app_state.db is available (may be None when running from scheduler)
|
|
if app_state is None or app_state.db is None:
|
|
logger.debug("Auto-start: app_state.db not available, skipping")
|
|
return
|
|
|
|
# Check if there are pending items
|
|
with app_state.db.get_connection() as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute("SELECT COUNT(*) FROM video_download_queue WHERE status = 'pending'")
|
|
pending_count = cursor.fetchone()[0]
|
|
|
|
if pending_count == 0:
|
|
logger.debug("Auto-start: No pending items in queue")
|
|
return
|
|
|
|
# Start the processor
|
|
queue_processor.start(app_state.db)
|
|
logger.info(f"Auto-started download queue: {pending_count} pending items")
|
|
|
|
except ImportError:
|
|
# Running from scheduler process - fall back to HTTP (won't work without auth)
|
|
logger.debug("Auto-start: Running outside API process, queue must be started manually")
|
|
except Exception as e:
|
|
logger.warning(f"Could not auto-start download queue: {e}")
|
|
|
|
def _send_notification(self, videos_added: int):
|
|
"""
|
|
Send a Pushover notification about new videos added.
|
|
|
|
Args:
|
|
videos_added: Number of videos added to Internet Discovery
|
|
"""
|
|
try:
|
|
import random
|
|
from modules.pushover_notifier import PushoverNotifier
|
|
from modules.settings_manager import SettingsManager
|
|
from modules.unified_database import UnifiedDatabase
|
|
|
|
# Get pushover config from settings
|
|
settings_manager = SettingsManager(self.db_path)
|
|
pushover_config = settings_manager.get('pushover', {})
|
|
|
|
if not pushover_config.get('enabled'):
|
|
logger.debug("Pushover notifications disabled globally")
|
|
return
|
|
|
|
# Create unified_db for recording notification to database
|
|
unified_db = UnifiedDatabase(self.db_path)
|
|
|
|
# Create notifier with unified_db so notification is recorded
|
|
notifier = PushoverNotifier(
|
|
api_token=pushover_config.get('api_token'),
|
|
user_key=pushover_config.get('user_key'),
|
|
unified_db=unified_db
|
|
)
|
|
|
|
# Get thumbnail and channel summary from videos just added to Internet Discovery
|
|
image_path = None
|
|
channel_summary = ""
|
|
channel_list = []
|
|
conn = self._get_connection()
|
|
try:
|
|
cursor = conn.cursor()
|
|
# Get only the videos from this batch (most recent N videos from celebrity_discovered_videos)
|
|
cursor.execute('''
|
|
SELECT thumbnail, title, channel_name FROM celebrity_discovered_videos
|
|
WHERE content_type = 'youtube_monitor' AND thumbnail IS NOT NULL AND thumbnail != ''
|
|
ORDER BY discovered_at DESC
|
|
LIMIT ?
|
|
''', (videos_added,))
|
|
rows = cursor.fetchall()
|
|
if rows:
|
|
# Pick a random thumbnail from this batch
|
|
selected = random.choice(rows)
|
|
thumbnail_url = selected['thumbnail']
|
|
|
|
# Build channel summary from unique channels in THIS batch only
|
|
uploaders = {}
|
|
for row in rows:
|
|
uploader = row['channel_name'] or 'Unknown'
|
|
uploaders[uploader] = uploaders.get(uploader, 0) + 1
|
|
if uploader not in channel_list:
|
|
channel_list.append(uploader)
|
|
|
|
# Format: "Channel1 (3), Channel2 (2)" - only if count > 1
|
|
channel_parts = [f"{name} ({count})" if count > 1 else name
|
|
for name, count in sorted(uploaders.items(), key=lambda x: -x[1])[:5]]
|
|
if channel_parts:
|
|
channel_summary = "\n\nFrom: " + ", ".join(channel_parts)
|
|
|
|
# Download thumbnail to temp file
|
|
if thumbnail_url:
|
|
import urllib.request
|
|
import tempfile
|
|
try:
|
|
temp_dir = tempfile.gettempdir()
|
|
temp_path = f"{temp_dir}/yt_thumb_{random.randint(1000, 9999)}.jpg"
|
|
urllib.request.urlretrieve(thumbnail_url, temp_path)
|
|
image_path = temp_path
|
|
logger.debug(f"Downloaded thumbnail for notification: {temp_path}")
|
|
except Exception as e:
|
|
logger.debug(f"Could not download thumbnail: {e}")
|
|
finally:
|
|
conn.close()
|
|
|
|
# Build message
|
|
title = "YouTube Monitor"
|
|
message = f"Added {videos_added} new video{'s' if videos_added > 1 else ''} to Internet Discovery{channel_summary}"
|
|
|
|
# Set notification context for database recording
|
|
notifier._current_notification_context = {
|
|
'platform': 'youtube',
|
|
'source': 'youtube_monitor',
|
|
'content_type': 'video',
|
|
'download_count': videos_added,
|
|
'metadata': {'channels': channel_list}
|
|
}
|
|
|
|
# Send notification
|
|
success = notifier.send_notification(
|
|
title=title,
|
|
message=message,
|
|
priority=0, # Normal priority
|
|
image_path=image_path
|
|
)
|
|
|
|
# Clean up temp file
|
|
if image_path:
|
|
try:
|
|
import os
|
|
os.unlink(image_path)
|
|
except OSError:
|
|
pass # Best effort cleanup of temp file
|
|
|
|
if success:
|
|
logger.info(f"Sent notification: {videos_added} videos added")
|
|
else:
|
|
logger.debug("Notification not sent (disabled or failed)")
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Could not send notification: {e}")
|
|
|
|
async def check_all_now(self, from_scheduler: bool = False) -> int:
|
|
"""
|
|
Force check all channels immediately (ignoring interval).
|
|
|
|
Args:
|
|
from_scheduler: If True, send push notifications (scheduler runs only)
|
|
|
|
Returns:
|
|
Total number of new videos added to Internet Discovery
|
|
"""
|
|
settings = self.get_global_settings()
|
|
phrases = settings.get('phrases', [])
|
|
quality = settings.get('quality', 'best')
|
|
|
|
if not phrases:
|
|
logger.warning("No global phrases configured")
|
|
return 0
|
|
|
|
channels = self.get_enabled_channels()
|
|
|
|
if not channels:
|
|
logger.warning("No enabled YouTube channels to monitor")
|
|
return 0
|
|
|
|
total_added = 0
|
|
total_channels = len(channels)
|
|
|
|
logger.info(f"Force checking all YouTube channels: {total_channels} channels")
|
|
|
|
# Crash recovery checkpoint
|
|
from modules.task_checkpoint import TaskCheckpoint
|
|
checkpoint = TaskCheckpoint('youtube_channel_monitor', 'background')
|
|
checkpoint.start(total_items=total_channels)
|
|
if checkpoint.is_recovering():
|
|
logger.info(f"YouTube monitor: recovering — skipping already-checked channels")
|
|
|
|
# Start background task tracking (separate from main scheduler activity)
|
|
if self.activity_manager:
|
|
self.activity_manager.start_background_task(
|
|
'youtube_monitor',
|
|
'youtube_channel_monitor',
|
|
'YouTube Channel Monitor',
|
|
'Running',
|
|
{'total_channels': total_channels, 'videos_found': 0}
|
|
)
|
|
|
|
# Randomize order to avoid detection patterns
|
|
random.shuffle(channels)
|
|
|
|
for idx, channel in enumerate(channels, 1):
|
|
try:
|
|
channel_id = str(channel.get('id', ''))
|
|
channel_name = channel.get('channel_name') or channel['channel_url'].replace('https://www.youtube.com/@', '@')
|
|
|
|
if checkpoint.is_completed(channel_id):
|
|
continue
|
|
|
|
checkpoint.set_current(channel_id)
|
|
|
|
if self.activity_manager:
|
|
self.activity_manager.update_background_task(
|
|
'youtube_monitor',
|
|
f'Checking: {channel_name}',
|
|
idx, total_channels,
|
|
{'total_channels': total_channels, 'videos_found': total_added, 'current_channel': channel_name}
|
|
)
|
|
|
|
videos_added = await self.check_channel(channel, phrases, quality)
|
|
|
|
# Update status if we found new videos
|
|
if videos_added > 0 and self.activity_manager:
|
|
self.activity_manager.update_background_task(
|
|
'youtube_monitor',
|
|
f'Found {videos_added} new in {channel_name}',
|
|
idx, total_channels,
|
|
{'total_channels': total_channels, 'videos_found': total_added + videos_added, 'current_channel': channel_name, 'last_found': videos_added}
|
|
)
|
|
|
|
total_added += videos_added
|
|
|
|
checkpoint.mark_completed(channel_id)
|
|
|
|
# Delay between channel checks (with jitter to avoid detection)
|
|
base_delay = 4 + random.uniform(0, 2) # 4-6 seconds
|
|
await asyncio.sleep(base_delay)
|
|
|
|
# Batch pause every 50 channels to reduce rate limiting
|
|
if idx % 50 == 0 and idx < total_channels:
|
|
logger.info(f"YouTube monitor: Pausing 30s after {idx} channels to avoid rate limiting")
|
|
if self.activity_manager:
|
|
self.activity_manager.update_background_task(
|
|
'youtube_monitor',
|
|
f'Rate limit pause ({idx}/{total_channels})',
|
|
idx, total_channels,
|
|
{'total_channels': total_channels, 'videos_found': total_added, 'status': 'pausing'}
|
|
)
|
|
await asyncio.sleep(30)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error checking channel {channel['id']}: {e}")
|
|
|
|
# Update global last_checked
|
|
self._update_last_checked()
|
|
|
|
# Checkpoint complete
|
|
checkpoint.finish()
|
|
|
|
# Stop background task tracking
|
|
if self.activity_manager:
|
|
self.activity_manager.stop_background_task('youtube_monitor')
|
|
|
|
if total_added > 0:
|
|
logger.info(f"YouTube channel monitor complete: added {total_added} videos to queue")
|
|
|
|
# Send notification if enabled (only for scheduler runs)
|
|
if from_scheduler and settings.get('notifications_enabled'):
|
|
self._send_notification(total_added)
|
|
|
|
# Auto-start the download queue if configured
|
|
if settings.get('auto_start_queue'):
|
|
await self._trigger_queue_start()
|
|
else:
|
|
logger.debug("YouTube channel monitor complete: no new matching videos")
|
|
|
|
return total_added
|
|
|
|
def run_sync(self) -> int:
|
|
"""
|
|
Synchronous wrapper for run_check_cycle.
|
|
Used by scheduler which expects synchronous callbacks.
|
|
|
|
Returns:
|
|
Total number of new videos added to Internet Discovery
|
|
"""
|
|
try:
|
|
loop = asyncio.get_event_loop()
|
|
except RuntimeError:
|
|
loop = asyncio.new_event_loop()
|
|
asyncio.set_event_loop(loop)
|
|
|
|
return loop.run_until_complete(self.run_check_cycle())
|
|
|
|
|
|
# Convenience function for external use
def create_youtube_monitor(db_path: str, activity_manager=None) -> YouTubeChannelMonitor:
    """Build and return a YouTubeChannelMonitor bound to the given database."""
    monitor = YouTubeChannelMonitor(db_path, activity_manager)
    return monitor