Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Todd
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions

View File

@@ -0,0 +1,194 @@
#!/usr/bin/env python3
"""
Thread-safe WebSocket event emitter for scraper monitoring
Provides real-time events for the scraping monitor page:
- Scraper sessions starting/completing
- File downloads and movements
- Progress updates
"""
from datetime import datetime
from typing import Optional, Dict, Any
class ScraperEventEmitter:
    """Emit WebSocket events for real-time scraper monitoring.

    Every ``emit_*`` method builds a ``{'type': ..., 'data': ...}`` message
    and hands it to ``websocket_manager.broadcast_sync``. Session lifecycle
    methods additionally mirror their state into
    ``app_state.active_scraper_sessions`` so it can be read back elsewhere.

    Both collaborators are optional: when one is ``None`` the corresponding
    side effect is skipped and the call is otherwise a no-op.

    NOTE(review): updates to ``active_scraper_sessions`` are plain dict
    operations with no locking in this class; any thread-safety comes from
    the collaborators themselves - confirm against their implementations.
    """

    def __init__(self, websocket_manager=None, app_state=None):
        """
        Initialize event emitter

        Args:
            websocket_manager: WebSocket connection manager exposing
                ``broadcast_sync(message)`` (optional)
            app_state: Application state object; session tracking is only
                performed when it has an ``active_scraper_sessions``
                mapping (optional)
        """
        self.websocket_manager = websocket_manager
        self.app_state = app_state

    @staticmethod
    def _timestamp() -> str:
        """Return the current local (naive) time as an ISO-8601 string."""
        return datetime.now().isoformat()

    def _active_sessions(self):
        """Return the shared session mapping, or None when it is unavailable.

        Uses an explicit ``None`` check rather than truthiness so that an
        app_state object which happens to be falsy is still honoured.
        """
        if self.app_state is None:
            return None
        return getattr(self.app_state, 'active_scraper_sessions', None)

    def emit_scraper_started(self, session_id: str, platform: str, account: str,
                             content_type: str, estimated_count: int = 0,
                             accounts_list: Optional[list] = None) -> None:
        """
        Emit when scraper session begins

        Registers the session in ``app_state.active_scraper_sessions`` (when
        available) and broadcasts a ``scraper_started`` event.

        Args:
            session_id: Unique session identifier
            platform: Platform name (instagram, snapchat, etc.)
            account: Account/username being scraped (or comma-separated list)
            content_type: Type of content (stories, posts, etc.)
            estimated_count: Estimated number of items to download
            accounts_list: Optional list of all accounts to be processed;
                only added to the event when non-empty
        """
        # Capture the time once so the broadcast 'timestamp' and the stored
        # 'start_time' always agree for the same session.
        started_at = self._timestamp()

        event_data = {
            'session_id': session_id,
            'platform': platform,
            'account': account,
            'content_type': content_type,
            'estimated_count': estimated_count,
            'timestamp': started_at,
        }

        # Include accounts list if provided
        if accounts_list:
            event_data['accounts_list'] = accounts_list

        # Store session in app_state for API retrieval.
        # The layout is meant to match the scheduler's data structure.
        sessions = self._active_sessions()
        if sessions is not None:
            sessions[session_id] = {
                'session_id': session_id,
                'platform': platform,
                'account': account,
                'content_type': content_type,
                'start_time': started_at,
                'status': 'Starting...',
                'detailed_status': 'Starting...',
                'progress': {
                    'current': 0,
                    # An unknown/zero estimate falls back to a total of 100.
                    'total': estimated_count or 100,
                },
                'stats': {'media': 0, 'review': 0, 'failed': 0},
            }

        self._broadcast({
            'type': 'scraper_started',
            'data': event_data,
        })

    def emit_scraper_progress(self, session_id: str, status: str,
                              current: int, total: int,
                              current_account: Optional[str] = None,
                              completed_accounts: Optional[list] = None) -> None:
        """
        Emit progress update

        Updates the tracked session (only if ``session_id`` is already
        registered) and broadcasts a ``scraper_progress`` event.

        Args:
            session_id: Session identifier
            status: Status message (e.g., "Downloading stories...")
            current: Current item count
            total: Total item count
            current_account: Currently active account/forum name (optional)
            completed_accounts: List of completed accounts (optional)
        """
        event_data = {
            'session_id': session_id,
            'status': status,
            'progress_current': current,
            'progress_total': total,
            'timestamp': self._timestamp(),
        }

        # Optional fields are omitted entirely when empty/None.
        if current_account:
            event_data['current_account'] = current_account
        if completed_accounts:
            event_data['completed_accounts'] = completed_accounts

        # Update session in app_state - match scheduler structure.
        # Unknown session ids are ignored rather than created here.
        sessions = self._active_sessions()
        if sessions is not None and session_id in sessions:
            session = sessions[session_id]
            session['status'] = status
            session['detailed_status'] = status
            # Update account to current account if provided
            if current_account:
                session['account'] = current_account
            # Use nested progress structure to match scheduler
            session['progress'] = {
                'current': current,
                'total': total,
            }
            if completed_accounts:
                session['completed_accounts'] = completed_accounts

        self._broadcast({
            'type': 'scraper_progress',
            'data': event_data,
        })

    def emit_scraper_completed(self, session_id: str, stats: Dict[str, int]) -> None:
        """
        Emit when scraper session completes

        Removes the session from ``app_state.active_scraper_sessions`` (a
        missing id is not an error) and broadcasts ``scraper_completed``.

        Args:
            session_id: Session identifier
            stats: Statistics dict with keys: total_downloaded, moved, review, duplicates, failed
        """
        # Remove session from app_state
        sessions = self._active_sessions()
        if sessions is not None:
            sessions.pop(session_id, None)

        self._broadcast({
            'type': 'scraper_completed',
            'data': {
                'session_id': session_id,
                'stats': stats,
                'timestamp': self._timestamp(),
            },
        })

    def emit_file_moved(self, session_id: str, platform: str, account: str,
                        filename: str, media_type: str, destination_type: str,
                        destination_path: str, thumbnail_url: Optional[str] = None,
                        face_match: Optional[Dict[str, Any]] = None) -> None:
        """
        Emit when file is moved to destination

        Only broadcasts a ``file_moved`` event; tracked session state is not
        modified by this method.

        Args:
            session_id: Session identifier
            platform: Platform name
            account: Account/username
            filename: File name
            media_type: 'image' or 'video'
            destination_type: 'media', 'review', or 'recycle'
            destination_path: Full path to destination file
            thumbnail_url: URL to thumbnail (optional)
            face_match: Face recognition result dict (optional); a missing
                or empty value is sent as ``{'matched': False}``
        """
        self._broadcast({
            'type': 'file_moved',
            'data': {
                'session_id': session_id,
                'platform': platform,
                'account': account,
                'filename': filename,
                'media_type': media_type,
                'destination_type': destination_type,
                'destination_path': destination_path,
                'thumbnail_url': thumbnail_url,
                'face_match': face_match or {'matched': False},
                'timestamp': self._timestamp(),
            },
        })

    def _broadcast(self, message: dict) -> None:
        """
        Broadcast a message to WebSocket clients

        Does nothing when no websocket manager was supplied.

        Args:
            message: Event message dict
        """
        if self.websocket_manager is not None:
            # broadcast_sync is the manager's synchronous entry point,
            # intended for emission from background threads (its
            # implementation lives outside this file).
            self.websocket_manager.broadcast_sync(message)