#!/usr/bin/env python3
"""
Forum Database Adapter for Unified Database
Provides compatibility layer for forum_downloader to use UnifiedDatabase
"""

import sqlite3
import json
import hashlib
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

from modules.universal_logger import get_logger

logger = get_logger('ForumAdapter')


class ForumDatabaseAdapter:
    """
    Adapter to allow forum_downloader to use UnifiedDatabase.

    Mimics the original forum database interface: thread/post tracking,
    image bookkeeping, search monitors and a download queue, all backed
    by connections handed out by the shared UnifiedDatabase instance.
    """

    def __init__(self, unified_db, db_path: Optional[str] = None):
        """
        Initialize the adapter.

        Args:
            unified_db: UnifiedDatabase instance that owns all connections
            db_path: Ignored - kept for compatibility
        """
        self.unified_db = unified_db
        self.db_path = db_path  # Keep for compatibility but not used

    def get_file_hash(self, file_path: str) -> Optional[str]:
        """Calculate SHA256 hash of a file (delegates to UnifiedDatabase)"""
        from modules.unified_database import UnifiedDatabase
        return UnifiedDatabase.get_file_hash(file_path)

    def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
        """Get download record by file hash (delegates to UnifiedDatabase)"""
        return self.unified_db.get_download_by_file_hash(file_hash)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to release here: connection lifetime is managed by
        # UnifiedDatabase, not by this adapter.
        pass

    def _get_connection(self):
        """Get a write-capable connection from unified database"""
        return self.unified_db.get_connection(for_write=True)

    def _execute_with_retry(self, operation, retries: int = 3,
                            for_write: bool = False):
        """
        Execute a database operation with retry logic for lock/deadlock errors.

        Args:
            operation: A callable that takes a connection and returns a result
            retries: Number of retry attempts
            for_write: Whether this is a write operation

        Returns:
            The result of the operation

        Raises:
            sqlite3.OperationalError: If operation fails after all retries
        """
        for attempt in range(retries):
            try:
                with self.unified_db.get_connection(for_write=for_write) as conn:
                    return operation(conn)
            except sqlite3.OperationalError as e:
                if ("locked" in str(e) or "deadlock" in str(e).lower()) and attempt < retries - 1:
                    # Linear backoff: 1s, 3s, 5s, ...
                    # (NOTE: the original comment called this "exponential",
                    # but 1 + attempt * 2 grows linearly.)
                    delay = 1 + attempt * 2
                    logger.debug(f"Database locked, retrying in {delay} seconds...")
                    time.sleep(delay)
                    continue
                else:
                    logger.error(f"Database operation failed after {attempt + 1} attempts: {e}")
                    raise
        # This point should never be reached (the loop either returns or
        # raises), but raise explicitly to satisfy type checkers.
        raise sqlite3.OperationalError("Database operation failed after all retries")

    def db_add_thread(self, thread_id: str, forum_name: str, thread_url: str,
                      thread_title: Optional[str] = None,
                      monitor_until: Optional[datetime] = None) -> bool:
        """Add a forum thread to tracking.

        Returns True when a new row was inserted (INSERT OR IGNORE leaves
        rowcount at 0 for an already-tracked thread), False on error.
        """
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute('''
                INSERT OR IGNORE INTO forum_threads
                (thread_id, forum_name, thread_url, thread_title,
                 created_date, last_checked, status, monitor_until)
                VALUES (?, ?, ?, ?, ?, ?, 'active', ?)
            ''', (thread_id, forum_name, thread_url, thread_title,
                  datetime.now(), datetime.now(), monitor_until))
            conn.commit()
            return cursor.rowcount > 0

        try:
            return self._execute_with_retry(operation, for_write=True)
        except Exception as e:
            logger.error(f"Error adding thread: {e}")
            return False

    def db_update_thread(self, thread_id: str,
                         last_post_date: Optional[datetime] = None,
                         post_count: Optional[int] = None) -> bool:
        """Update thread information.

        Always refreshes last_checked; last_post_date / post_count are only
        updated when supplied. Returns True when a row was updated.
        """
        # Build updates list outside the operation for clarity
        updates = ["last_checked = ?"]
        params: List[Any] = [datetime.now()]

        if last_post_date is not None:
            updates.append("last_post_date = ?")
            params.append(last_post_date)

        if post_count is not None:
            updates.append("post_count = ?")
            params.append(post_count)

        params.append(thread_id)

        # Pre-build the SQL query to avoid f-string inside operation.
        # Only column names from the fixed lists above are interpolated,
        # so this is not an injection risk; values stay parameterized.
        sql = f'UPDATE forum_threads SET {", ".join(updates)} WHERE thread_id = ?'

        def operation(conn):
            cursor = conn.cursor()
            cursor.execute(sql, params)
            conn.commit()
            return cursor.rowcount > 0

        try:
            return self._execute_with_retry(operation, for_write=True)
        except Exception as e:
            logger.error(f"Error updating thread {thread_id}: {e}")
            return False

    def db_update_thread_last_checked(self, thread_id: str) -> bool:
        """Update the last_checked timestamp for a forum thread"""
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute('''
                UPDATE forum_threads
                SET last_checked = ?
                WHERE thread_id = ?
            ''', (datetime.now(), thread_id))
            conn.commit()
            return cursor.rowcount > 0

        try:
            return self._execute_with_retry(operation, for_write=True)
        except Exception as e:
            logger.error(f"Error updating last_checked for thread {thread_id}: {e}")
            return False

    def db_get_thread(self, thread_id: str) -> Optional[Dict]:
        """Get thread information as a dict, or None if missing / on error."""
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute(
                "SELECT * FROM forum_threads WHERE thread_id = ?",
                (thread_id,)
            )
            row = cursor.fetchone()
            return dict(row) if row else None

        try:
            return self._execute_with_retry(operation, for_write=False)
        except Exception as e:
            logger.error(f"Error getting thread {thread_id}: {e}")
            return None

    def db_add_post(self, post_id: str, thread_id: str,
                    post_url: Optional[str] = None,
                    author: Optional[str] = None,
                    post_date: Optional[datetime] = None,
                    has_images: bool = False) -> bool:
        """Add (or upsert) a forum post.

        The content hash is derived from thread_id:post_id, not the post
        body, so it identifies the post rather than its content.
        """
        with self._get_connection() as conn:
            cursor = conn.cursor()
            try:
                content_hash = hashlib.sha256(
                    f"{thread_id}:{post_id}".encode()).hexdigest()
                cursor.execute('''
                    INSERT INTO forum_posts
                    (post_id, thread_id, post_url, author, post_date,
                     content_hash, has_images)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                    ON CONFLICT (post_id) DO UPDATE SET
                        thread_id = EXCLUDED.thread_id,
                        post_url = EXCLUDED.post_url,
                        author = EXCLUDED.author,
                        post_date = EXCLUDED.post_date,
                        content_hash = EXCLUDED.content_hash,
                        has_images = EXCLUDED.has_images
                ''', (post_id, thread_id, post_url, author, post_date,
                      content_hash, has_images))
                conn.commit()
                return True
            except Exception as e:
                logger.error(f"Error adding post: {e}")
                return False

    def db_get_image_id(self, img_url: str) -> Optional[int]:
        """Check if image already exists in downloads; return its row id."""
        url_hash = self.unified_db.get_url_hash(img_url)

        def operation(conn):
            cursor = conn.cursor()
            cursor.execute(
                "SELECT id FROM downloads WHERE url_hash = ? AND platform = 'forums'",
                (url_hash,)
            )
            row = cursor.fetchone()
            return row[0] if row else None

        try:
            return self._execute_with_retry(operation, for_write=False)
        except Exception as e:
            logger.error(f"Error checking image existence: {e}")
            return None

    def db_add_image(self, img_url: str, thread_id: str, post_id: str,
                     filename: str, file_path: str, forum_name: str) -> bool:
        """Add image to downloads via UnifiedDatabase.record_download."""
        metadata = {
            'thread_id': thread_id,
            'post_id': post_id,
            'forum_name': forum_name
        }
        return self.unified_db.record_download(
            url=img_url,
            platform='forums',
            source=forum_name,
            content_type='image',
            filename=filename,
            file_path=file_path,
            metadata=metadata
        )

    def db_search_exists(self, search_id: str) -> bool:
        """Check if search already exists"""
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute(
                "SELECT 1 FROM search_monitors WHERE search_id = ?",
                (search_id,)
            )
            return cursor.fetchone() is not None

        try:
            return self._execute_with_retry(operation, for_write=False)
        except Exception as e:
            logger.error(f"Error checking search existence: {e}")
            return False

    def db_add_search(self, search_id: str, forum_name: str,
                      search_query: str, search_url: Optional[str] = None,
                      check_frequency_hours: int = 24) -> bool:
        """Add or update search monitor"""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            try:
                cursor.execute('''
                    INSERT OR REPLACE INTO search_monitors
                    (search_id, platform, source, search_query, search_url,
                     last_checked, check_frequency_hours, active)
                    VALUES (?, 'forums', ?, ?, ?, ?, ?, 1)
                ''', (search_id, forum_name, search_query, search_url,
                      datetime.now(), check_frequency_hours))
                conn.commit()
                return True
            except Exception as e:
                logger.error(f"Error adding search: {e}")
                return False

    def db_update_search_results(self, search_id: str,
                                 results_count: int) -> bool:
        """Update search results count"""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('''
                UPDATE search_monitors
                SET last_checked = ?, results_found = ?
                WHERE search_id = ?
            ''', (datetime.now(), results_count, search_id))
            conn.commit()
            return cursor.rowcount > 0

    def add_to_download_queue(self, url: str, referer: Optional[str] = None,
                              save_path=None, thread_id: Optional[str] = None,
                              post_id: Optional[str] = None,
                              forum_name: Optional[str] = None,
                              metadata: Optional[Dict] = None) -> bool:
        """Add item to download queue.

        Returns False when the URL is already downloaded, already queued,
        or on any error; True when a new queue row was inserted.
        """
        # Check if already downloaded
        if self.unified_db.is_downloaded(url, platform='forums'):
            return False

        # Check if already in queue (with retry logic)
        def check_queue(conn):
            cursor = conn.cursor()
            cursor.execute(
                "SELECT status FROM download_queue WHERE url = ?",
                (url,)
            )
            return cursor.fetchone()

        try:
            existing = self._execute_with_retry(check_queue, for_write=False)
            if existing:
                if existing[0] == 'completed':
                    return False  # Already downloaded
                elif existing[0] == 'pending':
                    return False  # Already in queue
        except Exception as e:
            logger.error(f"Error checking download queue: {e}")
            return False

        # Add to queue.
        # Copy the caller's metadata: the original `metadata or {}` mutated
        # the caller's dict in place when update() was applied to it.
        queue_metadata = dict(metadata) if metadata else {}
        queue_metadata.update({
            'thread_id': thread_id,
            'post_id': post_id,
            'forum_name': forum_name
        })

        with self._get_connection() as conn:
            cursor = conn.cursor()
            try:
                cursor.execute('''
                    INSERT INTO download_queue
                    (url, platform, source, referer, save_path, status, metadata)
                    VALUES (?, 'forums', ?, ?, ?, 'pending', ?)
                ''', (url, forum_name, referer,
                      str(save_path) if save_path else None,
                      json.dumps(queue_metadata)))
                conn.commit()
                return True
            except sqlite3.IntegrityError:
                return False  # URL already in queue
            except Exception as e:
                logger.error(f"Error adding to queue: {e}")
                return False

    def is_in_download_queue(self, url: str) -> bool:
        """Check if URL is in download queue"""
        with self.unified_db.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT 1 FROM download_queue WHERE url = ? AND status = 'pending'",
                (url,)
            )
            return cursor.fetchone() is not None

    def is_already_downloaded(self, url: str,
                              forum_name: Optional[str] = None) -> bool:
        """Check if thread URL is already being tracked"""
        # For thread URLs, check the forum_threads table; thread_id may be
        # stored either as the raw URL or as its SHA256 digest, so we
        # match on both. (hashlib is already imported at module level.)
        thread_id = hashlib.sha256(url.encode()).hexdigest()

        with self.unified_db.get_connection() as conn:
            cursor = conn.cursor()
            if forum_name:
                # Check for specific forum
                cursor.execute('''
                    SELECT 1 FROM forum_threads
                    WHERE forum_name = ? AND (thread_url = ? OR thread_id = ?)
                    LIMIT 1
                ''', (forum_name, url, thread_id))
            else:
                # Check any forum
                cursor.execute('''
                    SELECT 1 FROM forum_threads
                    WHERE thread_url = ? OR thread_id = ?
                    LIMIT 1
                ''', (url, thread_id))
            return cursor.fetchone() is not None

    def mark_download_complete(self, url: str, filename: Optional[str] = None,
                               file_path: Optional[str] = None) -> bool:
        """Mark download as complete in queue.

        filename / file_path are accepted for interface compatibility but
        not stored here; the downloads table is updated elsewhere.
        """
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('''
                UPDATE download_queue
                SET status = 'completed', download_date = ?
                WHERE url = ?
            ''', (datetime.now(), url))
            conn.commit()
            return cursor.rowcount > 0

    def mark_download_failed(self, url: str,
                             error_message: Optional[str] = None) -> bool:
        """Mark download as failed in queue and bump the attempt counter."""
        with self._get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('''
                UPDATE download_queue
                SET status = 'failed', attempts = attempts + 1,
                    error_message = ?
                WHERE url = ?
            ''', (error_message, url))
            conn.commit()
            return cursor.rowcount > 0

    def record_download(self, url: str, thread_id: Optional[str] = None,
                        post_id: Optional[str] = None,
                        filename: Optional[str] = None,
                        metadata: Optional[Dict] = None,
                        file_path: Optional[str] = None,
                        post_date=None) -> bool:
        """Record a download in the unified database

        Args:
            url: URL of the downloaded content
            thread_id: Forum thread ID
            post_id: Forum post ID
            filename: Name of downloaded file
            metadata: Additional metadata dict
            file_path: Full path to downloaded file
            post_date: Date of the forum post (datetime or None)
        """
        # Extract forum name from metadata if available
        forum_name = metadata.get('forum_name') if metadata else None

        # Prepare full metadata.
        # Copy the caller's dict: the original `metadata or {}` aliased it,
        # so adding thread_id/post_id mutated the caller's metadata.
        full_metadata = dict(metadata) if metadata else {}
        if thread_id:
            full_metadata['thread_id'] = thread_id
        if post_id:
            full_metadata['post_id'] = post_id

        # Calculate file hash if file_path provided
        file_hash = None
        if file_path:
            try:
                from modules.unified_database import UnifiedDatabase
                file_hash = UnifiedDatabase.get_file_hash(file_path)
            except Exception:
                pass  # If hash fails, continue without it

        # Record in unified database
        return self.unified_db.record_download(
            url=url,
            platform='forums',
            source=forum_name or 'unknown',
            content_type='image',
            filename=filename,
            file_path=file_path,
            file_hash=file_hash,
            post_date=post_date,
            metadata=full_metadata
        )

    def get_pending_downloads(self, limit: int = 100) -> List[Dict]:
        """Get pending downloads from queue, highest priority first."""
        with self.unified_db.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT * FROM download_queue
                WHERE platform = 'forums' AND status = 'pending'
                ORDER BY priority, created_date
                LIMIT ?
            ''', (limit,))
            return [dict(row) for row in cursor.fetchall()]

    def cleanup_old_data(self, days: int = 180):
        """Clean up old data.

        Deletes completed downloads and finished queue rows older than
        `days` days, and expires thread monitors past their monitor_until.
        The negative day count is concatenated in SQL to form a modifier
        like '-180 days' for SQLite's datetime() function.
        """
        with self._get_connection() as conn:
            cursor = conn.cursor()

            # Clean old downloads
            cursor.execute('''
                DELETE FROM downloads
                WHERE platform = 'forums'
                AND download_date < datetime('now', ? || ' days')
                AND status = 'completed'
            ''', (-days,))

            # Clean old queue items
            cursor.execute('''
                DELETE FROM download_queue
                WHERE platform = 'forums'
                AND created_date < datetime('now', ? || ' days')
                AND status IN ('completed', 'failed')
            ''', (-days,))

            # Expire old monitors
            cursor.execute('''
                UPDATE forum_threads
                SET status = 'expired'
                WHERE monitor_until < datetime('now')
                AND status = 'active'
            ''')

            conn.commit()