Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Todd
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions

485
modules/forum_db_adapter.py Executable file
View File

@@ -0,0 +1,485 @@
#!/usr/bin/env python3
"""
Forum Database Adapter for Unified Database
Provides compatibility layer for forum_downloader to use UnifiedDatabase
"""
import sqlite3
import json
import hashlib
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any
import time
from modules.universal_logger import get_logger
# Module-level logger shared by every method of the adapter.
logger = get_logger('ForumAdapter')
class ForumDatabaseAdapter:
"""
Adapter to allow forum_downloader to use UnifiedDatabase
Mimics the original forum database interface
"""
def __init__(self, unified_db, db_path=None):
"""
Initialize the adapter
Args:
unified_db: UnifiedDatabase instance
db_path: Ignored - kept for compatibility
"""
self.unified_db = unified_db
self.db_path = db_path # Keep for compatibility but not used
def get_file_hash(self, file_path: str) -> Optional[str]:
"""Calculate SHA256 hash of a file (delegates to UnifiedDatabase)"""
from modules.unified_database import UnifiedDatabase
return UnifiedDatabase.get_file_hash(file_path)
def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
"""Get download record by file hash (delegates to UnifiedDatabase)"""
return self.unified_db.get_download_by_file_hash(file_hash)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
pass
def _get_connection(self):
"""Get a connection from unified database"""
return self.unified_db.get_connection(for_write=True)
def _execute_with_retry(self, operation, retries: int = 3, for_write: bool = False):
"""
Execute a database operation with retry logic for lock/deadlock errors.
Args:
operation: A callable that takes a connection and returns a result
retries: Number of retry attempts
for_write: Whether this is a write operation
Returns:
The result of the operation
Raises:
sqlite3.OperationalError: If operation fails after all retries
"""
for attempt in range(retries):
try:
with self.unified_db.get_connection(for_write=for_write) as conn:
return operation(conn)
except sqlite3.OperationalError as e:
if ("locked" in str(e) or "deadlock" in str(e).lower()) and attempt < retries - 1:
delay = 1 + attempt * 2 # Exponential backoff
logger.debug(f"Database locked, retrying in {delay} seconds...")
time.sleep(delay)
continue
else:
logger.error(f"Database operation failed after {attempt + 1} attempts: {e}")
raise
# This point should never be reached due to the raise above,
# but raise explicitly to satisfy type checkers
raise sqlite3.OperationalError("Database operation failed after all retries")
def db_add_thread(self, thread_id: str, forum_name: str, thread_url: str,
thread_title: str = None, monitor_until: datetime = None) -> bool:
"""Add a forum thread to tracking"""
def operation(conn):
cursor = conn.cursor()
cursor.execute('''
INSERT OR IGNORE INTO forum_threads
(thread_id, forum_name, thread_url, thread_title,
created_date, last_checked, status, monitor_until)
VALUES (?, ?, ?, ?, ?, ?, 'active', ?)
''', (thread_id, forum_name, thread_url, thread_title,
datetime.now(), datetime.now(), monitor_until))
conn.commit()
return cursor.rowcount > 0
try:
return self._execute_with_retry(operation, for_write=True)
except Exception as e:
logger.error(f"Error adding thread: {e}")
return False
def db_update_thread(self, thread_id: str, last_post_date: datetime = None,
post_count: int = None) -> bool:
"""Update thread information"""
# Build updates list outside the operation for clarity
updates = ["last_checked = ?"]
params = [datetime.now()]
if last_post_date:
updates.append("last_post_date = ?")
params.append(last_post_date)
if post_count is not None:
updates.append("post_count = ?")
params.append(post_count)
params.append(thread_id)
# Pre-build the SQL query to avoid f-string inside operation
sql = f'UPDATE forum_threads SET {", ".join(updates)} WHERE thread_id = ?'
def operation(conn):
cursor = conn.cursor()
cursor.execute(sql, params)
conn.commit()
return cursor.rowcount > 0
try:
return self._execute_with_retry(operation, for_write=True)
except Exception as e:
logger.error(f"Error updating thread {thread_id}: {e}")
return False
def db_update_thread_last_checked(self, thread_id: str) -> bool:
"""Update the last_checked timestamp for a forum thread"""
def operation(conn):
cursor = conn.cursor()
cursor.execute('''
UPDATE forum_threads
SET last_checked = ?
WHERE thread_id = ?
''', (datetime.now(), thread_id))
conn.commit()
return cursor.rowcount > 0
try:
return self._execute_with_retry(operation, for_write=True)
except Exception as e:
logger.error(f"Error updating last_checked for thread {thread_id}: {e}")
return False
def db_get_thread(self, thread_id: str) -> Optional[Dict]:
"""Get thread information"""
def operation(conn):
cursor = conn.cursor()
cursor.execute(
"SELECT * FROM forum_threads WHERE thread_id = ?",
(thread_id,)
)
row = cursor.fetchone()
return dict(row) if row else None
try:
return self._execute_with_retry(operation, for_write=False)
except Exception as e:
logger.error(f"Error getting thread {thread_id}: {e}")
return None
def db_add_post(self, post_id: str, thread_id: str, post_url: str = None,
author: str = None, post_date: datetime = None,
has_images: bool = False) -> bool:
"""Add a forum post"""
with self._get_connection() as conn:
cursor = conn.cursor()
try:
content_hash = hashlib.sha256(f"{thread_id}:{post_id}".encode()).hexdigest()
cursor.execute('''
INSERT INTO forum_posts
(post_id, thread_id, post_url, author, post_date,
content_hash, has_images)
VALUES (?, ?, ?, ?, ?, ?, ?)
ON CONFLICT (post_id) DO UPDATE SET
thread_id = EXCLUDED.thread_id,
post_url = EXCLUDED.post_url,
author = EXCLUDED.author,
post_date = EXCLUDED.post_date,
content_hash = EXCLUDED.content_hash,
has_images = EXCLUDED.has_images
''', (post_id, thread_id, post_url, author, post_date,
content_hash, has_images))
conn.commit()
return True
except Exception as e:
logger.error(f"Error adding post: {e}")
return False
def db_get_image_id(self, img_url: str) -> Optional[int]:
"""Check if image already exists in downloads"""
url_hash = self.unified_db.get_url_hash(img_url)
def operation(conn):
cursor = conn.cursor()
cursor.execute(
"SELECT id FROM downloads WHERE url_hash = ? AND platform = 'forums'",
(url_hash,)
)
row = cursor.fetchone()
return row[0] if row else None
try:
return self._execute_with_retry(operation, for_write=False)
except Exception as e:
logger.error(f"Error checking image existence: {e}")
return None
def db_add_image(self, img_url: str, thread_id: str, post_id: str,
filename: str, file_path: str, forum_name: str) -> bool:
"""Add image to downloads"""
metadata = {
'thread_id': thread_id,
'post_id': post_id,
'forum_name': forum_name
}
return self.unified_db.record_download(
url=img_url,
platform='forums',
source=forum_name,
content_type='image',
filename=filename,
file_path=file_path,
metadata=metadata
)
def db_search_exists(self, search_id: str) -> bool:
"""Check if search already exists"""
def operation(conn):
cursor = conn.cursor()
cursor.execute(
"SELECT 1 FROM search_monitors WHERE search_id = ?",
(search_id,)
)
return cursor.fetchone() is not None
try:
return self._execute_with_retry(operation, for_write=False)
except Exception as e:
logger.error(f"Error checking search existence: {e}")
return False
def db_add_search(self, search_id: str, forum_name: str, search_query: str,
search_url: str = None, check_frequency_hours: int = 24) -> bool:
"""Add or update search monitor"""
with self._get_connection() as conn:
cursor = conn.cursor()
try:
cursor.execute('''
INSERT OR REPLACE INTO search_monitors
(search_id, platform, source, search_query, search_url,
last_checked, check_frequency_hours, active)
VALUES (?, 'forums', ?, ?, ?, ?, ?, 1)
''', (search_id, forum_name, search_query, search_url,
datetime.now(), check_frequency_hours))
conn.commit()
return True
except Exception as e:
logger.error(f"Error adding search: {e}")
return False
def db_update_search_results(self, search_id: str, results_count: int) -> bool:
"""Update search results count"""
with self._get_connection() as conn:
cursor = conn.cursor()
cursor.execute('''
UPDATE search_monitors
SET last_checked = ?, results_found = ?
WHERE search_id = ?
''', (datetime.now(), results_count, search_id))
conn.commit()
return cursor.rowcount > 0
def add_to_download_queue(self, url: str, referer: str = None, save_path: str = None,
thread_id: str = None, post_id: str = None,
forum_name: str = None, metadata: Dict = None) -> bool:
"""Add item to download queue"""
# Check if already downloaded
if self.unified_db.is_downloaded(url, platform='forums'):
return False
# Check if already in queue (with retry logic)
def check_queue(conn):
cursor = conn.cursor()
cursor.execute(
"SELECT status FROM download_queue WHERE url = ?",
(url,)
)
return cursor.fetchone()
try:
existing = self._execute_with_retry(check_queue, for_write=False)
if existing:
if existing[0] == 'completed':
return False # Already downloaded
elif existing[0] == 'pending':
return False # Already in queue
except Exception as e:
logger.error(f"Error checking download queue: {e}")
return False
# Add to queue
queue_metadata = metadata or {}
queue_metadata.update({
'thread_id': thread_id,
'post_id': post_id,
'forum_name': forum_name
})
with self._get_connection() as conn:
cursor = conn.cursor()
try:
cursor.execute('''
INSERT INTO download_queue
(url, platform, source, referer, save_path, status, metadata)
VALUES (?, 'forums', ?, ?, ?, 'pending', ?)
''', (url, forum_name, referer, str(save_path) if save_path else None, json.dumps(queue_metadata)))
conn.commit()
return True
except sqlite3.IntegrityError:
return False # URL already in queue
except Exception as e:
logger.error(f"Error adding to queue: {e}")
return False
def is_in_download_queue(self, url: str) -> bool:
"""Check if URL is in download queue"""
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute(
"SELECT 1 FROM download_queue WHERE url = ? AND status = 'pending'",
(url,)
)
return cursor.fetchone() is not None
def is_already_downloaded(self, url: str, forum_name: str = None) -> bool:
"""Check if thread URL is already being tracked"""
# For thread URLs, check the forum_threads table
import hashlib
thread_id = hashlib.sha256(url.encode()).hexdigest()
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
if forum_name:
# Check for specific forum
cursor.execute('''
SELECT 1 FROM forum_threads
WHERE forum_name = ? AND (thread_url = ? OR thread_id = ?)
LIMIT 1
''', (forum_name, url, thread_id))
else:
# Check any forum
cursor.execute('''
SELECT 1 FROM forum_threads
WHERE thread_url = ? OR thread_id = ?
LIMIT 1
''', (url, thread_id))
return cursor.fetchone() is not None
def mark_download_complete(self, url: str, filename: str = None,
file_path: str = None) -> bool:
"""Mark download as complete in queue"""
with self._get_connection() as conn:
cursor = conn.cursor()
cursor.execute('''
UPDATE download_queue
SET status = 'completed', download_date = ?
WHERE url = ?
''', (datetime.now(), url))
conn.commit()
return cursor.rowcount > 0
def mark_download_failed(self, url: str, error_message: str = None) -> bool:
"""Mark download as failed in queue"""
with self._get_connection() as conn:
cursor = conn.cursor()
cursor.execute('''
UPDATE download_queue
SET status = 'failed', attempts = attempts + 1, error_message = ?
WHERE url = ?
''', (error_message, url))
conn.commit()
return cursor.rowcount > 0
def record_download(self, url: str, thread_id: str = None, post_id: str = None,
filename: str = None, metadata: Dict = None, file_path: str = None,
post_date = None) -> bool:
"""Record a download in the unified database
Args:
url: URL of the downloaded content
thread_id: Forum thread ID
post_id: Forum post ID
filename: Name of downloaded file
metadata: Additional metadata dict
file_path: Full path to downloaded file
post_date: Date of the forum post (datetime or None)
"""
# Extract forum name from metadata if available
forum_name = metadata.get('forum_name') if metadata else None
# Prepare full metadata
full_metadata = metadata or {}
if thread_id:
full_metadata['thread_id'] = thread_id
if post_id:
full_metadata['post_id'] = post_id
# Calculate file hash if file_path provided
file_hash = None
if file_path:
try:
from modules.unified_database import UnifiedDatabase
file_hash = UnifiedDatabase.get_file_hash(file_path)
except Exception:
pass # If hash fails, continue without it
# Record in unified database
return self.unified_db.record_download(
url=url,
platform='forums',
source=forum_name or 'unknown',
content_type='image',
filename=filename,
file_path=file_path,
file_hash=file_hash,
post_date=post_date,
metadata=full_metadata
)
def get_pending_downloads(self, limit: int = 100) -> List[Dict]:
"""Get pending downloads from queue"""
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT * FROM download_queue
WHERE platform = 'forums' AND status = 'pending'
ORDER BY priority, created_date
LIMIT ?
''', (limit,))
return [dict(row) for row in cursor.fetchall()]
def cleanup_old_data(self, days: int = 180):
"""Clean up old data"""
with self._get_connection() as conn:
cursor = conn.cursor()
# Clean old downloads
cursor.execute('''
DELETE FROM downloads
WHERE platform = 'forums'
AND download_date < datetime('now', ? || ' days')
AND status = 'completed'
''', (-days,))
# Clean old queue items
cursor.execute('''
DELETE FROM download_queue
WHERE platform = 'forums'
AND created_date < datetime('now', ? || ' days')
AND status IN ('completed', 'failed')
''', (-days,))
# Expire old monitors
cursor.execute('''
UPDATE forum_threads
SET status = 'expired'
WHERE monitor_until < datetime('now')
AND status = 'active'
''')
conn.commit()