485
modules/forum_db_adapter.py
Executable file
485
modules/forum_db_adapter.py
Executable file
@@ -0,0 +1,485 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Forum Database Adapter for Unified Database
|
||||
Provides compatibility layer for forum_downloader to use UnifiedDatabase
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import json
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
import time
|
||||
from modules.universal_logger import get_logger
|
||||
|
||||
logger = get_logger('ForumAdapter')
|
||||
|
||||
class ForumDatabaseAdapter:
    """Compatibility shim letting forum_downloader talk to UnifiedDatabase.

    Mirrors the interface of the legacy forum database so existing
    callers keep working without changes.
    """

    def __init__(self, unified_db, db_path=None):
        """Store the backing database handle.

        Args:
            unified_db: UnifiedDatabase instance that all calls delegate to.
            db_path: Unused; accepted only for signature compatibility
                with the legacy database class.
        """
        self.unified_db = unified_db
        # Retained so legacy callers that read .db_path still work.
        self.db_path = db_path
|
||||
|
||||
def get_file_hash(self, file_path: str) -> Optional[str]:
|
||||
"""Calculate SHA256 hash of a file (delegates to UnifiedDatabase)"""
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
return UnifiedDatabase.get_file_hash(file_path)
|
||||
|
||||
def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
|
||||
"""Get download record by file hash (delegates to UnifiedDatabase)"""
|
||||
return self.unified_db.get_download_by_file_hash(file_hash)
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
pass
|
||||
|
||||
def _get_connection(self):
|
||||
"""Get a connection from unified database"""
|
||||
return self.unified_db.get_connection(for_write=True)
|
||||
|
||||
def _execute_with_retry(self, operation, retries: int = 3, for_write: bool = False):
|
||||
"""
|
||||
Execute a database operation with retry logic for lock/deadlock errors.
|
||||
|
||||
Args:
|
||||
operation: A callable that takes a connection and returns a result
|
||||
retries: Number of retry attempts
|
||||
for_write: Whether this is a write operation
|
||||
|
||||
Returns:
|
||||
The result of the operation
|
||||
|
||||
Raises:
|
||||
sqlite3.OperationalError: If operation fails after all retries
|
||||
"""
|
||||
for attempt in range(retries):
|
||||
try:
|
||||
with self.unified_db.get_connection(for_write=for_write) as conn:
|
||||
return operation(conn)
|
||||
except sqlite3.OperationalError as e:
|
||||
if ("locked" in str(e) or "deadlock" in str(e).lower()) and attempt < retries - 1:
|
||||
delay = 1 + attempt * 2 # Exponential backoff
|
||||
logger.debug(f"Database locked, retrying in {delay} seconds...")
|
||||
time.sleep(delay)
|
||||
continue
|
||||
else:
|
||||
logger.error(f"Database operation failed after {attempt + 1} attempts: {e}")
|
||||
raise
|
||||
# This point should never be reached due to the raise above,
|
||||
# but raise explicitly to satisfy type checkers
|
||||
raise sqlite3.OperationalError("Database operation failed after all retries")
|
||||
|
||||
def db_add_thread(self, thread_id: str, forum_name: str, thread_url: str,
|
||||
thread_title: str = None, monitor_until: datetime = None) -> bool:
|
||||
"""Add a forum thread to tracking"""
|
||||
def operation(conn):
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('''
|
||||
INSERT OR IGNORE INTO forum_threads
|
||||
(thread_id, forum_name, thread_url, thread_title,
|
||||
created_date, last_checked, status, monitor_until)
|
||||
VALUES (?, ?, ?, ?, ?, ?, 'active', ?)
|
||||
''', (thread_id, forum_name, thread_url, thread_title,
|
||||
datetime.now(), datetime.now(), monitor_until))
|
||||
conn.commit()
|
||||
return cursor.rowcount > 0
|
||||
|
||||
try:
|
||||
return self._execute_with_retry(operation, for_write=True)
|
||||
except Exception as e:
|
||||
logger.error(f"Error adding thread: {e}")
|
||||
return False
|
||||
|
||||
def db_update_thread(self, thread_id: str, last_post_date: datetime = None,
|
||||
post_count: int = None) -> bool:
|
||||
"""Update thread information"""
|
||||
# Build updates list outside the operation for clarity
|
||||
updates = ["last_checked = ?"]
|
||||
params = [datetime.now()]
|
||||
|
||||
if last_post_date:
|
||||
updates.append("last_post_date = ?")
|
||||
params.append(last_post_date)
|
||||
|
||||
if post_count is not None:
|
||||
updates.append("post_count = ?")
|
||||
params.append(post_count)
|
||||
|
||||
params.append(thread_id)
|
||||
|
||||
# Pre-build the SQL query to avoid f-string inside operation
|
||||
sql = f'UPDATE forum_threads SET {", ".join(updates)} WHERE thread_id = ?'
|
||||
|
||||
def operation(conn):
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(sql, params)
|
||||
conn.commit()
|
||||
return cursor.rowcount > 0
|
||||
|
||||
try:
|
||||
return self._execute_with_retry(operation, for_write=True)
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating thread {thread_id}: {e}")
|
||||
return False
|
||||
|
||||
def db_update_thread_last_checked(self, thread_id: str) -> bool:
|
||||
"""Update the last_checked timestamp for a forum thread"""
|
||||
def operation(conn):
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('''
|
||||
UPDATE forum_threads
|
||||
SET last_checked = ?
|
||||
WHERE thread_id = ?
|
||||
''', (datetime.now(), thread_id))
|
||||
conn.commit()
|
||||
return cursor.rowcount > 0
|
||||
|
||||
try:
|
||||
return self._execute_with_retry(operation, for_write=True)
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating last_checked for thread {thread_id}: {e}")
|
||||
return False
|
||||
|
||||
def db_get_thread(self, thread_id: str) -> Optional[Dict]:
|
||||
"""Get thread information"""
|
||||
def operation(conn):
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT * FROM forum_threads WHERE thread_id = ?",
|
||||
(thread_id,)
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
return dict(row) if row else None
|
||||
|
||||
try:
|
||||
return self._execute_with_retry(operation, for_write=False)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting thread {thread_id}: {e}")
|
||||
return None
|
||||
|
||||
def db_add_post(self, post_id: str, thread_id: str, post_url: str = None,
|
||||
author: str = None, post_date: datetime = None,
|
||||
has_images: bool = False) -> bool:
|
||||
"""Add a forum post"""
|
||||
with self._get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
try:
|
||||
content_hash = hashlib.sha256(f"{thread_id}:{post_id}".encode()).hexdigest()
|
||||
cursor.execute('''
|
||||
INSERT INTO forum_posts
|
||||
(post_id, thread_id, post_url, author, post_date,
|
||||
content_hash, has_images)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT (post_id) DO UPDATE SET
|
||||
thread_id = EXCLUDED.thread_id,
|
||||
post_url = EXCLUDED.post_url,
|
||||
author = EXCLUDED.author,
|
||||
post_date = EXCLUDED.post_date,
|
||||
content_hash = EXCLUDED.content_hash,
|
||||
has_images = EXCLUDED.has_images
|
||||
''', (post_id, thread_id, post_url, author, post_date,
|
||||
content_hash, has_images))
|
||||
conn.commit()
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Error adding post: {e}")
|
||||
return False
|
||||
|
||||
def db_get_image_id(self, img_url: str) -> Optional[int]:
|
||||
"""Check if image already exists in downloads"""
|
||||
url_hash = self.unified_db.get_url_hash(img_url)
|
||||
|
||||
def operation(conn):
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT id FROM downloads WHERE url_hash = ? AND platform = 'forums'",
|
||||
(url_hash,)
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
return row[0] if row else None
|
||||
|
||||
try:
|
||||
return self._execute_with_retry(operation, for_write=False)
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking image existence: {e}")
|
||||
return None
|
||||
|
||||
def db_add_image(self, img_url: str, thread_id: str, post_id: str,
|
||||
filename: str, file_path: str, forum_name: str) -> bool:
|
||||
"""Add image to downloads"""
|
||||
metadata = {
|
||||
'thread_id': thread_id,
|
||||
'post_id': post_id,
|
||||
'forum_name': forum_name
|
||||
}
|
||||
|
||||
return self.unified_db.record_download(
|
||||
url=img_url,
|
||||
platform='forums',
|
||||
source=forum_name,
|
||||
content_type='image',
|
||||
filename=filename,
|
||||
file_path=file_path,
|
||||
metadata=metadata
|
||||
)
|
||||
|
||||
def db_search_exists(self, search_id: str) -> bool:
|
||||
"""Check if search already exists"""
|
||||
def operation(conn):
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT 1 FROM search_monitors WHERE search_id = ?",
|
||||
(search_id,)
|
||||
)
|
||||
return cursor.fetchone() is not None
|
||||
|
||||
try:
|
||||
return self._execute_with_retry(operation, for_write=False)
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking search existence: {e}")
|
||||
return False
|
||||
|
||||
def db_add_search(self, search_id: str, forum_name: str, search_query: str,
|
||||
search_url: str = None, check_frequency_hours: int = 24) -> bool:
|
||||
"""Add or update search monitor"""
|
||||
with self._get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
try:
|
||||
cursor.execute('''
|
||||
INSERT OR REPLACE INTO search_monitors
|
||||
(search_id, platform, source, search_query, search_url,
|
||||
last_checked, check_frequency_hours, active)
|
||||
VALUES (?, 'forums', ?, ?, ?, ?, ?, 1)
|
||||
''', (search_id, forum_name, search_query, search_url,
|
||||
datetime.now(), check_frequency_hours))
|
||||
conn.commit()
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Error adding search: {e}")
|
||||
return False
|
||||
|
||||
def db_update_search_results(self, search_id: str, results_count: int) -> bool:
|
||||
"""Update search results count"""
|
||||
with self._get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('''
|
||||
UPDATE search_monitors
|
||||
SET last_checked = ?, results_found = ?
|
||||
WHERE search_id = ?
|
||||
''', (datetime.now(), results_count, search_id))
|
||||
conn.commit()
|
||||
return cursor.rowcount > 0
|
||||
|
||||
def add_to_download_queue(self, url: str, referer: str = None, save_path: str = None,
|
||||
thread_id: str = None, post_id: str = None,
|
||||
forum_name: str = None, metadata: Dict = None) -> bool:
|
||||
"""Add item to download queue"""
|
||||
# Check if already downloaded
|
||||
if self.unified_db.is_downloaded(url, platform='forums'):
|
||||
return False
|
||||
|
||||
# Check if already in queue (with retry logic)
|
||||
def check_queue(conn):
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT status FROM download_queue WHERE url = ?",
|
||||
(url,)
|
||||
)
|
||||
return cursor.fetchone()
|
||||
|
||||
try:
|
||||
existing = self._execute_with_retry(check_queue, for_write=False)
|
||||
if existing:
|
||||
if existing[0] == 'completed':
|
||||
return False # Already downloaded
|
||||
elif existing[0] == 'pending':
|
||||
return False # Already in queue
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking download queue: {e}")
|
||||
return False
|
||||
|
||||
# Add to queue
|
||||
queue_metadata = metadata or {}
|
||||
queue_metadata.update({
|
||||
'thread_id': thread_id,
|
||||
'post_id': post_id,
|
||||
'forum_name': forum_name
|
||||
})
|
||||
|
||||
with self._get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
try:
|
||||
cursor.execute('''
|
||||
INSERT INTO download_queue
|
||||
(url, platform, source, referer, save_path, status, metadata)
|
||||
VALUES (?, 'forums', ?, ?, ?, 'pending', ?)
|
||||
''', (url, forum_name, referer, str(save_path) if save_path else None, json.dumps(queue_metadata)))
|
||||
conn.commit()
|
||||
return True
|
||||
except sqlite3.IntegrityError:
|
||||
return False # URL already in queue
|
||||
except Exception as e:
|
||||
logger.error(f"Error adding to queue: {e}")
|
||||
return False
|
||||
|
||||
def is_in_download_queue(self, url: str) -> bool:
|
||||
"""Check if URL is in download queue"""
|
||||
with self.unified_db.get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT 1 FROM download_queue WHERE url = ? AND status = 'pending'",
|
||||
(url,)
|
||||
)
|
||||
return cursor.fetchone() is not None
|
||||
|
||||
def is_already_downloaded(self, url: str, forum_name: str = None) -> bool:
|
||||
"""Check if thread URL is already being tracked"""
|
||||
# For thread URLs, check the forum_threads table
|
||||
import hashlib
|
||||
thread_id = hashlib.sha256(url.encode()).hexdigest()
|
||||
|
||||
with self.unified_db.get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
if forum_name:
|
||||
# Check for specific forum
|
||||
cursor.execute('''
|
||||
SELECT 1 FROM forum_threads
|
||||
WHERE forum_name = ? AND (thread_url = ? OR thread_id = ?)
|
||||
LIMIT 1
|
||||
''', (forum_name, url, thread_id))
|
||||
else:
|
||||
# Check any forum
|
||||
cursor.execute('''
|
||||
SELECT 1 FROM forum_threads
|
||||
WHERE thread_url = ? OR thread_id = ?
|
||||
LIMIT 1
|
||||
''', (url, thread_id))
|
||||
return cursor.fetchone() is not None
|
||||
|
||||
def mark_download_complete(self, url: str, filename: str = None,
|
||||
file_path: str = None) -> bool:
|
||||
"""Mark download as complete in queue"""
|
||||
with self._get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('''
|
||||
UPDATE download_queue
|
||||
SET status = 'completed', download_date = ?
|
||||
WHERE url = ?
|
||||
''', (datetime.now(), url))
|
||||
conn.commit()
|
||||
return cursor.rowcount > 0
|
||||
|
||||
def mark_download_failed(self, url: str, error_message: str = None) -> bool:
|
||||
"""Mark download as failed in queue"""
|
||||
with self._get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('''
|
||||
UPDATE download_queue
|
||||
SET status = 'failed', attempts = attempts + 1, error_message = ?
|
||||
WHERE url = ?
|
||||
''', (error_message, url))
|
||||
conn.commit()
|
||||
return cursor.rowcount > 0
|
||||
|
||||
def record_download(self, url: str, thread_id: str = None, post_id: str = None,
|
||||
filename: str = None, metadata: Dict = None, file_path: str = None,
|
||||
post_date = None) -> bool:
|
||||
"""Record a download in the unified database
|
||||
|
||||
Args:
|
||||
url: URL of the downloaded content
|
||||
thread_id: Forum thread ID
|
||||
post_id: Forum post ID
|
||||
filename: Name of downloaded file
|
||||
metadata: Additional metadata dict
|
||||
file_path: Full path to downloaded file
|
||||
post_date: Date of the forum post (datetime or None)
|
||||
"""
|
||||
# Extract forum name from metadata if available
|
||||
forum_name = metadata.get('forum_name') if metadata else None
|
||||
|
||||
# Prepare full metadata
|
||||
full_metadata = metadata or {}
|
||||
if thread_id:
|
||||
full_metadata['thread_id'] = thread_id
|
||||
if post_id:
|
||||
full_metadata['post_id'] = post_id
|
||||
|
||||
# Calculate file hash if file_path provided
|
||||
file_hash = None
|
||||
if file_path:
|
||||
try:
|
||||
from modules.unified_database import UnifiedDatabase
|
||||
file_hash = UnifiedDatabase.get_file_hash(file_path)
|
||||
except Exception:
|
||||
pass # If hash fails, continue without it
|
||||
|
||||
# Record in unified database
|
||||
return self.unified_db.record_download(
|
||||
url=url,
|
||||
platform='forums',
|
||||
source=forum_name or 'unknown',
|
||||
content_type='image',
|
||||
filename=filename,
|
||||
file_path=file_path,
|
||||
file_hash=file_hash,
|
||||
post_date=post_date,
|
||||
metadata=full_metadata
|
||||
)
|
||||
|
||||
def get_pending_downloads(self, limit: int = 100) -> List[Dict]:
|
||||
"""Get pending downloads from queue"""
|
||||
with self.unified_db.get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('''
|
||||
SELECT * FROM download_queue
|
||||
WHERE platform = 'forums' AND status = 'pending'
|
||||
ORDER BY priority, created_date
|
||||
LIMIT ?
|
||||
''', (limit,))
|
||||
return [dict(row) for row in cursor.fetchall()]
|
||||
|
||||
def cleanup_old_data(self, days: int = 180):
|
||||
"""Clean up old data"""
|
||||
with self._get_connection() as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Clean old downloads
|
||||
cursor.execute('''
|
||||
DELETE FROM downloads
|
||||
WHERE platform = 'forums'
|
||||
AND download_date < datetime('now', ? || ' days')
|
||||
AND status = 'completed'
|
||||
''', (-days,))
|
||||
|
||||
# Clean old queue items
|
||||
cursor.execute('''
|
||||
DELETE FROM download_queue
|
||||
WHERE platform = 'forums'
|
||||
AND created_date < datetime('now', ? || ' days')
|
||||
AND status IN ('completed', 'failed')
|
||||
''', (-days,))
|
||||
|
||||
# Expire old monitors
|
||||
cursor.execute('''
|
||||
UPDATE forum_threads
|
||||
SET status = 'expired'
|
||||
WHERE monitor_until < datetime('now')
|
||||
AND status = 'active'
|
||||
''')
|
||||
|
||||
conn.commit()
|
||||
Reference in New Issue
Block a user