#!/usr/bin/env python3
|
|
"""
|
|
Forum Database Adapter for Unified Database
|
|
Provides compatibility layer for forum_downloader to use UnifiedDatabase
|
|
"""
|
|
|
|
import sqlite3
|
|
import json
|
|
import hashlib
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Any
|
|
import time
|
|
from modules.universal_logger import get_logger
|
|
|
|
logger = get_logger('ForumAdapter')
|
|
|
|
class ForumDatabaseAdapter:
    """
    Adapter that lets forum_downloader use UnifiedDatabase.

    Mimics the original forum database interface while delegating all
    storage to a shared UnifiedDatabase instance.
    """

    def __init__(self, unified_db, db_path=None):
        """
        Initialize the adapter.

        Args:
            unified_db: UnifiedDatabase instance
            db_path: Ignored - kept for backward compatibility
        """
        self.unified_db = unified_db
        self.db_path = db_path  # Kept for compatibility but not used

    def get_file_hash(self, file_path: str) -> Optional[str]:
        """Calculate SHA256 hash of a file (delegates to UnifiedDatabase)."""
        # Imported lazily to avoid a circular import at module load time.
        from modules.unified_database import UnifiedDatabase
        return UnifiedDatabase.get_file_hash(file_path)

    def get_download_by_file_hash(self, file_hash: str) -> Optional[Dict]:
        """Get download record by file hash (delegates to UnifiedDatabase)."""
        return self.unified_db.get_download_by_file_hash(file_hash)

    def __enter__(self):
        """Support `with` usage; no setup is needed."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """No teardown: connections are owned by the UnifiedDatabase."""
        pass

    def _get_connection(self):
        """Get a write-capable connection from the unified database."""
        return self.unified_db.get_connection(for_write=True)

    def _execute_with_retry(self, operation, retries: int = 3, for_write: bool = False):
        """
        Execute a database operation, retrying on lock/deadlock errors.

        Args:
            operation: A callable that takes a connection and returns a result
            retries: Number of attempts before giving up
            for_write: Whether a write-capable connection is required

        Returns:
            The result of the operation

        Raises:
            sqlite3.OperationalError: If the operation fails after all retries
        """
        for attempt in range(retries):
            try:
                with self.unified_db.get_connection(for_write=for_write) as conn:
                    return operation(conn)
            except sqlite3.OperationalError as e:
                retryable = "locked" in str(e) or "deadlock" in str(e).lower()
                if retryable and attempt < retries - 1:
                    delay = 1 + attempt * 2  # Linear backoff: 1s, 3s, 5s, ...
                    logger.debug(f"Database locked, retrying in {delay} seconds...")
                    time.sleep(delay)
                    continue
                logger.error(f"Database operation failed after {attempt + 1} attempts: {e}")
                raise
        # Unreachable (the loop either returns or raises), but an explicit
        # raise keeps type checkers satisfied.
        raise sqlite3.OperationalError("Database operation failed after all retries")

    def db_add_thread(self, thread_id: str, forum_name: str, thread_url: str,
                      thread_title: Optional[str] = None,
                      monitor_until: Optional[datetime] = None) -> bool:
        """Add a forum thread to tracking.

        Returns True only when a new row was inserted; INSERT OR IGNORE
        leaves already-tracked threads untouched (rowcount == 0).
        """
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute('''
                INSERT OR IGNORE INTO forum_threads
                (thread_id, forum_name, thread_url, thread_title,
                 created_date, last_checked, status, monitor_until)
                VALUES (?, ?, ?, ?, ?, ?, 'active', ?)
            ''', (thread_id, forum_name, thread_url, thread_title,
                  datetime.now(), datetime.now(), monitor_until))
            conn.commit()
            return cursor.rowcount > 0

        try:
            return self._execute_with_retry(operation, for_write=True)
        except Exception as e:
            logger.error(f"Error adding thread: {e}")
            return False

    def db_update_thread(self, thread_id: str, last_post_date: Optional[datetime] = None,
                         post_count: Optional[int] = None) -> bool:
        """Update thread information; last_checked is always refreshed."""
        # Build the SET clause up front so the operation closure stays simple.
        updates = ["last_checked = ?"]
        params: List[Any] = [datetime.now()]

        if last_post_date:
            updates.append("last_post_date = ?")
            params.append(last_post_date)

        if post_count is not None:
            updates.append("post_count = ?")
            params.append(post_count)

        params.append(thread_id)
        sql = f'UPDATE forum_threads SET {", ".join(updates)} WHERE thread_id = ?'

        def operation(conn):
            cursor = conn.cursor()
            cursor.execute(sql, params)
            conn.commit()
            return cursor.rowcount > 0

        try:
            return self._execute_with_retry(operation, for_write=True)
        except Exception as e:
            logger.error(f"Error updating thread {thread_id}: {e}")
            return False

    def db_update_thread_last_checked(self, thread_id: str) -> bool:
        """Update the last_checked timestamp for a forum thread."""
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute('''
                UPDATE forum_threads
                SET last_checked = ?
                WHERE thread_id = ?
            ''', (datetime.now(), thread_id))
            conn.commit()
            return cursor.rowcount > 0

        try:
            return self._execute_with_retry(operation, for_write=True)
        except Exception as e:
            logger.error(f"Error updating last_checked for thread {thread_id}: {e}")
            return False

    def db_get_thread(self, thread_id: str) -> Optional[Dict]:
        """Get thread information as a dict, or None if not found / on error."""
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute(
                "SELECT * FROM forum_threads WHERE thread_id = ?",
                (thread_id,)
            )
            row = cursor.fetchone()
            # NOTE(review): dict(row) assumes the connection uses a
            # mapping-capable row factory (e.g. sqlite3.Row) — confirm in
            # UnifiedDatabase.get_connection.
            return dict(row) if row else None

        try:
            return self._execute_with_retry(operation, for_write=False)
        except Exception as e:
            logger.error(f"Error getting thread {thread_id}: {e}")
            return None

    def db_add_post(self, post_id: str, thread_id: str, post_url: Optional[str] = None,
                    author: Optional[str] = None, post_date: Optional[datetime] = None,
                    has_images: bool = False) -> bool:
        """Add or update a forum post (upsert keyed on post_id)."""
        # Stable fingerprint for the (thread, post) pair; the post body itself
        # is not stored, so this stands in for a content hash.
        content_hash = hashlib.sha256(f"{thread_id}:{post_id}".encode()).hexdigest()

        def operation(conn):
            cursor = conn.cursor()
            cursor.execute('''
                INSERT INTO forum_posts
                (post_id, thread_id, post_url, author, post_date,
                 content_hash, has_images)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT (post_id) DO UPDATE SET
                    thread_id = EXCLUDED.thread_id,
                    post_url = EXCLUDED.post_url,
                    author = EXCLUDED.author,
                    post_date = EXCLUDED.post_date,
                    content_hash = EXCLUDED.content_hash,
                    has_images = EXCLUDED.has_images
            ''', (post_id, thread_id, post_url, author, post_date,
                  content_hash, has_images))
            conn.commit()
            return True

        try:
            # Routed through the retry helper for consistency with the other
            # write methods (the original wrote directly with no lock retry).
            return self._execute_with_retry(operation, for_write=True)
        except Exception as e:
            logger.error(f"Error adding post: {e}")
            return False

    def db_get_image_id(self, img_url: str) -> Optional[int]:
        """Return the downloads row id for an image URL, or None."""
        url_hash = self.unified_db.get_url_hash(img_url)

        def operation(conn):
            cursor = conn.cursor()
            cursor.execute(
                "SELECT id FROM downloads WHERE url_hash = ? AND platform = 'forums'",
                (url_hash,)
            )
            row = cursor.fetchone()
            return row[0] if row else None

        try:
            return self._execute_with_retry(operation, for_write=False)
        except Exception as e:
            logger.error(f"Error checking image existence: {e}")
            return None

    def db_add_image(self, img_url: str, thread_id: str, post_id: str,
                     filename: str, file_path: str, forum_name: str) -> bool:
        """Record a downloaded forum image in the unified downloads table."""
        metadata = {
            'thread_id': thread_id,
            'post_id': post_id,
            'forum_name': forum_name
        }

        return self.unified_db.record_download(
            url=img_url,
            platform='forums',
            source=forum_name,
            content_type='image',
            filename=filename,
            file_path=file_path,
            metadata=metadata
        )

    def db_search_exists(self, search_id: str) -> bool:
        """Check whether a search monitor already exists."""
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute(
                "SELECT 1 FROM search_monitors WHERE search_id = ?",
                (search_id,)
            )
            return cursor.fetchone() is not None

        try:
            return self._execute_with_retry(operation, for_write=False)
        except Exception as e:
            logger.error(f"Error checking search existence: {e}")
            return False

    def db_add_search(self, search_id: str, forum_name: str, search_query: str,
                      search_url: Optional[str] = None,
                      check_frequency_hours: int = 24) -> bool:
        """Add or update a search monitor (INSERT OR REPLACE keyed on search_id)."""
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute('''
                INSERT OR REPLACE INTO search_monitors
                (search_id, platform, source, search_query, search_url,
                 last_checked, check_frequency_hours, active)
                VALUES (?, 'forums', ?, ?, ?, ?, ?, 1)
            ''', (search_id, forum_name, search_query, search_url,
                  datetime.now(), check_frequency_hours))
            conn.commit()
            return True

        try:
            return self._execute_with_retry(operation, for_write=True)
        except Exception as e:
            logger.error(f"Error adding search: {e}")
            return False

    def db_update_search_results(self, search_id: str, results_count: int) -> bool:
        """Update a search monitor's result count and last_checked timestamp.

        Unlike db_add_search, database errors propagate to the caller
        (preserving the original method's behavior).
        """
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute('''
                UPDATE search_monitors
                SET last_checked = ?, results_found = ?
                WHERE search_id = ?
            ''', (datetime.now(), results_count, search_id))
            conn.commit()
            return cursor.rowcount > 0

        return self._execute_with_retry(operation, for_write=True)

    def add_to_download_queue(self, url: str, referer: Optional[str] = None,
                              save_path: Optional[str] = None,
                              thread_id: Optional[str] = None,
                              post_id: Optional[str] = None,
                              forum_name: Optional[str] = None,
                              metadata: Optional[Dict] = None) -> bool:
        """Add an item to the download queue.

        Returns False when the URL was already downloaded, already queued,
        or could not be queued.
        """
        # Skip anything the unified database already recorded as downloaded.
        if self.unified_db.is_downloaded(url, platform='forums'):
            return False

        def check_queue(conn):
            cursor = conn.cursor()
            cursor.execute(
                "SELECT status FROM download_queue WHERE url = ?",
                (url,)
            )
            return cursor.fetchone()

        try:
            existing = self._execute_with_retry(check_queue, for_write=False)
            if existing and existing[0] in ('completed', 'pending'):
                return False  # Already downloaded or already queued
        except Exception as e:
            logger.error(f"Error checking download queue: {e}")
            return False

        # Copy the caller's metadata so we never mutate their dict
        # (the original updated the passed-in object in place).
        queue_metadata = dict(metadata) if metadata else {}
        queue_metadata.update({
            'thread_id': thread_id,
            'post_id': post_id,
            'forum_name': forum_name
        })

        def insert_item(conn):
            cursor = conn.cursor()
            cursor.execute('''
                INSERT INTO download_queue
                (url, platform, source, referer, save_path, status, metadata)
                VALUES (?, 'forums', ?, ?, ?, 'pending', ?)
            ''', (url, forum_name, referer, str(save_path) if save_path else None,
                  json.dumps(queue_metadata)))
            conn.commit()
            return True

        try:
            return self._execute_with_retry(insert_item, for_write=True)
        except sqlite3.IntegrityError:
            return False  # URL already in queue (unique constraint)
        except Exception as e:
            logger.error(f"Error adding to queue: {e}")
            return False

    def is_in_download_queue(self, url: str) -> bool:
        """Check whether a URL is pending in the download queue."""
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute(
                "SELECT 1 FROM download_queue WHERE url = ? AND status = 'pending'",
                (url,)
            )
            return cursor.fetchone() is not None

        return self._execute_with_retry(operation, for_write=False)

    def is_already_downloaded(self, url: str, forum_name: Optional[str] = None) -> bool:
        """Check whether a thread URL is already being tracked.

        Matches either the raw thread URL or its SHA256-derived thread_id,
        optionally scoped to a single forum.
        """
        # thread_id is derived from the URL the same way the downloader does.
        thread_id = hashlib.sha256(url.encode()).hexdigest()

        def operation(conn):
            cursor = conn.cursor()
            if forum_name:
                cursor.execute('''
                    SELECT 1 FROM forum_threads
                    WHERE forum_name = ? AND (thread_url = ? OR thread_id = ?)
                    LIMIT 1
                ''', (forum_name, url, thread_id))
            else:
                cursor.execute('''
                    SELECT 1 FROM forum_threads
                    WHERE thread_url = ? OR thread_id = ?
                    LIMIT 1
                ''', (url, thread_id))
            return cursor.fetchone() is not None

        return self._execute_with_retry(operation, for_write=False)

    def mark_download_complete(self, url: str, filename: Optional[str] = None,
                               file_path: Optional[str] = None) -> bool:
        """Mark a queued download as completed.

        filename/file_path are accepted for interface compatibility but are
        not stored here; use record_download for the full download record.
        """
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute('''
                UPDATE download_queue
                SET status = 'completed', download_date = ?
                WHERE url = ?
            ''', (datetime.now(), url))
            conn.commit()
            return cursor.rowcount > 0

        return self._execute_with_retry(operation, for_write=True)

    def mark_download_failed(self, url: str, error_message: Optional[str] = None) -> bool:
        """Mark a queued download as failed and bump its attempt counter."""
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute('''
                UPDATE download_queue
                SET status = 'failed', attempts = attempts + 1, error_message = ?
                WHERE url = ?
            ''', (error_message, url))
            conn.commit()
            return cursor.rowcount > 0

        return self._execute_with_retry(operation, for_write=True)

    def record_download(self, url: str, thread_id: Optional[str] = None,
                        post_id: Optional[str] = None,
                        filename: Optional[str] = None, metadata: Optional[Dict] = None,
                        file_path: Optional[str] = None,
                        post_date: Optional[datetime] = None) -> bool:
        """Record a download in the unified database.

        Args:
            url: URL of the downloaded content
            thread_id: Forum thread ID
            post_id: Forum post ID
            filename: Name of downloaded file
            metadata: Additional metadata dict (never mutated)
            file_path: Full path to downloaded file
            post_date: Date of the forum post (datetime or None)
        """
        # Extract forum name from metadata if available
        forum_name = metadata.get('forum_name') if metadata else None

        # Copy the caller's metadata so we never mutate their dict
        # (the original added keys to the passed-in object in place).
        full_metadata = dict(metadata) if metadata else {}
        if thread_id:
            full_metadata['thread_id'] = thread_id
        if post_id:
            full_metadata['post_id'] = post_id

        # Best effort: a missing/unreadable file just means no hash is stored.
        file_hash = None
        if file_path:
            try:
                from modules.unified_database import UnifiedDatabase
                file_hash = UnifiedDatabase.get_file_hash(file_path)
            except Exception:
                pass  # If hashing fails, continue without it

        return self.unified_db.record_download(
            url=url,
            platform='forums',
            source=forum_name or 'unknown',
            content_type='image',
            filename=filename,
            file_path=file_path,
            file_hash=file_hash,
            post_date=post_date,
            metadata=full_metadata
        )

    def get_pending_downloads(self, limit: int = 100) -> List[Dict]:
        """Get pending forum downloads ordered by priority, then age."""
        def operation(conn):
            cursor = conn.cursor()
            cursor.execute('''
                SELECT * FROM download_queue
                WHERE platform = 'forums' AND status = 'pending'
                ORDER BY priority, created_date
                LIMIT ?
            ''', (limit,))
            return [dict(row) for row in cursor.fetchall()]

        return self._execute_with_retry(operation, for_write=False)

    def cleanup_old_data(self, days: int = 180):
        """Clean up data older than `days` days.

        Deletes completed forum downloads and finished queue items past the
        retention window, and marks expired thread monitors. All three
        statements commit together.
        """
        def operation(conn):
            cursor = conn.cursor()

            # Remove completed downloads past the retention window.
            cursor.execute('''
                DELETE FROM downloads
                WHERE platform = 'forums'
                AND download_date < datetime('now', ? || ' days')
                AND status = 'completed'
            ''', (-days,))

            # Remove finished (completed or failed) queue entries.
            cursor.execute('''
                DELETE FROM download_queue
                WHERE platform = 'forums'
                AND created_date < datetime('now', ? || ' days')
                AND status IN ('completed', 'failed')
            ''', (-days,))

            # Flag monitors whose monitoring window has ended.
            cursor.execute('''
                UPDATE forum_threads
                SET status = 'expired'
                WHERE monitor_until < datetime('now')
                AND status = 'active'
            ''')

            conn.commit()

        self._execute_with_retry(operation, for_write=True)