640 lines
26 KiB
Python
Executable File
640 lines
26 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Background worker to pre-generate thumbnails and cache metadata for all media files.
|
|
This improves performance by generating thumbnails in advance rather than on-demand.
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
import time
|
|
import hashlib
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from PIL import Image
|
|
import io
|
|
|
|
# Add parent directory to path so we can import modules
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
# Bootstrap database backend (must be before any database imports)
|
|
import modules.db_bootstrap # noqa: E402,F401
|
|
|
|
import sqlite3
|
|
|
|
from modules.universal_logger import get_logger
|
|
|
|
logger = get_logger('ThumbnailCacheBuilder')
|
|
|
|
|
|
class ThumbnailCacheBuilder:
    """Build and maintain thumbnail and metadata caches for media files.

    Thumbnails are stored in database/thumbnails.db and metadata in
    database/media_metadata.db, keyed by the SHA256 content hash when one is
    available (falling back to a hash of the file path). File discovery is
    database-first via the file_inventory table in media_downloader.db, with
    a filesystem scan of the configured directories as a fallback.
    """
|
|
|
|
def __init__(self, scan_dirs=None):
    """Initialize paths, extension filters and run statistics.

    Args:
        scan_dirs: Optional iterable of directories used by the
            filesystem-fallback scan. Defaults to the standard
            md/review/recycle locations under /opt/immich, so existing
            callers are unaffected.
    """
    if scan_dirs is None:
        self.scan_dirs = [
            Path('/opt/immich/md'),
            Path('/opt/immich/review'),
            Path('/opt/immich/recycle')
        ]
    else:
        self.scan_dirs = [Path(d) for d in scan_dirs]

    # All cache/database files live under <project>/database/.
    db_root = Path(__file__).parent.parent / 'database'
    self.db_path = db_root / 'thumbnails.db'
    self.metadata_db_path = db_root / 'media_metadata.db'
    self.unified_db_path = db_root / 'media_downloader.db'

    # Thumbnails are shrunk to fit inside this (width, height) box.
    self.max_thumb_size = (300, 300)

    # Media types this builder knows how to process.
    self.image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp'}
    self.video_extensions = {'.mp4', '.mov', '.webm', '.avi', '.mkv', '.flv', '.m4v'}

    # Run counters reported at the end of scan_and_process().
    self.stats = {
        'processed': 0,
        'thumbnails_created': 0,
        'thumbnails_cached': 0,
        'metadata_cached': 0,
        'errors': 0,
        'skipped': 0
    }

    self._init_metadata_db()
|
|
|
|
def _init_metadata_db(self):
    """Create the metadata cache database and its schema if missing.

    Uses WAL journaling so readers are not blocked while the worker
    writes. The connection is closed even if schema creation fails,
    so the WAL handle is never leaked.
    """
    self.metadata_db_path.parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
    try:
        conn.execute('PRAGMA journal_mode=WAL')
        conn.execute("""
            CREATE TABLE IF NOT EXISTS media_metadata (
                file_hash TEXT PRIMARY KEY,
                file_path TEXT NOT NULL,
                width INTEGER,
                height INTEGER,
                file_size INTEGER,
                duration REAL,
                format TEXT,
                created_at TEXT,
                file_mtime DOUBLE PRECISION
            )
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_meta_file_path ON media_metadata(file_path)")
        conn.commit()
    finally:
        # Fix: original leaked the connection when an execute raised.
        conn.close()

    logger.info(f"Metadata database initialized at {self.metadata_db_path}", module="Database")
|
|
|
|
def _get_file_hash(self, file_path: Path, content_hash: str = None) -> str:
|
|
"""Generate hash for file path or use content hash
|
|
|
|
Args:
|
|
file_path: Path to the file
|
|
content_hash: Optional SHA256 content hash from database (preferred for recycle bin)
|
|
"""
|
|
if content_hash:
|
|
# Use first 64 chars of content hash (full SHA256 for cache key)
|
|
return content_hash[:64]
|
|
# Fall back to path-based hash
|
|
return hashlib.sha256(str(file_path).encode()).hexdigest()
|
|
|
|
def _generate_image_thumbnail(self, file_path: Path) -> tuple:
    """Generate a JPEG thumbnail and extract metadata for an image.

    Fixes over the original: EXIF orientation is applied (otherwise
    camera portraits render sideways and report swapped dimensions),
    and palette ('P') / grayscale-alpha ('LA') images are composited
    correctly instead of hitting the lossy generic RGB convert.

    Args:
        file_path: Path to the image file.

    Returns:
        Tuple (thumbnail_data, width, height, format); all None on failure.
    """
    try:
        from PIL import ImageOps  # PIL is already a dependency of this file

        with Image.open(file_path) as img:
            img_format = img.format
            # Rotate/flip per the EXIF Orientation tag so the thumbnail and
            # the reported dimensions match how viewers display the image.
            img = ImageOps.exif_transpose(img)
            width, height = img.size

            # Palette images may carry transparency; promote to RGBA so the
            # compositing branch below handles them.
            if img.mode == 'P':
                img = img.convert('RGBA')

            if img.mode in ('RGBA', 'LA'):
                # Composite transparent images onto a white background.
                background = Image.new('RGB', img.size, (255, 255, 255))
                background.paste(img, mask=img.getchannel('A'))
                img = background
            elif img.mode != 'RGB':
                img = img.convert('RGB')

            # Shrink in place to fit the configured bounding box.
            img.thumbnail(self.max_thumb_size, Image.Resampling.LANCZOS)

            buffer = io.BytesIO()
            img.save(buffer, format='JPEG', quality=85, optimize=True)
            return buffer.getvalue(), width, height, img_format
    except Exception as e:
        logger.error(f"Error generating image thumbnail for {file_path}: {e}", module="Error")
        return None, None, None, None
|
|
|
|
def _generate_video_thumbnail(self, file_path: Path) -> tuple:
    """Generate a JPEG thumbnail and extract metadata for a video via ffmpeg.

    Fixes over the original: the temp frame used a fixed
    /tmp/thumb_<pid>.jpg name that was never cleaned up on the failure
    path, and a stale file from an earlier run could satisfy the
    existence check. A fresh tempfile plus a finally-block unlink closes
    both holes. The duration parse also tolerates ffprobe's 'N/A'.

    Args:
        file_path: Path to the video file.

    Returns:
        Tuple (thumbnail_data, width, height, duration). thumbnail_data is
        None when frame extraction fails but probing succeeded; all four
        are None when probing itself fails.
    """
    try:
        import subprocess
        import json
        import tempfile

        # Probe container + stream metadata as JSON.
        probe_cmd = [
            'ffprobe',
            '-v', 'quiet',
            '-print_format', 'json',
            '-show_format',
            '-show_streams',
            str(file_path)
        ]

        result = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=30)
        if result.returncode != 0:
            logger.error(f"ffprobe failed for {file_path}", module="Error")
            return None, None, None, None

        metadata = json.loads(result.stdout)

        # First video stream carries the dimensions.
        video_stream = next((s for s in metadata.get('streams', []) if s.get('codec_type') == 'video'), None)
        if not video_stream:
            return None, None, None, None

        width = video_stream.get('width')
        height = video_stream.get('height')
        # ffprobe reports duration as 'N/A' for some containers.
        try:
            duration = float(metadata.get('format', {}).get('duration', 0))
        except (TypeError, ValueError):
            duration = 0.0

        # Seek to 1s for normal videos, 0s for very short ones.
        seek_time = '00:00:01' if duration > 1.5 else '00:00:00'

        # Unique temp path per call: no clashes within the process and no
        # false positives from stale files left by earlier runs.
        fd, temp_output = tempfile.mkstemp(suffix='.jpg', prefix='thumb_')
        os.close(fd)
        try:
            thumb_cmd = [
                'ffmpeg',
                '-ss', seek_time,
                '-i', str(file_path),
                '-vframes', '1',
                '-vf', f'scale={self.max_thumb_size[0]}:{self.max_thumb_size[1]}:force_original_aspect_ratio=decrease',
                '-y',
                temp_output
            ]

            result = subprocess.run(thumb_cmd, capture_output=True, timeout=30)
            # mkstemp pre-creates the file, so check size rather than existence.
            if result.returncode != 0 or os.path.getsize(temp_output) == 0:
                logger.error(f"ffmpeg thumbnail generation failed for {file_path}", module="Error")
                return None, width, height, duration

            with open(temp_output, 'rb') as f:
                thumbnail_data = f.read()
        finally:
            # Always remove the temp frame, even when ffmpeg fails.
            Path(temp_output).unlink(missing_ok=True)

        return thumbnail_data, width, height, duration

    except Exception as e:
        logger.error(f"Error generating video thumbnail for {file_path}: {e}", module="Error")
        return None, None, None, None
|
|
|
|
def _cache_thumbnail(self, file_path: Path, thumbnail_data: bytes, content_hash: str = None):
|
|
"""Store thumbnail in cache database
|
|
|
|
Args:
|
|
file_path: Path to the file
|
|
thumbnail_data: JPEG thumbnail data
|
|
content_hash: Optional SHA256 content hash from database
|
|
"""
|
|
try:
|
|
file_hash = self._get_file_hash(file_path, content_hash)
|
|
file_mtime = file_path.stat().st_mtime
|
|
|
|
conn = sqlite3.connect(str(self.db_path), timeout=30.0)
|
|
conn.execute('PRAGMA journal_mode=WAL')
|
|
conn.execute("""
|
|
INSERT OR REPLACE INTO thumbnails
|
|
(file_hash, file_path, thumbnail_data, created_at, file_mtime)
|
|
VALUES (?, ?, ?, ?, ?)
|
|
""", (file_hash, str(file_path), thumbnail_data, datetime.now().isoformat(), file_mtime))
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Error caching thumbnail for {file_path}: {e}", module="Error")
|
|
return False
|
|
|
|
def _cache_metadata(self, file_path: Path, width: int, height: int, duration: float = None, format_type: str = None, content_hash: str = None):
|
|
"""Store metadata in cache database
|
|
|
|
Args:
|
|
file_path: Path to the file
|
|
width: Image/video width
|
|
height: Image/video height
|
|
duration: Video duration (seconds)
|
|
format_type: Media format
|
|
content_hash: Optional SHA256 content hash from database
|
|
"""
|
|
try:
|
|
file_hash = self._get_file_hash(file_path, content_hash)
|
|
file_mtime = file_path.stat().st_mtime
|
|
file_size = file_path.stat().st_size
|
|
|
|
conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
|
|
conn.execute('PRAGMA journal_mode=WAL')
|
|
conn.execute("""
|
|
INSERT OR REPLACE INTO media_metadata
|
|
(file_hash, file_path, width, height, file_size, duration, format, created_at, file_mtime)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
""", (file_hash, str(file_path), width, height, file_size, duration, format_type,
|
|
datetime.now().isoformat(), file_mtime))
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Error caching metadata for {file_path}: {e}", module="Error")
|
|
return False
|
|
|
|
def _is_cached_valid(self, file_path: Path, content_hash: str = None) -> bool:
|
|
"""Check if file already has valid cached thumbnail and metadata
|
|
|
|
Args:
|
|
file_path: Path to the file
|
|
content_hash: Optional SHA256 content hash from database
|
|
"""
|
|
try:
|
|
file_hash = self._get_file_hash(file_path, content_hash)
|
|
file_mtime = file_path.stat().st_mtime
|
|
|
|
# Check thumbnail cache
|
|
conn = sqlite3.connect(str(self.db_path), timeout=30.0)
|
|
conn.execute('PRAGMA journal_mode=WAL')
|
|
cursor = conn.execute(
|
|
"SELECT file_mtime FROM thumbnails WHERE file_hash = ?",
|
|
(file_hash,)
|
|
)
|
|
thumb_result = cursor.fetchone()
|
|
conn.close()
|
|
|
|
if not thumb_result or abs(thumb_result[0] - file_mtime) > 1:
|
|
return False
|
|
|
|
# Check metadata cache
|
|
conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
|
|
conn.execute('PRAGMA journal_mode=WAL')
|
|
cursor = conn.execute(
|
|
"SELECT file_mtime FROM media_metadata WHERE file_hash = ?",
|
|
(file_hash,)
|
|
)
|
|
meta_result = cursor.fetchone()
|
|
conn.close()
|
|
|
|
if not meta_result or abs(meta_result[0] - file_mtime) > 1:
|
|
return False
|
|
|
|
return True
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error checking cache for {file_path}: {e}", module="Error")
|
|
return False
|
|
|
|
def process_file(self, file_path: Path, content_hash: str = None) -> bool:
    """Process a single file - generate thumbnail and cache metadata.

    Updates self.stats as a side effect: missing or already-cached files
    count as 'skipped', successful work as 'thumbnails_created' /
    'metadata_cached', failures as 'errors'.

    Args:
        file_path: Path to the file
        content_hash: Optional SHA256 content hash from database (preferred for cache key)

    Returns:
        True on success or skip (including unsupported extensions, which
        fall through untouched); False when generation/caching failed.
    """
    try:
        # Missing files are treated as a successful skip, not an error —
        # the inventory may lag behind deletions.
        if not file_path.exists():
            self.stats['skipped'] += 1
            return True

        # Check if already cached and up-to-date
        if self._is_cached_valid(file_path, content_hash):
            self.stats['skipped'] += 1
            return True

        file_ext = file_path.suffix.lower()

        if file_ext in self.image_extensions:
            # Process image: for images, a thumbnail AND both dimensions
            # are required for success.
            thumbnail_data, width, height, format_type = self._generate_image_thumbnail(file_path)

            if thumbnail_data and width and height:
                # Cache thumbnail
                if self._cache_thumbnail(file_path, thumbnail_data, content_hash):
                    self.stats['thumbnails_created'] += 1

                # Cache metadata
                if self._cache_metadata(file_path, width, height, format_type=format_type, content_hash=content_hash):
                    self.stats['metadata_cached'] += 1

                return True
            else:
                self.stats['errors'] += 1
                return False

        elif file_ext in self.video_extensions:
            # Process video: unlike images, success is judged on having
            # dimensions — a failed frame grab alone is tolerated.
            thumbnail_data, width, height, duration = self._generate_video_thumbnail(file_path)

            # Cache thumbnail if generated
            if thumbnail_data:
                if self._cache_thumbnail(file_path, thumbnail_data, content_hash):
                    self.stats['thumbnails_created'] += 1

            # Cache metadata if we have dimensions
            if width and height:
                if self._cache_metadata(file_path, width, height, duration=duration, format_type='video', content_hash=content_hash):
                    self.stats['metadata_cached'] += 1

            # Consider successful even if thumbnail failed (metadata might still be cached)
            if width and height:
                return True
            else:
                self.stats['errors'] += 1
                return False

        # Unsupported extension: nothing to do, but not an error.
        return True

    except Exception as e:
        logger.error(f"Error processing file {file_path}: {e}", module="Error")
        self.stats['errors'] += 1
        return False
|
|
|
|
def _get_files_from_inventory(self) -> list:
|
|
"""Query file_inventory table for all media files (database-first)
|
|
Returns: List of tuples (file_path, content_hash or None)
|
|
"""
|
|
try:
|
|
conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)
|
|
conn.row_factory = sqlite3.Row
|
|
cursor = conn.cursor()
|
|
|
|
# Query all files from file_inventory (any location: final, review, recycle)
|
|
# Include file_hash from recycle_bin if file is in recycle location
|
|
cursor.execute("""
|
|
SELECT
|
|
fi.file_path,
|
|
fi.content_type,
|
|
fi.location,
|
|
rb.file_hash as content_hash
|
|
FROM file_inventory fi
|
|
LEFT JOIN recycle_bin rb ON fi.file_path = rb.recycle_path
|
|
ORDER BY fi.created_date DESC
|
|
""")
|
|
|
|
rows = cursor.fetchall()
|
|
conn.close()
|
|
|
|
# Convert to Path objects and filter by extension
|
|
all_extensions = list(self.image_extensions) + list(self.video_extensions)
|
|
files = []
|
|
|
|
for row in rows:
|
|
file_path = Path(row['file_path'])
|
|
if file_path.suffix.lower() in all_extensions and file_path.exists():
|
|
# Return tuple: (file_path, content_hash or None)
|
|
content_hash = row['content_hash'] if row['content_hash'] else None
|
|
files.append((file_path, content_hash))
|
|
|
|
return files
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error querying file_inventory: {e}", module="Error")
|
|
# Fallback to filesystem scan if database query fails
|
|
logger.warning("Falling back to filesystem scan...", module="Warning")
|
|
return self._fallback_filesystem_scan()
|
|
|
|
def _fallback_filesystem_scan(self) -> list:
|
|
"""Fallback: Scan filesystem if database query fails
|
|
Returns: List of tuples (file_path, None) - no content_hash available from filesystem
|
|
"""
|
|
all_files = []
|
|
for scan_dir in self.scan_dirs:
|
|
if not scan_dir.exists():
|
|
continue
|
|
for ext in list(self.image_extensions) + list(self.video_extensions):
|
|
# Return tuples: (file_path, None) - no content hash from filesystem scan
|
|
all_files.extend([(f, None) for f in scan_dir.rglob(f"*{ext}")])
|
|
return all_files
|
|
|
|
def scan_and_process(self):
    """Query file_inventory and process all files (database-first).

    Logs progress every 100 files and a summary at the end. Per-file
    failures are counted in self.stats rather than aborting the run.

    Fix: the final average-rate log divided by elapsed time unguarded,
    which raises ZeroDivisionError for an empty inventory on a coarse
    clock; the rate is now computed with the same guard used in the loop.
    """
    logger.info("Starting thumbnail and metadata cache build...", module="Core")
    logger.info("Querying file_inventory table (database-first architecture)...", module="Core")

    start_time = time.time()

    # Query file_inventory instead of scanning filesystem.
    # Returns list of tuples: (file_path, content_hash or None)
    all_files = self._get_files_from_inventory()

    total_files = len(all_files)
    logger.info(f"Found {total_files} media files to process from file_inventory", module="Core")

    # Count how many have content hashes (from recycle bin)
    files_with_hash = sum(1 for _, content_hash in all_files if content_hash)
    if files_with_hash > 0:
        logger.info(f" - {files_with_hash} files have content hash (from recycle bin - cache survives moves)", module="Core")

    # Process files with progress updates
    for i, (file_path, content_hash) in enumerate(all_files, 1):
        self.process_file(file_path, content_hash)
        self.stats['processed'] += 1

        # Progress update every 100 files (and on the final file)
        if i % 100 == 0 or i == total_files:
            elapsed = time.time() - start_time
            rate = i / elapsed if elapsed > 0 else 0
            eta = (total_files - i) / rate if rate > 0 else 0

            logger.info(f"Progress: {i}/{total_files} ({i/total_files*100:.1f}%) - "
                        f"Rate: {rate:.1f} files/sec - ETA: {eta/60:.1f} min", module="Core")

    # Final statistics
    elapsed = time.time() - start_time
    avg_rate = self.stats['processed'] / elapsed if elapsed > 0 else 0.0
    logger.info("=" * 60, module="Core")
    logger.info("Thumbnail and Metadata Cache Build Complete", module="Core")
    logger.info("=" * 60, module="Core")
    logger.info(f"Total files processed: {self.stats['processed']}", module="Core")
    logger.info(f"Thumbnails created: {self.stats['thumbnails_created']}", module="Core")
    logger.info(f"Metadata cached: {self.stats['metadata_cached']}", module="Core")
    logger.info(f"Files skipped (already cached): {self.stats['skipped']}", module="Core")
    logger.info(f"Errors: {self.stats['errors']}", module="Core")
    logger.info(f"Total time: {elapsed/60:.1f} minutes", module="Core")
    logger.info(f"Average rate: {avg_rate:.1f} files/sec", module="Core")
    logger.info("=" * 60, module="Core")
|
|
|
|
def cleanup_orphaned_records(self) -> dict:
    """Clean up orphaned database records for files that no longer exist.

    Four passes, in order: face_recognition_scans and downloads rows in the
    unified DB whose file_path is absent from file_inventory, then
    media_metadata and thumbnail cache rows compared against the set of
    valid inventory paths. The last two passes are best-effort: any error
    there is swallowed so cleanup never blocks the main build. Errors in
    the first passes are logged and the partial stats returned.

    Returns:
        Dict of table name -> number of rows removed.
    """
    logger.info("Starting database cleanup for orphaned records...", module="Cleanup")
    cleanup_stats = {
        'face_recognition_scans': 0,
        'downloads': 0,
        'media_metadata': 0,
        'thumbnail_cache': 0
    }

    # Pre-declare all connections so the finally block can close whichever
    # were opened before an exception.
    conn = None
    meta_conn = None
    thumb_conn = None
    main_conn = None

    try:
        # Clean up face_recognition_scans for files not in file_inventory
        conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)
        cursor = conn.cursor()

        # Find orphaned face_recognition_scans (files not in file_inventory)
        cursor.execute("""
            SELECT COUNT(*) FROM face_recognition_scans frs
            WHERE NOT EXISTS (
                SELECT 1 FROM file_inventory fi WHERE fi.file_path = frs.file_path
            )
        """)
        orphaned_count = cursor.fetchone()[0]

        if orphaned_count > 0:
            # NOTE(review): the COUNT and DELETE are separate statements, so
            # the reported count assumes no concurrent writes in between.
            cursor.execute("""
                DELETE FROM face_recognition_scans
                WHERE NOT EXISTS (
                    SELECT 1 FROM file_inventory fi WHERE fi.file_path = face_recognition_scans.file_path
                )
            """)
            conn.commit()
            cleanup_stats['face_recognition_scans'] = orphaned_count
            logger.info(f"Removed {orphaned_count} orphaned face_recognition_scans records", module="Cleanup")

        # Clean up downloads for files not in file_inventory
        cursor.execute("""
            SELECT COUNT(*) FROM downloads d
            WHERE d.file_path IS NOT NULL AND d.file_path != ''
            AND NOT EXISTS (
                SELECT 1 FROM file_inventory fi WHERE fi.file_path = d.file_path
            )
        """)
        orphaned_downloads = cursor.fetchone()[0]

        if orphaned_downloads > 0:
            cursor.execute("""
                DELETE FROM downloads
                WHERE file_path IS NOT NULL AND file_path != ''
                AND NOT EXISTS (
                    SELECT 1 FROM file_inventory fi WHERE fi.file_path = downloads.file_path
                )
            """)
            conn.commit()
            cleanup_stats['downloads'] = orphaned_downloads
            logger.info(f"Removed {orphaned_downloads} orphaned downloads records", module="Cleanup")

        conn.close()

        # Clean up media_metadata cache for files not in file_inventory
        try:
            meta_conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
            main_conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)

            # Get list of valid file paths from file_inventory
            main_cursor = main_conn.cursor()
            main_cursor.execute("SELECT file_path FROM file_inventory")
            valid_paths = set(row[0] for row in main_cursor.fetchall())
            main_conn.close()

            # Check metadata for orphans
            meta_cursor = meta_conn.cursor()
            meta_cursor.execute("SELECT file_path FROM media_metadata")
            all_meta_paths = [row[0] for row in meta_cursor.fetchall()]

            # Cross-database comparison is done in Python since the caches
            # live in separate SQLite files.
            orphaned_meta = [p for p in all_meta_paths if p not in valid_paths]
            if orphaned_meta:
                placeholders = ','.join(['?' for _ in orphaned_meta])
                meta_cursor.execute(f"DELETE FROM media_metadata WHERE file_path IN ({placeholders})", orphaned_meta)
                meta_conn.commit()
                cleanup_stats['media_metadata'] = len(orphaned_meta)
                logger.info(f"Removed {len(orphaned_meta)} orphaned media_metadata records", module="Cleanup")

            meta_conn.close()
        except Exception:
            pass  # metadata cleanup is non-critical

        # Clean up thumbnail cache for files not in file_inventory
        # (same location as self.db_path)
        thumb_db_path = Path(__file__).parent.parent / 'database' / 'thumbnails.db'
        try:
            thumb_conn = sqlite3.connect(str(thumb_db_path), timeout=30.0)
            main_conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)

            # Get list of valid file paths
            main_cursor = main_conn.cursor()
            main_cursor.execute("SELECT file_path FROM file_inventory")
            valid_paths = set(row[0] for row in main_cursor.fetchall())
            main_conn.close()

            # Check thumbnails for orphans
            thumb_cursor = thumb_conn.cursor()
            # Thumbnails use file_hash as key, so we need to check existence differently
            try:
                thumb_cursor.execute("SELECT file_path FROM thumbnails WHERE file_path IS NOT NULL")
                all_thumb_paths = [row[0] for row in thumb_cursor.fetchall()]

                orphaned_thumbs = [p for p in all_thumb_paths if p and p not in valid_paths]
                if orphaned_thumbs:
                    placeholders = ','.join(['?' for _ in orphaned_thumbs])
                    thumb_cursor.execute(f"DELETE FROM thumbnails WHERE file_path IN ({placeholders})", orphaned_thumbs)
                    thumb_conn.commit()
                    cleanup_stats['thumbnail_cache'] = len(orphaned_thumbs)
                    logger.info(f"Removed {len(orphaned_thumbs)} orphaned thumbnail records", module="Cleanup")
            except sqlite3.OperationalError:
                # Table structure may not have file_path column
                pass

            thumb_conn.close()
        except Exception:
            pass  # thumbnail cleanup is non-critical

        # Log summary
        total_cleaned = sum(cleanup_stats.values())
        logger.info("=" * 60, module="Cleanup")
        logger.info("Database Cleanup Complete", module="Cleanup")
        logger.info("=" * 60, module="Cleanup")
        logger.info(f"Total orphaned records removed: {total_cleaned}", module="Cleanup")
        for table, count in cleanup_stats.items():
            if count > 0:
                logger.info(f" - {table}: {count}", module="Cleanup")
        logger.info("=" * 60, module="Cleanup")

        return cleanup_stats

    except Exception as e:
        logger.error(f"Error during database cleanup: {e}", exc_info=True, module="Error")
        return cleanup_stats

    finally:
        # Ensure all database connections are closed (already-closed
        # connections tolerate a second close; errors are best-effort).
        for connection in [conn, meta_conn, thumb_conn, main_conn]:
            if connection:
                try:
                    connection.close()
                except Exception:
                    pass  # Best effort cleanup
|
|
|
|
|
|
def main():
    """Entry point: run DB cleanup, then build the caches.

    Returns:
        Process exit code — 0 on success, 1 on any fatal error.
    """
    logger.info("Thumbnail Cache Builder starting...", module="Core")

    try:
        builder = ThumbnailCacheBuilder()

        # The two phases run in a fixed order: prune stale records first so
        # the build pass doesn't waste work on files that are gone.
        phases = (
            ("Phase 1: Database cleanup for orphaned records", builder.cleanup_orphaned_records),
            ("Phase 2: Thumbnail and metadata cache building", builder.scan_and_process),
        )
        for banner, run_phase in phases:
            logger.info(banner, module="Core")
            run_phase()

        logger.info("Thumbnail Cache Builder completed successfully", module="Core")
        return 0
    except Exception as e:
        logger.error(f"Fatal error in Thumbnail Cache Builder: {e}", exc_info=True, module="Error")
        return 1


if __name__ == '__main__':
    sys.exit(main())
|