639
modules/thumbnail_cache_builder.py
Executable file
639
modules/thumbnail_cache_builder.py
Executable file
@@ -0,0 +1,639 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Background worker to pre-generate thumbnails and cache metadata for all media files.
|
||||
This improves performance by generating thumbnails in advance rather than on-demand.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from PIL import Image
|
||||
import io
|
||||
|
||||
# Add parent directory to path so we can import modules
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
# Bootstrap database backend (must be before any database imports)
|
||||
import modules.db_bootstrap # noqa: E402,F401
|
||||
|
||||
import sqlite3
|
||||
|
||||
from modules.universal_logger import get_logger
|
||||
|
||||
logger = get_logger('ThumbnailCacheBuilder')
|
||||
|
||||
|
||||
class ThumbnailCacheBuilder:
    """Build and maintain the thumbnail and metadata caches for media files.

    Database-first: the worklist comes from the file_inventory table (with a
    filesystem scan as fallback). JPEG thumbnails are stored in thumbnails.db
    and dimensions/duration/size in media_metadata.db, each row stamped with
    the source file's mtime so stale entries can be detected.
    """
|
||||
|
||||
def __init__(self):
|
||||
self.scan_dirs = [
|
||||
Path('/opt/immich/md'),
|
||||
Path('/opt/immich/review'),
|
||||
Path('/opt/immich/recycle')
|
||||
]
|
||||
self.db_path = Path(__file__).parent.parent / 'database' / 'thumbnails.db'
|
||||
self.metadata_db_path = Path(__file__).parent.parent / 'database' / 'media_metadata.db'
|
||||
self.unified_db_path = Path(__file__).parent.parent / 'database' / 'media_downloader.db'
|
||||
self.max_thumb_size = (300, 300)
|
||||
|
||||
# Image and video extensions
|
||||
self.image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp'}
|
||||
self.video_extensions = {'.mp4', '.mov', '.webm', '.avi', '.mkv', '.flv', '.m4v'}
|
||||
|
||||
self.stats = {
|
||||
'processed': 0,
|
||||
'thumbnails_created': 0,
|
||||
'thumbnails_cached': 0,
|
||||
'metadata_cached': 0,
|
||||
'errors': 0,
|
||||
'skipped': 0
|
||||
}
|
||||
|
||||
self._init_metadata_db()
|
||||
|
||||
def _init_metadata_db(self):
|
||||
"""Initialize metadata cache database"""
|
||||
self.metadata_db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
|
||||
conn.execute('PRAGMA journal_mode=WAL')
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS media_metadata (
|
||||
file_hash TEXT PRIMARY KEY,
|
||||
file_path TEXT NOT NULL,
|
||||
width INTEGER,
|
||||
height INTEGER,
|
||||
file_size INTEGER,
|
||||
duration REAL,
|
||||
format TEXT,
|
||||
created_at TEXT,
|
||||
file_mtime DOUBLE PRECISION
|
||||
)
|
||||
""")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_meta_file_path ON media_metadata(file_path)")
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
logger.info(f"Metadata database initialized at {self.metadata_db_path}", module="Database")
|
||||
|
||||
def _get_file_hash(self, file_path: Path, content_hash: str = None) -> str:
|
||||
"""Generate hash for file path or use content hash
|
||||
|
||||
Args:
|
||||
file_path: Path to the file
|
||||
content_hash: Optional SHA256 content hash from database (preferred for recycle bin)
|
||||
"""
|
||||
if content_hash:
|
||||
# Use first 64 chars of content hash (full SHA256 for cache key)
|
||||
return content_hash[:64]
|
||||
# Fall back to path-based hash
|
||||
return hashlib.sha256(str(file_path).encode()).hexdigest()
|
||||
|
||||
def _generate_image_thumbnail(self, file_path: Path) -> tuple:
|
||||
"""Generate thumbnail and extract metadata for image
|
||||
Returns: (thumbnail_data, width, height, format)
|
||||
"""
|
||||
try:
|
||||
with Image.open(file_path) as img:
|
||||
# Get original dimensions
|
||||
width, height = img.size
|
||||
img_format = img.format
|
||||
|
||||
# Convert RGBA to RGB if needed
|
||||
if img.mode == 'RGBA':
|
||||
background = Image.new('RGB', img.size, (255, 255, 255))
|
||||
background.paste(img, mask=img.split()[3])
|
||||
img = background
|
||||
elif img.mode != 'RGB':
|
||||
img = img.convert('RGB')
|
||||
|
||||
# Generate thumbnail
|
||||
img.thumbnail(self.max_thumb_size, Image.Resampling.LANCZOS)
|
||||
|
||||
# Save to bytes
|
||||
buffer = io.BytesIO()
|
||||
img.save(buffer, format='JPEG', quality=85, optimize=True)
|
||||
thumbnail_data = buffer.getvalue()
|
||||
|
||||
return thumbnail_data, width, height, img_format
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating image thumbnail for {file_path}: {e}", module="Error")
|
||||
return None, None, None, None
|
||||
|
||||
def _generate_video_thumbnail(self, file_path: Path) -> tuple:
|
||||
"""Generate thumbnail and extract metadata for video using ffmpeg
|
||||
Returns: (thumbnail_data, width, height, duration)
|
||||
"""
|
||||
try:
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
# Get video metadata using ffprobe
|
||||
probe_cmd = [
|
||||
'ffprobe',
|
||||
'-v', 'quiet',
|
||||
'-print_format', 'json',
|
||||
'-show_format',
|
||||
'-show_streams',
|
||||
str(file_path)
|
||||
]
|
||||
|
||||
result = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=30)
|
||||
if result.returncode != 0:
|
||||
logger.error(f"ffprobe failed for {file_path}", module="Error")
|
||||
return None, None, None, None
|
||||
|
||||
metadata = json.loads(result.stdout)
|
||||
|
||||
# Extract video stream info
|
||||
video_stream = next((s for s in metadata.get('streams', []) if s.get('codec_type') == 'video'), None)
|
||||
if not video_stream:
|
||||
return None, None, None, None
|
||||
|
||||
width = video_stream.get('width')
|
||||
height = video_stream.get('height')
|
||||
duration = float(metadata.get('format', {}).get('duration', 0))
|
||||
|
||||
# Generate thumbnail - seek to 1s or 0s for very short videos
|
||||
temp_output = f"/tmp/thumb_{os.getpid()}.jpg"
|
||||
seek_time = '00:00:01' if duration > 1.5 else '00:00:00'
|
||||
|
||||
thumb_cmd = [
|
||||
'ffmpeg',
|
||||
'-ss', seek_time,
|
||||
'-i', str(file_path),
|
||||
'-vframes', '1',
|
||||
'-vf', f'scale={self.max_thumb_size[0]}:{self.max_thumb_size[1]}:force_original_aspect_ratio=decrease',
|
||||
'-y',
|
||||
temp_output
|
||||
]
|
||||
|
||||
result = subprocess.run(thumb_cmd, capture_output=True, timeout=30)
|
||||
if result.returncode != 0 or not Path(temp_output).exists():
|
||||
logger.error(f"ffmpeg thumbnail generation failed for {file_path}", module="Error")
|
||||
return None, width, height, duration
|
||||
|
||||
# Read thumbnail data
|
||||
with open(temp_output, 'rb') as f:
|
||||
thumbnail_data = f.read()
|
||||
|
||||
# Clean up temp file
|
||||
Path(temp_output).unlink(missing_ok=True)
|
||||
|
||||
return thumbnail_data, width, height, duration
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating video thumbnail for {file_path}: {e}", module="Error")
|
||||
return None, None, None, None
|
||||
|
||||
def _cache_thumbnail(self, file_path: Path, thumbnail_data: bytes, content_hash: str = None):
|
||||
"""Store thumbnail in cache database
|
||||
|
||||
Args:
|
||||
file_path: Path to the file
|
||||
thumbnail_data: JPEG thumbnail data
|
||||
content_hash: Optional SHA256 content hash from database
|
||||
"""
|
||||
try:
|
||||
file_hash = self._get_file_hash(file_path, content_hash)
|
||||
file_mtime = file_path.stat().st_mtime
|
||||
|
||||
conn = sqlite3.connect(str(self.db_path), timeout=30.0)
|
||||
conn.execute('PRAGMA journal_mode=WAL')
|
||||
conn.execute("""
|
||||
INSERT OR REPLACE INTO thumbnails
|
||||
(file_hash, file_path, thumbnail_data, created_at, file_mtime)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
""", (file_hash, str(file_path), thumbnail_data, datetime.now().isoformat(), file_mtime))
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Error caching thumbnail for {file_path}: {e}", module="Error")
|
||||
return False
|
||||
|
||||
def _cache_metadata(self, file_path: Path, width: int, height: int, duration: float = None, format_type: str = None, content_hash: str = None):
|
||||
"""Store metadata in cache database
|
||||
|
||||
Args:
|
||||
file_path: Path to the file
|
||||
width: Image/video width
|
||||
height: Image/video height
|
||||
duration: Video duration (seconds)
|
||||
format_type: Media format
|
||||
content_hash: Optional SHA256 content hash from database
|
||||
"""
|
||||
try:
|
||||
file_hash = self._get_file_hash(file_path, content_hash)
|
||||
file_mtime = file_path.stat().st_mtime
|
||||
file_size = file_path.stat().st_size
|
||||
|
||||
conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
|
||||
conn.execute('PRAGMA journal_mode=WAL')
|
||||
conn.execute("""
|
||||
INSERT OR REPLACE INTO media_metadata
|
||||
(file_hash, file_path, width, height, file_size, duration, format, created_at, file_mtime)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (file_hash, str(file_path), width, height, file_size, duration, format_type,
|
||||
datetime.now().isoformat(), file_mtime))
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Error caching metadata for {file_path}: {e}", module="Error")
|
||||
return False
|
||||
|
||||
def _is_cached_valid(self, file_path: Path, content_hash: str = None) -> bool:
|
||||
"""Check if file already has valid cached thumbnail and metadata
|
||||
|
||||
Args:
|
||||
file_path: Path to the file
|
||||
content_hash: Optional SHA256 content hash from database
|
||||
"""
|
||||
try:
|
||||
file_hash = self._get_file_hash(file_path, content_hash)
|
||||
file_mtime = file_path.stat().st_mtime
|
||||
|
||||
# Check thumbnail cache
|
||||
conn = sqlite3.connect(str(self.db_path), timeout=30.0)
|
||||
conn.execute('PRAGMA journal_mode=WAL')
|
||||
cursor = conn.execute(
|
||||
"SELECT file_mtime FROM thumbnails WHERE file_hash = ?",
|
||||
(file_hash,)
|
||||
)
|
||||
thumb_result = cursor.fetchone()
|
||||
conn.close()
|
||||
|
||||
if not thumb_result or abs(thumb_result[0] - file_mtime) > 1:
|
||||
return False
|
||||
|
||||
# Check metadata cache
|
||||
conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
|
||||
conn.execute('PRAGMA journal_mode=WAL')
|
||||
cursor = conn.execute(
|
||||
"SELECT file_mtime FROM media_metadata WHERE file_hash = ?",
|
||||
(file_hash,)
|
||||
)
|
||||
meta_result = cursor.fetchone()
|
||||
conn.close()
|
||||
|
||||
if not meta_result or abs(meta_result[0] - file_mtime) > 1:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking cache for {file_path}: {e}", module="Error")
|
||||
return False
|
||||
|
||||
def process_file(self, file_path: Path, content_hash: str = None) -> bool:
|
||||
"""Process a single file - generate thumbnail and cache metadata
|
||||
|
||||
Args:
|
||||
file_path: Path to the file
|
||||
content_hash: Optional SHA256 content hash from database (preferred for cache key)
|
||||
"""
|
||||
try:
|
||||
if not file_path.exists():
|
||||
self.stats['skipped'] += 1
|
||||
return True
|
||||
|
||||
# Check if already cached and up-to-date
|
||||
if self._is_cached_valid(file_path, content_hash):
|
||||
self.stats['skipped'] += 1
|
||||
return True
|
||||
|
||||
file_ext = file_path.suffix.lower()
|
||||
|
||||
if file_ext in self.image_extensions:
|
||||
# Process image
|
||||
thumbnail_data, width, height, format_type = self._generate_image_thumbnail(file_path)
|
||||
|
||||
if thumbnail_data and width and height:
|
||||
# Cache thumbnail
|
||||
if self._cache_thumbnail(file_path, thumbnail_data, content_hash):
|
||||
self.stats['thumbnails_created'] += 1
|
||||
|
||||
# Cache metadata
|
||||
if self._cache_metadata(file_path, width, height, format_type=format_type, content_hash=content_hash):
|
||||
self.stats['metadata_cached'] += 1
|
||||
|
||||
return True
|
||||
else:
|
||||
self.stats['errors'] += 1
|
||||
return False
|
||||
|
||||
elif file_ext in self.video_extensions:
|
||||
# Process video
|
||||
thumbnail_data, width, height, duration = self._generate_video_thumbnail(file_path)
|
||||
|
||||
# Cache thumbnail if generated
|
||||
if thumbnail_data:
|
||||
if self._cache_thumbnail(file_path, thumbnail_data, content_hash):
|
||||
self.stats['thumbnails_created'] += 1
|
||||
|
||||
# Cache metadata if we have dimensions
|
||||
if width and height:
|
||||
if self._cache_metadata(file_path, width, height, duration=duration, format_type='video', content_hash=content_hash):
|
||||
self.stats['metadata_cached'] += 1
|
||||
|
||||
# Consider successful even if thumbnail failed (metadata might still be cached)
|
||||
if width and height:
|
||||
return True
|
||||
else:
|
||||
self.stats['errors'] += 1
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing file {file_path}: {e}", module="Error")
|
||||
self.stats['errors'] += 1
|
||||
return False
|
||||
|
||||
def _get_files_from_inventory(self) -> list:
|
||||
"""Query file_inventory table for all media files (database-first)
|
||||
Returns: List of tuples (file_path, content_hash or None)
|
||||
"""
|
||||
try:
|
||||
conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)
|
||||
conn.row_factory = sqlite3.Row
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Query all files from file_inventory (any location: final, review, recycle)
|
||||
# Include file_hash from recycle_bin if file is in recycle location
|
||||
cursor.execute("""
|
||||
SELECT
|
||||
fi.file_path,
|
||||
fi.content_type,
|
||||
fi.location,
|
||||
rb.file_hash as content_hash
|
||||
FROM file_inventory fi
|
||||
LEFT JOIN recycle_bin rb ON fi.file_path = rb.recycle_path
|
||||
ORDER BY fi.created_date DESC
|
||||
""")
|
||||
|
||||
rows = cursor.fetchall()
|
||||
conn.close()
|
||||
|
||||
# Convert to Path objects and filter by extension
|
||||
all_extensions = list(self.image_extensions) + list(self.video_extensions)
|
||||
files = []
|
||||
|
||||
for row in rows:
|
||||
file_path = Path(row['file_path'])
|
||||
if file_path.suffix.lower() in all_extensions and file_path.exists():
|
||||
# Return tuple: (file_path, content_hash or None)
|
||||
content_hash = row['content_hash'] if row['content_hash'] else None
|
||||
files.append((file_path, content_hash))
|
||||
|
||||
return files
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error querying file_inventory: {e}", module="Error")
|
||||
# Fallback to filesystem scan if database query fails
|
||||
logger.warning("Falling back to filesystem scan...", module="Warning")
|
||||
return self._fallback_filesystem_scan()
|
||||
|
||||
def _fallback_filesystem_scan(self) -> list:
|
||||
"""Fallback: Scan filesystem if database query fails
|
||||
Returns: List of tuples (file_path, None) - no content_hash available from filesystem
|
||||
"""
|
||||
all_files = []
|
||||
for scan_dir in self.scan_dirs:
|
||||
if not scan_dir.exists():
|
||||
continue
|
||||
for ext in list(self.image_extensions) + list(self.video_extensions):
|
||||
# Return tuples: (file_path, None) - no content hash from filesystem scan
|
||||
all_files.extend([(f, None) for f in scan_dir.rglob(f"*{ext}")])
|
||||
return all_files
|
||||
|
||||
def scan_and_process(self):
|
||||
"""Query file_inventory and process all files (database-first)"""
|
||||
logger.info("Starting thumbnail and metadata cache build...", module="Core")
|
||||
logger.info("Querying file_inventory table (database-first architecture)...", module="Core")
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
# Query file_inventory instead of scanning filesystem
|
||||
# Returns list of tuples: (file_path, content_hash or None)
|
||||
all_files = self._get_files_from_inventory()
|
||||
|
||||
total_files = len(all_files)
|
||||
logger.info(f"Found {total_files} media files to process from file_inventory", module="Core")
|
||||
|
||||
# Count how many have content hashes (from recycle bin)
|
||||
files_with_hash = sum(1 for _, content_hash in all_files if content_hash)
|
||||
if files_with_hash > 0:
|
||||
logger.info(f" - {files_with_hash} files have content hash (from recycle bin - cache survives moves)", module="Core")
|
||||
|
||||
# Process files with progress updates
|
||||
for i, (file_path, content_hash) in enumerate(all_files, 1):
|
||||
self.process_file(file_path, content_hash)
|
||||
self.stats['processed'] += 1
|
||||
|
||||
# Progress update every 100 files
|
||||
if i % 100 == 0 or i == total_files:
|
||||
elapsed = time.time() - start_time
|
||||
rate = i / elapsed if elapsed > 0 else 0
|
||||
eta = (total_files - i) / rate if rate > 0 else 0
|
||||
|
||||
logger.info(f"Progress: {i}/{total_files} ({i/total_files*100:.1f}%) - "
|
||||
f"Rate: {rate:.1f} files/sec - ETA: {eta/60:.1f} min", module="Core")
|
||||
|
||||
# Final statistics
|
||||
elapsed = time.time() - start_time
|
||||
logger.info("=" * 60, module="Core")
|
||||
logger.info("Thumbnail and Metadata Cache Build Complete", module="Core")
|
||||
logger.info("=" * 60, module="Core")
|
||||
logger.info(f"Total files processed: {self.stats['processed']}", module="Core")
|
||||
logger.info(f"Thumbnails created: {self.stats['thumbnails_created']}", module="Core")
|
||||
logger.info(f"Metadata cached: {self.stats['metadata_cached']}", module="Core")
|
||||
logger.info(f"Files skipped (already cached): {self.stats['skipped']}", module="Core")
|
||||
logger.info(f"Errors: {self.stats['errors']}", module="Core")
|
||||
logger.info(f"Total time: {elapsed/60:.1f} minutes", module="Core")
|
||||
logger.info(f"Average rate: {self.stats['processed']/elapsed:.1f} files/sec", module="Core")
|
||||
logger.info("=" * 60, module="Core")
|
||||
|
||||
def cleanup_orphaned_records(self):
|
||||
"""Clean up orphaned database records for files that no longer exist"""
|
||||
logger.info("Starting database cleanup for orphaned records...", module="Cleanup")
|
||||
cleanup_stats = {
|
||||
'face_recognition_scans': 0,
|
||||
'downloads': 0,
|
||||
'media_metadata': 0,
|
||||
'thumbnail_cache': 0
|
||||
}
|
||||
|
||||
conn = None
|
||||
meta_conn = None
|
||||
thumb_conn = None
|
||||
main_conn = None
|
||||
|
||||
try:
|
||||
# Clean up face_recognition_scans for files not in file_inventory
|
||||
conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Find orphaned face_recognition_scans (files not in file_inventory)
|
||||
cursor.execute("""
|
||||
SELECT COUNT(*) FROM face_recognition_scans frs
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM file_inventory fi WHERE fi.file_path = frs.file_path
|
||||
)
|
||||
""")
|
||||
orphaned_count = cursor.fetchone()[0]
|
||||
|
||||
if orphaned_count > 0:
|
||||
cursor.execute("""
|
||||
DELETE FROM face_recognition_scans
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM file_inventory fi WHERE fi.file_path = face_recognition_scans.file_path
|
||||
)
|
||||
""")
|
||||
conn.commit()
|
||||
cleanup_stats['face_recognition_scans'] = orphaned_count
|
||||
logger.info(f"Removed {orphaned_count} orphaned face_recognition_scans records", module="Cleanup")
|
||||
|
||||
# Clean up downloads for files not in file_inventory
|
||||
cursor.execute("""
|
||||
SELECT COUNT(*) FROM downloads d
|
||||
WHERE d.file_path IS NOT NULL AND d.file_path != ''
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM file_inventory fi WHERE fi.file_path = d.file_path
|
||||
)
|
||||
""")
|
||||
orphaned_downloads = cursor.fetchone()[0]
|
||||
|
||||
if orphaned_downloads > 0:
|
||||
cursor.execute("""
|
||||
DELETE FROM downloads
|
||||
WHERE file_path IS NOT NULL AND file_path != ''
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM file_inventory fi WHERE fi.file_path = downloads.file_path
|
||||
)
|
||||
""")
|
||||
conn.commit()
|
||||
cleanup_stats['downloads'] = orphaned_downloads
|
||||
logger.info(f"Removed {orphaned_downloads} orphaned downloads records", module="Cleanup")
|
||||
|
||||
conn.close()
|
||||
|
||||
# Clean up media_metadata cache for files not in file_inventory
|
||||
try:
|
||||
meta_conn = sqlite3.connect(str(self.metadata_db_path), timeout=30.0)
|
||||
main_conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)
|
||||
|
||||
# Get list of valid file paths from file_inventory
|
||||
main_cursor = main_conn.cursor()
|
||||
main_cursor.execute("SELECT file_path FROM file_inventory")
|
||||
valid_paths = set(row[0] for row in main_cursor.fetchall())
|
||||
main_conn.close()
|
||||
|
||||
# Check metadata for orphans
|
||||
meta_cursor = meta_conn.cursor()
|
||||
meta_cursor.execute("SELECT file_path FROM media_metadata")
|
||||
all_meta_paths = [row[0] for row in meta_cursor.fetchall()]
|
||||
|
||||
orphaned_meta = [p for p in all_meta_paths if p not in valid_paths]
|
||||
if orphaned_meta:
|
||||
placeholders = ','.join(['?' for _ in orphaned_meta])
|
||||
meta_cursor.execute(f"DELETE FROM media_metadata WHERE file_path IN ({placeholders})", orphaned_meta)
|
||||
meta_conn.commit()
|
||||
cleanup_stats['media_metadata'] = len(orphaned_meta)
|
||||
logger.info(f"Removed {len(orphaned_meta)} orphaned media_metadata records", module="Cleanup")
|
||||
|
||||
meta_conn.close()
|
||||
except Exception:
|
||||
pass # metadata cleanup is non-critical
|
||||
|
||||
# Clean up thumbnail cache for files not in file_inventory
|
||||
thumb_db_path = Path(__file__).parent.parent / 'database' / 'thumbnails.db'
|
||||
try:
|
||||
thumb_conn = sqlite3.connect(str(thumb_db_path), timeout=30.0)
|
||||
main_conn = sqlite3.connect(str(self.unified_db_path), timeout=30.0)
|
||||
|
||||
# Get list of valid file paths
|
||||
main_cursor = main_conn.cursor()
|
||||
main_cursor.execute("SELECT file_path FROM file_inventory")
|
||||
valid_paths = set(row[0] for row in main_cursor.fetchall())
|
||||
main_conn.close()
|
||||
|
||||
# Check thumbnails for orphans
|
||||
thumb_cursor = thumb_conn.cursor()
|
||||
# Thumbnails use file_hash as key, so we need to check existence differently
|
||||
try:
|
||||
thumb_cursor.execute("SELECT file_path FROM thumbnails WHERE file_path IS NOT NULL")
|
||||
all_thumb_paths = [row[0] for row in thumb_cursor.fetchall()]
|
||||
|
||||
orphaned_thumbs = [p for p in all_thumb_paths if p and p not in valid_paths]
|
||||
if orphaned_thumbs:
|
||||
placeholders = ','.join(['?' for _ in orphaned_thumbs])
|
||||
thumb_cursor.execute(f"DELETE FROM thumbnails WHERE file_path IN ({placeholders})", orphaned_thumbs)
|
||||
thumb_conn.commit()
|
||||
cleanup_stats['thumbnail_cache'] = len(orphaned_thumbs)
|
||||
logger.info(f"Removed {len(orphaned_thumbs)} orphaned thumbnail records", module="Cleanup")
|
||||
except sqlite3.OperationalError:
|
||||
# Table structure may not have file_path column
|
||||
pass
|
||||
|
||||
thumb_conn.close()
|
||||
except Exception:
|
||||
pass # thumbnail cleanup is non-critical
|
||||
|
||||
# Log summary
|
||||
total_cleaned = sum(cleanup_stats.values())
|
||||
logger.info("=" * 60, module="Cleanup")
|
||||
logger.info("Database Cleanup Complete", module="Cleanup")
|
||||
logger.info("=" * 60, module="Cleanup")
|
||||
logger.info(f"Total orphaned records removed: {total_cleaned}", module="Cleanup")
|
||||
for table, count in cleanup_stats.items():
|
||||
if count > 0:
|
||||
logger.info(f" - {table}: {count}", module="Cleanup")
|
||||
logger.info("=" * 60, module="Cleanup")
|
||||
|
||||
return cleanup_stats
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during database cleanup: {e}", exc_info=True, module="Error")
|
||||
return cleanup_stats
|
||||
finally:
|
||||
# Ensure all database connections are closed
|
||||
for connection in [conn, meta_conn, thumb_conn, main_conn]:
|
||||
if connection:
|
||||
try:
|
||||
connection.close()
|
||||
except Exception:
|
||||
pass # Best effort cleanup
|
||||
|
||||
|
||||
def main():
    """Entry point: run orphan cleanup, then build the caches.

    Returns:
        Process exit code — 0 on success, 1 on any fatal error.
    """
    logger.info("Thumbnail Cache Builder starting...", module="Core")

    try:
        worker = ThumbnailCacheBuilder()

        # Phase 1: drop database records pointing at files that are gone.
        logger.info("Phase 1: Database cleanup for orphaned records", module="Core")
        worker.cleanup_orphaned_records()

        # Phase 2: generate thumbnails and metadata for everything else.
        logger.info("Phase 2: Thumbnail and metadata cache building", module="Core")
        worker.scan_and_process()

        logger.info("Thumbnail Cache Builder completed successfully", module="Core")
        return 0
    except Exception as e:
        logger.error(f"Fatal error in Thumbnail Cache Builder: {e}", exc_info=True, module="Error")
        return 1
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Propagate main()'s return code as the process exit status.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user