#!/usr/bin/env python3
"""
Backfill missing dimensions (width/height) for files in file_inventory.
Uses PIL for images and ffprobe for videos.
"""

import os
import sys
from pathlib import Path
from typing import List, Optional, Tuple

# Bootstrap PostgreSQL adapter before any sqlite3 imports
sys.path.insert(0, '/opt/media-downloader')
from modules.db_bootstrap import bootstrap_database
bootstrap_database()

import sqlite3

# Database path (routed to PostgreSQL via pgadapter)
DB_PATH = "/opt/media-downloader/database/media_downloader.db"

# Extensions probed with PIL when content_type is neither 'image' nor 'video'.
# Anything else falls back to ffprobe.
IMAGE_EXTENSIONS = frozenset({
    '.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp', '.bmp', '.tiff',
})

# Number of pending updates accumulated before each write + commit.
BATCH_SIZE = 100

# (width, height) on success, (None, None) when the file could not be probed.
Dimensions = Tuple[Optional[int], Optional[int]]


def get_image_dimensions(file_path: str) -> Dimensions:
    """Get dimensions for an image file using PIL.

    Args:
        file_path: Path of the image to open.

    Returns:
        ``(width, height)`` as reported by ``Image.size``, or ``(None, None)``
        if the file could not be opened (the error is printed, not raised).
    """
    # Deliberately broad: this is a best-effort, per-file probe, and a single
    # unreadable image (or a missing PIL install) must not abort the backfill.
    try:
        from PIL import Image
        with Image.open(file_path) as img:
            return img.size  # (width, height)
    except Exception as e:
        print(f" PIL error for {file_path}: {e}")
        return None, None


def get_video_dimensions(file_path: str) -> Dimensions:
    """Get dimensions for a video file using ffprobe.

    Runs ``ffprobe`` on the first video stream (``v:0``) and parses its
    ``width,height`` CSV output.

    Args:
        file_path: Path of the video to probe.

    Returns:
        ``(width, height)``, or ``(None, None)`` if ffprobe fails, times out
        (30 s), prints nothing usable, or prints non-integer fields.
    """
    try:
        import subprocess
        result = subprocess.run(
            ['ffprobe', '-v', 'error', '-select_streams', 'v:0',
             '-show_entries', 'stream=width,height', '-of', 'csv=p=0', file_path],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode == 0:
            # Parse only the first non-empty row. The CSV output may contain
            # more than one row (reportedly for some containers, e.g.
            # MPEG-TS); splitting the whole output on ',' would then glue
            # "height\nwidth" into one field and make int() fail.
            first_row = next(
                (line.strip() for line in result.stdout.splitlines() if line.strip()),
                '',
            )
            parts = first_row.split(',')
            if len(parts) >= 2:
                return int(parts[0]), int(parts[1])
    except Exception as e:
        # Best-effort per file: report and fall through to the "unknown" result.
        print(f" ffprobe error for {file_path}: {e}")
    return None, None


def _probe_dimensions(file_path: str, content_type: Optional[str]) -> Dimensions:
    """Pick the prober for a file from its content_type, else its extension."""
    if content_type == 'image':
        return get_image_dimensions(file_path)
    if content_type == 'video':
        return get_video_dimensions(file_path)
    # Try to determine from extension
    if Path(file_path).suffix.lower() in IMAGE_EXTENSIONS:
        return get_image_dimensions(file_path)
    return get_video_dimensions(file_path)


def _flush_updates(conn, cursor, updates: List[Tuple[int, int, int]]) -> None:
    """Write a batch of (width, height, id) updates and commit them."""
    cursor.executemany(
        'UPDATE file_inventory SET width = ?, height = ? WHERE id = ?',
        updates
    )
    conn.commit()


def main():
    """Backfill width/height for 'final' and 'review' rows that lack them.

    Exits with status 1 if DB_PATH does not exist. Files that are missing on
    disk are skipped; files whose dimensions cannot be determined are counted
    as errors and left unchanged. Updates are committed every BATCH_SIZE rows.
    """
    if not os.path.exists(DB_PATH):
        print(f"Database not found: {DB_PATH}")
        sys.exit(1)

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        # Materialize the work list up front. The loop below updates and
        # commits rows of this same table on this same connection; iterating
        # a live SELECT over the same predicate while doing so has undefined
        # visibility in SQLite (rows may be skipped or revisited).
        cursor.execute('''
            SELECT id, file_path, content_type
            FROM file_inventory
            WHERE (width IS NULL OR height IS NULL)
              AND location IN ('final', 'review')
        ''')
        rows = cursor.fetchall()

        total_missing = len(rows)
        print(f"Found {total_missing} files with missing dimensions")

        if total_missing == 0:
            print("No files need dimension backfill!")
            return

        processed = 0
        updated = 0
        errors = 0
        missing_on_disk = 0

        update_cursor = conn.cursor()
        batch_updates = []

        for row in rows:
            file_id = row['id']
            file_path = row['file_path']
            content_type = row['content_type']
            processed += 1

            # A NULL/empty path cannot be probed; treat it like a missing file.
            if not file_path or not os.path.exists(file_path):
                missing_on_disk += 1
                continue

            width, height = _probe_dimensions(file_path, content_type)

            if width and height:
                batch_updates.append((width, height, file_id))
                updated += 1
            else:
                errors += 1

            # Commit in batches
            if len(batch_updates) >= BATCH_SIZE:
                _flush_updates(conn, update_cursor, batch_updates)
                print(f" Processed {processed}/{total_missing} files, updated {updated}, errors {errors}")
                batch_updates = []

        # Final batch
        if batch_updates:
            _flush_updates(conn, update_cursor, batch_updates)

        print(f"\nComplete! Processed {processed} files, updated {updated} with dimensions, {errors} errors")
        if missing_on_disk:
            print(f"Skipped {missing_on_disk} files not found on disk")
    finally:
        # Always release the connection, even if probing or a write raised.
        conn.close()


if __name__ == '__main__':
    main()