145
scripts/backfill_dimensions.py
Executable file
145
scripts/backfill_dimensions.py
Executable file
@@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Backfill missing dimensions (width/height) for files in file_inventory.
|
||||
|
||||
Uses PIL for images and ffprobe for videos.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
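# Bootstrap the PostgreSQL adapter before sqlite3 is imported. Order matters:
# the project root is put on sys.path, bootstrap_database() is called, and only
# then is sqlite3 imported below.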
|
||||
sys.path.insert(0, '/opt/media-downloader')
|
||||
from modules.db_bootstrap import bootstrap_database
|
||||
bootstrap_database()
|
||||
import sqlite3
|
||||
|
||||
# Database path passed to sqlite3.connect() (routed to PostgreSQL via pgadapter)
|
||||
DB_PATH = "/opt/media-downloader/database/media_downloader.db"
|
||||
|
||||
|
||||
def get_image_dimensions(file_path: str) -> tuple:
    """Read the pixel size of an image file with PIL.

    Args:
        file_path: Path of the image to open.

    Returns:
        The ``(width, height)`` tuple reported by ``Image.size``, or
        ``(None, None)`` if PIL cannot be imported or the file cannot be
        opened. Failures are printed, never raised.
    """
    try:
        # Imported lazily so a missing PIL is reported like any other failure.
        from PIL import Image

        with Image.open(file_path) as image:
            dimensions = image.size  # (width, height)
        return dimensions
    except Exception as exc:
        print(f" PIL error for {file_path}: {exc}")
    return None, None
|
||||
|
||||
|
||||
def _parse_ffprobe_dimensions(output: str) -> tuple:
    """Extract ``(width, height)`` from ffprobe ``csv=p=0`` output.

    ffprobe may print more than one line (for example when the selected
    stream is listed several times) or append extra fields to a line, so the
    output is examined line by line and only the first two fields are used.

    Args:
        output: Raw stdout captured from ffprobe.

    Returns:
        ``(width, height)`` as ints taken from the first line whose first two
        comma-separated fields are both decimal integers, or ``(None, None)``
        if no such line exists.
    """
    for line in output.splitlines():
        fields = [field.strip() for field in line.split(',')]
        if len(fields) >= 2 and fields[0].isdigit() and fields[1].isdigit():
            return int(fields[0]), int(fields[1])
    return None, None


def get_video_dimensions(file_path: str) -> tuple:
    """Get dimensions for a video file using ffprobe.

    Runs ``ffprobe`` on the first video stream (``v:0``) with a 30 second
    timeout and parses the ``width,height`` CSV it prints.

    Args:
        file_path: Path of the video file to probe.

    Returns:
        ``(width, height)`` as ints, or ``(None, None)`` when ffprobe exits
        with a non-zero status, prints nothing usable, is not installed, or
        times out. Exceptions are printed, never raised.
    """
    try:
        import subprocess

        result = subprocess.run(
            ['ffprobe', '-v', 'error', '-select_streams', 'v:0',
             '-show_entries', 'stream=width,height', '-of', 'csv=p=0',
             file_path],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode == 0:
            # Parse per line: splitting the whole stdout on ',' breaks as soon
            # as ffprobe prints a second line.
            return _parse_ffprobe_dimensions(result.stdout)
    except Exception as e:
        print(f" ffprobe error for {file_path}: {e}")
    return None, None
|
||||
|
||||
|
||||
# Extensions probed with PIL when content_type is neither 'image' nor 'video'.
_IMAGE_EXTENSIONS = frozenset({
    '.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp', '.bmp', '.tiff',
})


def _probe_dimensions(file_path, content_type):
    """Return ``(width, height)`` for one file, choosing PIL or ffprobe."""
    if content_type == 'image':
        return get_image_dimensions(file_path)
    if content_type == 'video':
        return get_video_dimensions(file_path)
    # Unknown content type: try to determine the probe from the extension.
    if Path(file_path).suffix.lower() in _IMAGE_EXTENSIONS:
        return get_image_dimensions(file_path)
    return get_video_dimensions(file_path)


def _flush_updates(conn, update_cursor, batch_updates):
    """Write the pending ``(width, height, id)`` tuples and commit them."""
    update_cursor.executemany(
        'UPDATE file_inventory SET width = ?, height = ? WHERE id = ?',
        batch_updates
    )
    conn.commit()


def main():
    """Backfill width/height for file_inventory rows that lack them.

    Selects rows in location 'final' or 'review' whose width or height is
    NULL, probes each file that exists on disk, and writes the results back
    in batches of 100, committing after each batch. Progress and a final
    summary are printed to stdout.

    Exits with status 1 if DB_PATH does not exist.
    """
    # NOTE(review): the module comment says DB_PATH is routed to PostgreSQL,
    # yet this check requires the file to exist on disk. Confirm that is
    # intended.
    if not os.path.exists(DB_PATH):
        print(f"Database not found: {DB_PATH}")
        sys.exit(1)

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        # Get count of files missing dimensions
        cursor.execute('''
            SELECT COUNT(*) FROM file_inventory
            WHERE (width IS NULL OR height IS NULL)
            AND location IN ('final', 'review')
        ''')
        total_missing = cursor.fetchone()[0]
        print(f"Found {total_missing} files with missing dimensions")

        if total_missing == 0:
            print("No files need dimension backfill!")
            return

        # Process in batches
        batch_size = 100
        processed = 0
        updated = 0
        errors = 0

        cursor.execute('''
            SELECT id, file_path, content_type FROM file_inventory
            WHERE (width IS NULL OR height IS NULL)
            AND location IN ('final', 'review')
        ''')
        # Read every candidate row before the first UPDATE/commit: writing to
        # and committing on a table while a SELECT over it is still being
        # stepped on the same connection can skip or repeat rows.
        rows = cursor.fetchall()

        update_cursor = conn.cursor()
        batch_updates = []

        for row in rows:
            file_id = row['id']
            file_path = row['file_path']
            content_type = row['content_type']

            # Files missing on disk count as processed, but neither as
            # updated nor as errors.
            if not os.path.exists(file_path):
                processed += 1
                continue

            width, height = _probe_dimensions(file_path, content_type)

            if width and height:
                batch_updates.append((width, height, file_id))
                updated += 1
            else:
                errors += 1

            processed += 1

            # Commit in batches
            if len(batch_updates) >= batch_size:
                _flush_updates(conn, update_cursor, batch_updates)
                print(f" Processed {processed}/{total_missing} files, updated {updated}, errors {errors}")
                batch_updates = []

        # Final batch
        if batch_updates:
            _flush_updates(conn, update_cursor, batch_updates)

        print(f"\nComplete! Processed {processed} files, updated {updated} with dimensions, {errors} errors")
    finally:
        # Close the connection on every exit path, including exceptions.
        conn.close()
|
||||
|
||||
|
||||
# Script entry point: run the backfill only when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user