Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Todd
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions

145
scripts/backfill_dimensions.py Executable file
View File

@@ -0,0 +1,145 @@
#!/usr/bin/env python3
"""
Backfill missing dimensions (width/height) for files in file_inventory.
Uses PIL for images and ffprobe for videos.
"""
import os
import sys
from pathlib import Path
# Bootstrap PostgreSQL adapter before any sqlite3 imports.
# The install root is put first on sys.path so that modules.db_bootstrap
# resolves, and bootstrap_database() is called ahead of `import sqlite3`.
# NOTE(review): presumably the bootstrap swaps in / patches the sqlite3
# module - keep this statement order unless that is confirmed otherwise.
sys.path.insert(0, '/opt/media-downloader')
from modules.db_bootstrap import bootstrap_database
bootstrap_database()
import sqlite3
# Database path (routed to PostgreSQL via pgadapter).
# main() still checks that this path exists on disk before connecting.
DB_PATH = "/opt/media-downloader/database/media_downloader.db"
def get_image_dimensions(file_path: str) -> tuple:
    """Read the pixel size of an image file with PIL.

    Args:
        file_path: Path of the image to open.

    Returns:
        ``(width, height)`` as reported by PIL, or ``(None, None)`` when
        anything goes wrong (the error is printed, never raised).
    """
    try:
        # Importing inside the try means a missing PIL install is caught and
        # printed like any other per-file failure.
        from PIL import Image

        with Image.open(file_path) as image:
            width, height = image.size
            return width, height
    except Exception as exc:
        print(f" PIL error for {file_path}: {exc}")
    return None, None
def get_video_dimensions(file_path: str) -> tuple:
    """Get dimensions for a video file using ffprobe.

    Runs ``ffprobe`` on the first video stream (``v:0``) asking for
    ``width,height`` in header-less CSV form, with a 30 second timeout.

    Args:
        file_path: Path of the video to probe.

    Returns:
        ``(width, height)`` as ints, or ``(None, None)`` when ffprobe fails,
        prints nothing usable, or any exception occurs (the exception is
        printed, never raised).
    """
    try:
        import subprocess
        result = subprocess.run(
            ['ffprobe', '-v', 'error', '-select_streams', 'v:0',
             '-show_entries', 'stream=width,height', '-of', 'csv=p=0',
             file_path],
            capture_output=True, text=True, timeout=30
        )
        output = result.stdout.strip()
        if result.returncode == 0 and output:
            # ffprobe may emit more than one CSV row (for example the same
            # stream listed again under a program section). Splitting the
            # whole output on ',' would then glue two rows together and make
            # int() fail, so only the first row is parsed.
            first_row = output.splitlines()[0]
            parts = first_row.split(',')
            if len(parts) >= 2:
                return int(parts[0]), int(parts[1])
    except Exception as e:
        print(f" ffprobe error for {file_path}: {e}")
    return None, None
# Extensions probed as images when content_type is neither 'image' nor 'video'.
_IMAGE_EXTS = frozenset({
    '.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp', '.bmp', '.tiff',
})


def _probe_dimensions(file_path, content_type):
    """Choose the image or video probe for one file and return (width, height)."""
    if content_type == 'image':
        return get_image_dimensions(file_path)
    if content_type == 'video':
        return get_video_dimensions(file_path)
    # Any other content type: decide from the file extension, and treat
    # everything that is not a known image extension as a video.
    if Path(file_path).suffix.lower() in _IMAGE_EXTS:
        return get_image_dimensions(file_path)
    return get_video_dimensions(file_path)


def _flush_updates(conn, update_cursor, batch_updates):
    """Write one batch of (width, height, id) tuples and commit it."""
    update_cursor.executemany(
        'UPDATE file_inventory SET width = ?, height = ? WHERE id = ?',
        batch_updates
    )
    conn.commit()


def main():
    """Backfill width/height for file_inventory rows that are missing them.

    Only rows whose location is 'final' or 'review' are considered. Each
    candidate file is probed (PIL for images, ffprobe for videos) and the
    results are written back in batches of 100, committing after each batch.

    Files that no longer exist on disk are counted as processed but are
    neither updated nor counted as errors. Files that exist but yield no
    usable dimensions are counted as errors.

    Exits with status 1 when DB_PATH does not exist.
    """
    if not os.path.exists(DB_PATH):
        print(f"Database not found: {DB_PATH}")
        sys.exit(1)

    batch_size = 100

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        # Get count of files missing dimensions
        cursor.execute('''
            SELECT COUNT(*) FROM file_inventory
            WHERE (width IS NULL OR height IS NULL)
            AND location IN ('final', 'review')
        ''')
        total_missing = cursor.fetchone()[0]
        print(f"Found {total_missing} files with missing dimensions")
        if total_missing == 0:
            print("No files need dimension backfill!")
            return

        cursor.execute('''
            SELECT id, file_path, content_type FROM file_inventory
            WHERE (width IS NULL OR height IS NULL)
            AND location IN ('final', 'review')
        ''')
        # Read every candidate row before writing anything: the UPDATEs below
        # change the very columns this SELECT filters on, and updating and
        # committing on the same connection while the read cursor is still
        # open leaves it undefined which rows the cursor goes on to return.
        rows = cursor.fetchall()

        update_cursor = conn.cursor()
        batch_updates = []
        processed = 0
        updated = 0
        errors = 0

        for row in rows:
            processed += 1
            file_path = row['file_path']
            # A NULL path is handled like a file that is gone from disk.
            if not file_path or not os.path.exists(file_path):
                continue

            width, height = _probe_dimensions(file_path, row['content_type'])
            if width and height:
                batch_updates.append((width, height, row['id']))
                updated += 1
            else:
                errors += 1

            # Commit in batches
            if len(batch_updates) >= batch_size:
                _flush_updates(conn, update_cursor, batch_updates)
                print(f" Processed {processed}/{total_missing} files, updated {updated}, errors {errors}")
                batch_updates = []

        # Final batch
        if batch_updates:
            _flush_updates(conn, update_cursor, batch_updates)

        print(f"\nComplete! Processed {processed} files, updated {updated} with dimensions, {errors} errors")
    finally:
        # Close the connection on every path, including exceptions.
        conn.close()
# Run the backfill only when this file is executed as a script.
if __name__ == '__main__':
    main()