#!/usr/bin/env python3
"""
Backfill missing dimensions (width/height) for files in file_inventory.

Uses PIL for images and ffprobe for videos.
"""

import os
import sys
from pathlib import Path

# Bootstrap PostgreSQL adapter before any sqlite3 imports.
# The statement order below is deliberate: the project root is put on
# sys.path, bootstrap_database() runs, and only then is sqlite3 imported.
sys.path.insert(0, '/opt/media-downloader')
from modules.db_bootstrap import bootstrap_database
bootstrap_database()
import sqlite3

# Database path (routed to PostgreSQL via pgadapter)
DB_PATH = "/opt/media-downloader/database/media_downloader.db"
def get_image_dimensions(file_path: str) -> tuple:
    """Read the pixel size of an image file with PIL.

    Args:
        file_path: Path of the image to open.

    Returns:
        The ``(width, height)`` tuple reported by PIL, or ``(None, None)``
        when PIL cannot be imported or the file cannot be opened. The
        failure is printed rather than raised.
    """
    try:
        # Imported lazily so that a missing PIL install is reported through
        # the same best-effort path as an unreadable file.
        from PIL import Image

        with Image.open(file_path) as image:
            size = image.size  # (width, height)
    except Exception as exc:
        print(f" PIL error for {file_path}: {exc}")
        return None, None
    return size
def get_video_dimensions(file_path: str) -> tuple:
    """Get dimensions for a video file using ffprobe.

    Runs ``ffprobe`` against the first video stream (``v:0``) and asks for
    ``width,height`` in headerless CSV form.

    Args:
        file_path: Path of the media file handed to ffprobe.

    Returns:
        ``(width, height)`` as ints, or ``(None, None)`` when ffprobe cannot
        be started, exits non-zero, exceeds the 30 second timeout, prints
        nothing, or prints values that are not integers. Failures that raise
        are printed rather than propagated.
    """
    import subprocess

    try:
        result = subprocess.run(
            ['ffprobe', '-v', 'error', '-select_streams', 'v:0',
             '-show_entries', 'stream=width,height', '-of', 'csv=p=0',
             file_path],
            capture_output=True, text=True, timeout=30
        )
        if result.returncode == 0:
            # Only the first non-empty line is parsed. If ffprobe prints the
            # entry more than once, splitting the whole output on ',' would
            # leave a newline inside the height field and int() would fail.
            first_line = next(
                (line.strip() for line in result.stdout.splitlines() if line.strip()),
                "",
            )
            parts = first_line.split(',')
            if len(parts) >= 2:
                return int(parts[0]), int(parts[1])
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # OSError: ffprobe missing or not executable.
        # SubprocessError: timeout.
        # ValueError: non-numeric output such as "N/A", or undecodable output.
        print(f" ffprobe error for {file_path}: {e}")
    return None, None
# Extensions probed as images when a row's content_type is neither
# 'image' nor 'video'; every other extension is probed with ffprobe.
_IMAGE_EXTENSIONS = frozenset({
    '.jpg', '.jpeg', '.png', '.gif', '.heic', '.heif', '.webp', '.bmp', '.tiff',
})

_UPDATE_DIMENSIONS_SQL = 'UPDATE file_inventory SET width = ?, height = ? WHERE id = ?'


def _probe_dimensions(file_path, content_type):
    """Choose the image or video probe for one inventory row and run it."""
    if content_type == 'image':
        return get_image_dimensions(file_path)
    if content_type == 'video':
        return get_video_dimensions(file_path)
    # Unknown content type: decide from the file extension.
    if Path(file_path).suffix.lower() in _IMAGE_EXTENSIONS:
        return get_image_dimensions(file_path)
    return get_video_dimensions(file_path)


def _flush_updates(conn, cursor, pending):
    """Write the queued (width, height, id) tuples, commit, and empty the queue."""
    cursor.executemany(_UPDATE_DIMENSIONS_SQL, pending)
    conn.commit()
    pending.clear()


def main():
    """Backfill width/height for file_inventory rows that lack them.

    Selects rows whose ``width`` or ``height`` is NULL and whose ``location``
    is 'final' or 'review', probes each file that still exists on disk, and
    writes the measured dimensions back in batches of 100, committing after
    each batch. Progress and a final summary are printed.

    Exits with status 1 if ``DB_PATH`` does not exist. Rows whose file is
    missing from disk (or whose path is empty) are counted as processed but
    are neither updated nor counted as errors; their number is reported in
    the summary.
    """
    if not os.path.exists(DB_PATH):
        print(f"Database not found: {DB_PATH}")
        sys.exit(1)

    batch_size = 100
    processed = 0
    updated = 0
    errors = 0
    skipped = 0
    pending = []

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        # The result set is read in full before any UPDATE is issued. The
        # query filters on the same columns the loop writes, and changing
        # them on this connection while the SELECT is still being stepped
        # would make the set of rows seen by the loop unpredictable.
        cursor.execute('''
            SELECT id, file_path, content_type FROM file_inventory
            WHERE (width IS NULL OR height IS NULL)
            AND location IN ('final', 'review')
        ''')
        rows = cursor.fetchall()

        total_missing = len(rows)
        print(f"Found {total_missing} files with missing dimensions")

        if total_missing == 0:
            print("No files need dimension backfill!")
            return

        for row in rows:
            processed += 1
            file_path = row['file_path']

            # A NULL/empty path cannot be probed; treat it like a missing file.
            if not file_path or not os.path.exists(file_path):
                skipped += 1
                continue

            width, height = _probe_dimensions(file_path, row['content_type'])

            # Zero or None in either dimension counts as a failed probe.
            if width and height:
                pending.append((width, height, row['id']))
                updated += 1
            else:
                errors += 1

            # Commit in batches
            if len(pending) >= batch_size:
                _flush_updates(conn, cursor, pending)
                print(f" Processed {processed}/{total_missing} files, updated {updated}, errors {errors}")

        # Final batch
        if pending:
            _flush_updates(conn, cursor, pending)

        print(f"\nComplete! Processed {processed} files, updated {updated} with dimensions, {errors} errors")
        if skipped:
            print(f"Skipped {skipped} files not found on disk")
    finally:
        # Close the connection on every exit path, including exceptions.
        conn.close()


if __name__ == '__main__':
    main()