#!/usr/bin/env python3
"""
Bulk pre-generate thumbnails for file_inventory entries that don't have
cached thumbnails.

Targets /opt/immich/el/ and /opt/immich/elv/ paths (Immich migration).
Uses multiprocessing to generate thumbnails in parallel.
"""
import hashlib
import io
import sqlite3
import subprocess
import sys
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from contextlib import closing
from pathlib import Path

from PIL import Image

THUMB_DB = "/opt/media-downloader/database/thumbnails.db"
APP_DB_DSN = "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
MAX_SIZE = (300, 300)
WORKERS = 6
BATCH_SIZE = 200


def generate_image_thumbnail(file_path, max_size=(300, 300)):
    """Return JPEG thumbnail bytes for an image file, or None on failure.

    Transparent images (RGBA/LA/P modes) are composited onto a white
    background because JPEG has no alpha channel.
    """
    try:
        # Context manager guarantees the underlying file handle is closed
        # promptly (the original relied on GC, leaking handles at scale).
        with Image.open(file_path) as img:
            img.thumbnail(max_size, Image.Resampling.LANCZOS)
            if img.mode in ('RGBA', 'LA', 'P'):
                background = Image.new('RGB', img.size, (255, 255, 255))
                if img.mode == 'P':
                    # Palette images may carry transparency; go through RGBA
                    # so the alpha channel can serve as the paste mask.
                    img = img.convert('RGBA')
                background.paste(
                    img,
                    mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None,
                )
                img = background
            buffer = io.BytesIO()
            img.save(buffer, format='JPEG', quality=85)
            return buffer.getvalue()
    except Exception:
        # Best-effort: an unreadable/corrupt image simply yields no thumbnail.
        return None


def generate_video_thumbnail(file_path, max_size=(300, 300)):
    """Return JPEG thumbnail bytes for a single video frame, or None.

    Tries t=1s first (skips black lead-in frames common at t=0), then
    falls back to the very first frame.
    """
    for seek_time in ('00:00:01.000', '00:00:00.000'):
        try:
            # -ss before -i uses fast input seeking; the single frame is
            # emitted as MJPEG on stdout so no temp file is needed.
            result = subprocess.run(
                [
                    'ffmpeg', '-ss', seek_time, '-i', str(file_path),
                    '-vframes', '1', '-f', 'image2pipe',
                    '-vcodec', 'mjpeg', '-',
                ],
                capture_output=True,
                timeout=30,
            )
            if result.returncode != 0 or not result.stdout:
                continue
            with Image.open(io.BytesIO(result.stdout)) as img:
                img.thumbnail(max_size, Image.Resampling.LANCZOS)
                buffer = io.BytesIO()
                img.save(buffer, format='JPEG', quality=85)
                return buffer.getvalue()
        except Exception:
            # ffmpeg missing / timeout / decode failure: try next seek point.
            continue
    return None


def process_file(args):
    """Generate a thumbnail for a single file. Runs in a worker process.

    Args:
        args: (file_path, content_type, file_hash) tuple; `file_hash` is the
            cache key under which the thumbnail will be stored.

    Returns:
        (file_hash, file_path, thumb_bytes_or_None, mtime, status) where
        status is 'ok', 'failed', or 'missing'.
    """
    file_path, content_type, file_hash = args
    p = Path(file_path)
    if not p.exists():
        return (file_hash, file_path, None, 0, 'missing')
    try:
        mtime = p.stat().st_mtime
    except OSError:
        # File vanished or is unreadable between exists() and stat().
        mtime = 0
    if content_type == 'video':
        data = generate_video_thumbnail(p, MAX_SIZE)
    else:
        data = generate_image_thumbnail(p, MAX_SIZE)
    if data:
        return (file_hash, file_path, data, mtime, 'ok')
    return (file_hash, file_path, None, mtime, 'failed')


def get_files_needing_thumbnails():
    """Query file_inventory for el/elv files lacking a cached thumbnail.

    Returns:
        List of (file_path, content_type, cache_key) tuples.
    """
    import psycopg2  # local import: only the parent process needs it

    conn = psycopg2.connect(APP_DB_DSN)
    try:
        with conn.cursor() as cur:
            # Bind the LIKE patterns as parameters instead of relying on
            # psycopg2's %%-escaping of literal SQL — safer and unaffected
            # if someone later adds another parameter to the query.
            cur.execute(
                """
                SELECT file_path, content_type, file_hash
                FROM file_inventory
                WHERE (file_path LIKE %s OR file_path LIKE %s)
                  AND location = 'final'
                ORDER BY id
                """,
                ('/opt/immich/el/%', '/opt/immich/elv/%'),
            )
            all_files = cur.fetchall()
    finally:
        conn.close()

    # Fetch every cached hash once so per-file membership checks are O(1);
    # closing() ensures the connection is released even on error.
    with closing(sqlite3.connect(THUMB_DB, timeout=30)) as thumb_conn:
        existing_hashes = {
            row[0]
            for row in thumb_conn.execute("SELECT file_hash FROM thumbnails")
        }

    needed = []
    for file_path, content_type, file_hash in all_files:
        # Prefer the content hash; fall back to a path-derived key so rows
        # without a stored hash can still be cached.
        cache_key = file_hash or hashlib.sha256(file_path.encode()).hexdigest()
        if cache_key not in existing_hashes:
            needed.append((file_path, content_type, cache_key))
    return needed


def save_batch(results):
    """Insert generated thumbnails into the cache DB; return count saved.

    Entries whose thumbnail data is None (failed/missing) are skipped.
    """
    from datetime import datetime, timezone

    now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S')
    rows = [
        (file_hash, file_path, data, now, mtime)
        for file_hash, file_path, data, mtime, status in results
        if data
    ]
    # closing() guarantees the connection is released even if the insert
    # raises (the original leaked it on error); executemany batches the
    # inserts in one call.
    with closing(sqlite3.connect(THUMB_DB, timeout=60)) as conn:
        conn.executemany(
            """
            INSERT OR REPLACE INTO thumbnails
            (file_hash, file_path, thumbnail_data, created_at, file_mtime)
            VALUES (?, ?, ?, ?, ?)
            """,
            rows,
        )
        conn.commit()
    return len(rows)


def main():
    """Entry point: find uncached files and generate thumbnails in parallel."""
    start = time.time()
    print("Bulk thumbnail pre-generation")
    print("=" * 60)
    print("Finding files needing thumbnails...")
    needed = get_files_needing_thumbnails()
    total = len(needed)
    print(f" {total:,} files need thumbnails")
    if total == 0:
        print(" Nothing to do!")
        return

    generated = 0
    failed = 0
    missing = 0
    batch_results = []
    with ProcessPoolExecutor(max_workers=WORKERS) as executor:
        futures = {executor.submit(process_file, item): item for item in needed}
        for i, future in enumerate(as_completed(futures), 1):
            result = future.result()
            batch_results.append(result)
            status = result[4]
            if status == 'ok':
                generated += 1
            elif status == 'missing':
                missing += 1
            else:
                failed += 1
            # Persist periodically so progress survives interruption, and
            # report throughput/ETA at the same cadence.
            if len(batch_results) >= BATCH_SIZE:
                save_batch(batch_results)
                batch_results = []
                elapsed = time.time() - start
                rate = i / elapsed if elapsed > 0 else 0
                eta = (total - i) / rate if rate > 0 else 0
                print(f" {i:,}/{total:,} ({generated:,} ok, {failed:,} failed, {missing:,} missing) "
                      f"[{rate:.0f}/s, ETA {eta:.0f}s]")

    # Final partial batch.
    if batch_results:
        save_batch(batch_results)

    elapsed = time.time() - start
    print(f"\nDone in {elapsed:.1f}s:")
    print(f" Generated: {generated:,}")
    print(f" Failed: {failed:,}")
    print(f" Missing: {missing:,}")


if __name__ == "__main__":
    main()