Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Todd
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions

View File

@@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""
Bulk pre-generate thumbnails for file_inventory entries that don't have cached thumbnails.
Targets /opt/immich/el/ and /opt/immich/elv/ paths (Immich migration).
Uses multiprocessing to generate thumbnails in parallel.
"""
import hashlib
import io
import os
import sqlite3
import subprocess
import sys
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path

from PIL import Image
# SQLite database that caches generated thumbnails (read and written below).
THUMB_DB = "/opt/media-downloader/database/thumbnails.db"
# PostgreSQL DSN of the application database that holds file_inventory.
# SECURITY: the fallback literal embeds a plaintext password in source control.
# Prefer setting MEDIA_DOWNLOADER_DB_DSN in the environment; once deployments
# do so, rotate the password and remove the literal.
APP_DB_DSN = os.environ.get("MEDIA_DOWNLOADER_DB_DSN") or (
    "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
)
# Bounding box (width, height) passed to the thumbnail generators.
MAX_SIZE = (300, 300)
# Number of worker processes used by main().
WORKERS = 6
# Number of results accumulated before they are flushed to THUMB_DB.
BATCH_SIZE = 200
def generate_image_thumbnail(file_path, max_size=(300, 300)):
    """Render a JPEG thumbnail for an image file.

    Args:
        file_path: Path of the image to open with Pillow.
        max_size: ``(width, height)`` bounding box handed to
            ``Image.thumbnail``.

    Returns:
        JPEG-encoded bytes (quality 85), or ``None`` if the file cannot be
        opened, decoded, converted or encoded.
    """
    try:
        # The context manager closes the underlying file handle even when
        # decoding fails part-way through.
        with Image.open(file_path) as source:
            img = source
            img.thumbnail(max_size, Image.Resampling.LANCZOS)

            # Palette images are expanded to RGBA so they can be flattened
            # through the same alpha-mask path as RGBA/LA images.
            if img.mode == 'P':
                img = img.convert('RGBA')

            if img.mode in ('RGBA', 'LA'):
                # JPEG has no alpha channel: composite onto a white canvas,
                # using the last band (alpha) as the paste mask.
                background = Image.new('RGB', img.size, (255, 255, 255))
                background.paste(img, mask=img.split()[-1])
                img = background
            elif img.mode not in ('RGB', 'L'):
                # Remaining modes are normalised to RGB before encoding so
                # that modes the JPEG writer rejects are not dropped.
                img = img.convert('RGB')

            buffer = io.BytesIO()
            img.save(buffer, format='JPEG', quality=85)
            return buffer.getvalue()
    except Exception:
        # Deliberate best-effort: any failure is reported to the caller as
        # "no thumbnail" rather than crashing the worker.
        return None
def generate_video_thumbnail(file_path, max_size=(300, 300)):
    """Render a JPEG thumbnail from a single frame of a video file.

    A frame is extracted with ``ffmpeg`` and scaled with Pillow.

    Args:
        file_path: Path of the video handed to ``ffmpeg -i``.
        max_size: ``(width, height)`` bounding box handed to
            ``Image.thumbnail``.

    Returns:
        JPEG-encoded bytes (quality 85), or ``None`` if no attempt produced
        a decodable frame.
    """
    # Seek to one second first, then retry at the very start of the file
    # (presumably for clips too short to have a frame at 1s).
    for seek_time in ('00:00:01.000', '00:00:00.000'):
        try:
            result = subprocess.run(
                [
                    'ffmpeg', '-ss', seek_time,
                    '-i', str(file_path),
                    '-vframes', '1',
                    '-f', 'image2pipe',
                    '-vcodec', 'mjpeg', '-',
                ],
                # Detach stdin so ffmpeg never reads from (or blocks on) the
                # terminal shared by the parallel worker processes.
                stdin=subprocess.DEVNULL,
                capture_output=True,
                timeout=30,
            )
            if result.returncode != 0 or not result.stdout:
                continue

            with Image.open(io.BytesIO(result.stdout)) as frame:
                frame.thumbnail(max_size, Image.Resampling.LANCZOS)
                buffer = io.BytesIO()
                frame.save(buffer, format='JPEG', quality=85)
            return buffer.getvalue()
        except Exception:
            # Best-effort: a missing ffmpeg binary, a timeout or an
            # undecodable frame just moves on to the next seek position.
            continue
    return None
def process_file(args):
    """Build the thumbnail for one inventory entry (executed in a worker).

    Args:
        args: ``(file_path, content_type, file_hash)`` tuple.

    Returns:
        ``(file_hash, file_path, data, mtime, status)`` where ``status`` is
        ``'missing'`` (path does not exist; ``data`` is ``None`` and
        ``mtime`` is ``0``), ``'ok'`` (``data`` holds the thumbnail bytes)
        or ``'failed'`` (``data`` is ``None``).
    """
    path_str, kind, cache_key = args
    source = Path(path_str)

    # Nothing to render when the file is gone from disk.
    if not source.exists():
        return (cache_key, path_str, None, 0, 'missing')

    try:
        modified = source.stat().st_mtime
    except OSError:
        # The stat failure is tolerated; the thumbnail is still attempted.
        modified = 0

    # Only the 'video' content type goes through ffmpeg; everything else is
    # treated as a still image.
    render = generate_video_thumbnail if kind == 'video' else generate_image_thumbnail
    thumbnail = render(source, MAX_SIZE)

    if not thumbnail:
        return (cache_key, path_str, None, modified, 'failed')
    return (cache_key, path_str, thumbnail, modified, 'ok')
def get_files_needing_thumbnails():
    """List el/elv inventory files that have no cached thumbnail yet.

    Reads ``file_inventory`` rows (location ``'final'``, path under
    ``/opt/immich/el/`` or ``/opt/immich/elv/``) from the application
    database and drops those whose cache key is already present in the
    ``thumbnails`` table of ``THUMB_DB``.

    Returns:
        A list of ``(file_path, content_type, cache_key)`` tuples in
        ``file_inventory.id`` order. ``cache_key`` is the row's
        ``file_hash`` when set, otherwise the SHA-256 hex digest of the
        path string.
    """
    from contextlib import closing

    import psycopg2

    # closing() guarantees both handles are released even if the query
    # raises; psycopg2's own connection context manager only ends the
    # transaction and does not close the connection.
    with closing(psycopg2.connect(APP_DB_DSN)) as conn, closing(conn.cursor()) as cur:
        # The LIKE patterns are passed as bound parameters so the '%'
        # wildcard needs no escaping inside the SQL text.
        cur.execute(
            """
            SELECT file_path, content_type, file_hash
            FROM file_inventory
            WHERE (file_path LIKE %s OR file_path LIKE %s)
              AND location = 'final'
            ORDER BY id
            """,
            ('/opt/immich/el/%', '/opt/immich/elv/%'),
        )
        all_files = cur.fetchall()

    # Load every existing thumbnail key in one query so the membership test
    # below is a set lookup rather than a query per file.
    with closing(sqlite3.connect(THUMB_DB, timeout=30)) as thumb_conn:
        existing_hashes = {
            row[0] for row in thumb_conn.execute("SELECT file_hash FROM thumbnails")
        }

    needed = []
    for file_path, content_type, file_hash in all_files:
        # Use the content hash if available, else a hash of the path.
        cache_key = file_hash or hashlib.sha256(file_path.encode()).hexdigest()
        if cache_key not in existing_hashes:
            needed.append((file_path, content_type, cache_key))
    return needed
def save_batch(results, db_path=None):
    """Persist a batch of generated thumbnails to the thumbnail database.

    Args:
        results: Iterable of ``(file_hash, file_path, data, mtime, status)``
            tuples as returned by ``process_file``. Entries whose ``data``
            is empty or ``None`` are skipped.
        db_path: SQLite database to write to; defaults to ``THUMB_DB``.

    Returns:
        The number of rows written.

    Raises:
        sqlite3.Error: If the database cannot be opened or written; the
            batch is rolled back and the connection is closed.
    """
    from contextlib import closing
    from datetime import datetime, timezone

    target = THUMB_DB if db_path is None else db_path
    # One UTC timestamp is shared by every row of the batch.
    now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S')
    rows = [
        (file_hash, file_path, data, now, mtime)
        for file_hash, file_path, data, mtime, _status in results
        if data
    ]
    if not rows:
        return 0

    # closing() releases the connection on every path; the inner `with conn`
    # commits on success and rolls back if an insert raises.
    with closing(sqlite3.connect(target, timeout=60)) as conn:
        with conn:
            conn.executemany("""
                INSERT OR REPLACE INTO thumbnails
                (file_hash, file_path, thumbnail_data, created_at, file_mtime)
                VALUES (?, ?, ?, ?, ?)
            """, rows)
    return len(rows)
def main():
    """Generate and store thumbnails for every file that lacks one.

    Looks up the files needing thumbnails, renders them in ``WORKERS``
    parallel processes, writes the results to the thumbnail database in
    batches of ``BATCH_SIZE`` and prints progress after each flushed batch
    plus a final summary.
    """
    start = time.time()
    print("Bulk thumbnail pre-generation")
    print("=" * 60)
    print("Finding files needing thumbnails...")
    needed = get_files_needing_thumbnails()
    total = len(needed)
    print(f"  {total:,} files need thumbnails")
    if total == 0:
        print("  Nothing to do!")
        return

    generated = 0
    failed = 0
    missing = 0
    batch_results = []

    with ProcessPoolExecutor(max_workers=WORKERS) as executor:
        # The futures are handed straight to as_completed() without keeping
        # another reference: as_completed() drops each future once it has
        # been yielded, so the thumbnail bytes held by finished futures can
        # be freed instead of accumulating for the whole run.
        completed = as_completed(
            [executor.submit(process_file, item) for item in needed]
        )
        for i, future in enumerate(completed, 1):
            try:
                result = future.result()
            except Exception as exc:
                # A crashed worker or an unpicklable result counts as one
                # failed file instead of aborting the whole run.
                failed += 1
                print(f"  worker error: {exc!r}", file=sys.stderr)
                continue

            batch_results.append(result)
            status = result[4]
            if status == 'ok':
                generated += 1
            elif status == 'missing':
                missing += 1
            else:
                failed += 1

            if len(batch_results) >= BATCH_SIZE:
                save_batch(batch_results)
                batch_results = []
                elapsed = time.time() - start
                rate = i / elapsed if elapsed > 0 else 0
                eta = (total - i) / rate if rate > 0 else 0
                print(f"  {i:,}/{total:,} ({generated:,} ok, {failed:,} failed, {missing:,} missing) "
                      f"[{rate:.0f}/s, ETA {eta:.0f}s]")

    # Final batch
    if batch_results:
        save_batch(batch_results)

    elapsed = time.time() - start
    print(f"\nDone in {elapsed:.1f}s:")
    print(f"  Generated: {generated:,}")
    print(f"  Failed: {failed:,}")
    print(f"  Missing: {missing:,}")
# Run the bulk generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()