199
scripts/pregerate_thumbnails.py
Normal file
199
scripts/pregerate_thumbnails.py
Normal file
@@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env python3
"""
Bulk pre-generate thumbnails for file_inventory entries that don't have cached thumbnails.

Targets /opt/immich/el/ and /opt/immich/elv/ paths (Immich migration).
Uses multiprocessing to generate thumbnails in parallel.
"""

import hashlib
import io
import sqlite3
import subprocess
import sys
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path

from PIL import Image

# SQLite cache of generated thumbnails (table: thumbnails).
THUMB_DB = "/opt/media-downloader/database/thumbnails.db"
# NOTE(review): database credentials are hardcoded in source — consider
# loading the DSN from the environment or a config file instead.
APP_DB_DSN = "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
MAX_SIZE = (300, 300)  # thumbnail bounding box (width, height)
WORKERS = 6            # parallel thumbnail worker processes
BATCH_SIZE = 200       # results buffered before each SQLite write
def generate_image_thumbnail(file_path, max_size=(300, 300)):
    """Return JPEG thumbnail bytes for an image file, or None on any failure.

    The image is downscaled in place to fit within *max_size*. Images with
    transparency ('RGBA', 'LA') or a palette ('P') are flattened onto a
    white background, since JPEG has no alpha channel.

    Args:
        file_path: Path (str or Path) of the source image.
        max_size: (width, height) bounding box for the thumbnail.

    Returns:
        JPEG-encoded bytes, or None if the file can't be read or converted.
    """
    try:
        # Context manager releases the underlying file handle promptly;
        # the original left it open until garbage collection (fd leak
        # under heavy batching).
        with Image.open(file_path) as img:
            img.thumbnail(max_size, Image.Resampling.LANCZOS)
            if img.mode in ('RGBA', 'LA', 'P'):
                background = Image.new('RGB', img.size, (255, 255, 255))
                if img.mode == 'P':
                    img = img.convert('RGBA')
                # Paste with the alpha channel as mask so transparent areas
                # become white rather than black.
                background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
                img = background
            buffer = io.BytesIO()
            img.save(buffer, format='JPEG', quality=85)
            return buffer.getvalue()
    except Exception:
        # Best-effort: callers treat None as "thumbnail unavailable".
        return None
def generate_video_thumbnail(file_path, max_size=(300, 300)):
    """Extract one video frame via ffmpeg and return it as JPEG bytes.

    Tries a frame at t=1s first, then t=0s (covers clips shorter than one
    second). Returns None if ffmpeg fails, times out, or the captured
    frame cannot be decoded.
    """
    for seek in ('00:00:01.000', '00:00:00.000'):
        cmd = [
            'ffmpeg', '-ss', seek,
            '-i', str(file_path),
            '-vframes', '1',
            '-f', 'image2pipe',
            '-vcodec', 'mjpeg', '-',
        ]
        try:
            proc = subprocess.run(cmd, capture_output=True, timeout=30)
            if proc.returncode != 0 or not proc.stdout:
                continue
            frame = Image.open(io.BytesIO(proc.stdout))
            frame.thumbnail(max_size, Image.Resampling.LANCZOS)
            out = io.BytesIO()
            frame.save(out, format='JPEG', quality=85)
            return out.getvalue()
        except Exception:
            # Timeout, decode error, etc. — fall through to next seek point.
            continue
    return None
def process_file(args):
    """Generate thumbnail for a single file. Runs in worker process.

    *args* is a (file_path, content_type, file_hash) tuple. Returns a
    (file_hash, file_path, thumbnail_bytes_or_None, mtime, status) tuple
    where status is one of 'ok', 'failed', 'missing'.
    """
    path_str, kind, cache_key = args
    path = Path(path_str)

    # The file may have vanished between the inventory query and now.
    if not path.exists():
        return (cache_key, path_str, None, 0, 'missing')

    try:
        modified = path.stat().st_mtime
    except OSError:
        modified = 0

    # Dispatch on content type: videos go through ffmpeg, everything
    # else is treated as an image.
    maker = generate_video_thumbnail if kind == 'video' else generate_image_thumbnail
    blob = maker(path, MAX_SIZE)

    if blob:
        return (cache_key, path_str, blob, modified, 'ok')
    return (cache_key, path_str, None, modified, 'failed')
def get_files_needing_thumbnails():
    """Query file_inventory for el/elv files, check which lack thumbnails.

    Returns a list of (file_path, content_type, cache_key) tuples for files
    with no cached thumbnail. The cache key is the inventory's content hash
    when present, otherwise a SHA-256 of the file path.
    """
    # Imported lazily so worker processes never need (or pay for) psycopg2.
    import psycopg2

    conn = psycopg2.connect(APP_DB_DSN)
    try:
        # Cursor as context manager + finally-close: the original leaked
        # both cursor and connection if the query raised.
        with conn.cursor() as cur:
            # execute() is called without parameters, so psycopg2 sends the
            # SQL verbatim; LIKE treats the literal '%%' the same as '%'.
            cur.execute("""
                SELECT file_path, content_type, file_hash
                FROM file_inventory
                WHERE (file_path LIKE '/opt/immich/el/%%' OR file_path LIKE '/opt/immich/elv/%%')
                  AND location = 'final'
                ORDER BY id
            """)
            all_files = cur.fetchall()
    finally:
        conn.close()

    # Check which already have thumbnails: fetch every cached hash once
    # instead of probing per file.
    thumb_conn = sqlite3.connect(THUMB_DB, timeout=30)
    try:
        rows = thumb_conn.execute("SELECT file_hash FROM thumbnails").fetchall()
        existing_hashes = {row[0] for row in rows}
    finally:
        thumb_conn.close()

    needed = []
    for file_path, content_type, file_hash in all_files:
        # Use content hash if available, else path hash
        cache_key = file_hash if file_hash else hashlib.sha256(file_path.encode()).hexdigest()
        if cache_key not in existing_hashes:
            needed.append((file_path, content_type, cache_key))

    return needed
def save_batch(results):
    """Save a batch of thumbnails to the DB.

    *results* holds (file_hash, file_path, data, mtime, status) tuples as
    produced by process_file(); entries without thumbnail data are skipped.

    Returns:
        Number of rows written.
    """
    from datetime import datetime, timezone
    now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S')

    # Only successful generations carry a payload worth persisting.
    rows = [
        (file_hash, file_path, data, now, mtime)
        for file_hash, file_path, data, mtime, _status in results
        if data
    ]

    conn = sqlite3.connect(THUMB_DB, timeout=60)
    try:
        # executemany batches the inserts; a single commit keeps the
        # batch atomic.
        conn.executemany("""
            INSERT OR REPLACE INTO thumbnails
            (file_hash, file_path, thumbnail_data, created_at, file_mtime)
            VALUES (?, ?, ?, ?, ?)
        """, rows)
        conn.commit()
    finally:
        # The original leaked the connection if an insert raised.
        conn.close()
    return len(rows)
def main():
    """Drive the run: find work, fan out to workers, persist, report totals."""
    start = time.time()
    print("Bulk thumbnail pre-generation")
    print("=" * 60)

    print("Finding files needing thumbnails...")
    needed = get_files_needing_thumbnails()
    total = len(needed)
    print(f" {total:,} files need thumbnails")

    if total == 0:
        print(" Nothing to do!")
        return

    # Per-status counters for the final report.
    generated = 0
    failed = 0
    missing = 0
    # Results buffered in the parent until BATCH_SIZE, then flushed to SQLite.
    batch_results = []

    with ProcessPoolExecutor(max_workers=WORKERS) as executor:
        futures = {executor.submit(process_file, item): item for item in needed}

        # as_completed yields in finish order; i counts completions from 1.
        for i, future in enumerate(as_completed(futures), 1):
            result = future.result()
            batch_results.append(result)

            # result[4] is the status string from process_file.
            status = result[4]
            if status == 'ok':
                generated += 1
            elif status == 'missing':
                missing += 1
            else:
                failed += 1

            # Flush to SQLite and print progress once per BATCH_SIZE files.
            if len(batch_results) >= BATCH_SIZE:
                save_batch(batch_results)
                batch_results = []
                elapsed = time.time() - start
                rate = i / elapsed if elapsed > 0 else 0
                eta = (total - i) / rate if rate > 0 else 0
                print(f" {i:,}/{total:,} ({generated:,} ok, {failed:,} failed, {missing:,} missing) "
                      f"[{rate:.0f}/s, ETA {eta:.0f}s]")

    # Final batch
    if batch_results:
        save_batch(batch_results)

    elapsed = time.time() - start
    print(f"\nDone in {elapsed:.1f}s:")
    print(f" Generated: {generated:,}")
    print(f" Failed: {failed:,}")
    print(f" Missing: {missing:,}")
# Script entry point.
if __name__ == "__main__":
    main()
Reference in New Issue
Block a user