#!/usr/bin/env python3
"""
Bulk pre-generate thumbnails for file_inventory entries that don't have cached thumbnails.

Targets /opt/immich/el/ and /opt/immich/elv/ paths (Immich migration).
Uses multiprocessing to generate thumbnails in parallel.
"""
import hashlib
import io
import sqlite3
import subprocess
import sys
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path

from PIL import Image
|
THUMB_DB = "/opt/media-downloader/database/thumbnails.db"
|
|
APP_DB_DSN = "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
|
|
MAX_SIZE = (300, 300)
|
|
WORKERS = 6
|
|
BATCH_SIZE = 200
|
|
|
|
|
|
def generate_image_thumbnail(file_path, max_size=(300, 300)):
|
|
try:
|
|
img = Image.open(file_path)
|
|
img.thumbnail(max_size, Image.Resampling.LANCZOS)
|
|
if img.mode in ('RGBA', 'LA', 'P'):
|
|
background = Image.new('RGB', img.size, (255, 255, 255))
|
|
if img.mode == 'P':
|
|
img = img.convert('RGBA')
|
|
background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
|
|
img = background
|
|
buffer = io.BytesIO()
|
|
img.save(buffer, format='JPEG', quality=85)
|
|
return buffer.getvalue()
|
|
except Exception:
|
|
return None
|
|
|
|
|
|
def generate_video_thumbnail(file_path, max_size=(300, 300)):
|
|
for seek_time in ['00:00:01.000', '00:00:00.000']:
|
|
try:
|
|
result = subprocess.run([
|
|
'ffmpeg', '-ss', seek_time,
|
|
'-i', str(file_path),
|
|
'-vframes', '1',
|
|
'-f', 'image2pipe',
|
|
'-vcodec', 'mjpeg', '-'
|
|
], capture_output=True, timeout=30)
|
|
if result.returncode != 0 or not result.stdout:
|
|
continue
|
|
img = Image.open(io.BytesIO(result.stdout))
|
|
img.thumbnail(max_size, Image.Resampling.LANCZOS)
|
|
buffer = io.BytesIO()
|
|
img.save(buffer, format='JPEG', quality=85)
|
|
return buffer.getvalue()
|
|
except Exception:
|
|
continue
|
|
return None
|
|
|
|
|
|
def process_file(args):
|
|
"""Generate thumbnail for a single file. Runs in worker process."""
|
|
file_path, content_type, file_hash = args
|
|
p = Path(file_path)
|
|
if not p.exists():
|
|
return (file_hash, file_path, None, 0, 'missing')
|
|
|
|
try:
|
|
mtime = p.stat().st_mtime
|
|
except OSError:
|
|
mtime = 0
|
|
|
|
if content_type == 'video':
|
|
data = generate_video_thumbnail(p, MAX_SIZE)
|
|
else:
|
|
data = generate_image_thumbnail(p, MAX_SIZE)
|
|
|
|
if data:
|
|
return (file_hash, file_path, data, mtime, 'ok')
|
|
return (file_hash, file_path, None, mtime, 'failed')
|
|
|
|
|
|
def get_files_needing_thumbnails():
|
|
"""Query file_inventory for el/elv files, check which lack thumbnails."""
|
|
import psycopg2
|
|
|
|
conn = psycopg2.connect(APP_DB_DSN)
|
|
cur = conn.cursor()
|
|
cur.execute("""
|
|
SELECT file_path, content_type, file_hash
|
|
FROM file_inventory
|
|
WHERE (file_path LIKE '/opt/immich/el/%%' OR file_path LIKE '/opt/immich/elv/%%')
|
|
AND location = 'final'
|
|
ORDER BY id
|
|
""")
|
|
all_files = cur.fetchall()
|
|
cur.close()
|
|
conn.close()
|
|
|
|
# Check which already have thumbnails
|
|
thumb_conn = sqlite3.connect(THUMB_DB, timeout=30)
|
|
thumb_cur = thumb_conn.cursor()
|
|
|
|
# Get all existing thumbnail hashes in one query
|
|
thumb_cur.execute("SELECT file_hash FROM thumbnails")
|
|
existing_hashes = set(row[0] for row in thumb_cur.fetchall())
|
|
thumb_conn.close()
|
|
|
|
needed = []
|
|
for file_path, content_type, file_hash in all_files:
|
|
# Use content hash if available, else path hash
|
|
cache_key = file_hash if file_hash else hashlib.sha256(file_path.encode()).hexdigest()
|
|
if cache_key not in existing_hashes:
|
|
needed.append((file_path, content_type, cache_key))
|
|
|
|
return needed
|
|
|
|
|
|
def save_batch(results):
|
|
"""Save a batch of thumbnails to the DB."""
|
|
from datetime import datetime, timezone
|
|
now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S')
|
|
|
|
conn = sqlite3.connect(THUMB_DB, timeout=60)
|
|
saved = 0
|
|
for file_hash, file_path, data, mtime, status in results:
|
|
if data:
|
|
conn.execute("""
|
|
INSERT OR REPLACE INTO thumbnails
|
|
(file_hash, file_path, thumbnail_data, created_at, file_mtime)
|
|
VALUES (?, ?, ?, ?, ?)
|
|
""", (file_hash, file_path, data, now, mtime))
|
|
saved += 1
|
|
conn.commit()
|
|
conn.close()
|
|
return saved
|
|
|
|
|
|
def main():
|
|
start = time.time()
|
|
print("Bulk thumbnail pre-generation")
|
|
print("=" * 60)
|
|
|
|
print("Finding files needing thumbnails...")
|
|
needed = get_files_needing_thumbnails()
|
|
total = len(needed)
|
|
print(f" {total:,} files need thumbnails")
|
|
|
|
if total == 0:
|
|
print(" Nothing to do!")
|
|
return
|
|
|
|
generated = 0
|
|
failed = 0
|
|
missing = 0
|
|
batch_results = []
|
|
|
|
with ProcessPoolExecutor(max_workers=WORKERS) as executor:
|
|
futures = {executor.submit(process_file, item): item for item in needed}
|
|
|
|
for i, future in enumerate(as_completed(futures), 1):
|
|
result = future.result()
|
|
batch_results.append(result)
|
|
|
|
status = result[4]
|
|
if status == 'ok':
|
|
generated += 1
|
|
elif status == 'missing':
|
|
missing += 1
|
|
else:
|
|
failed += 1
|
|
|
|
if len(batch_results) >= BATCH_SIZE:
|
|
save_batch(batch_results)
|
|
batch_results = []
|
|
elapsed = time.time() - start
|
|
rate = i / elapsed if elapsed > 0 else 0
|
|
eta = (total - i) / rate if rate > 0 else 0
|
|
print(f" {i:,}/{total:,} ({generated:,} ok, {failed:,} failed, {missing:,} missing) "
|
|
f"[{rate:.0f}/s, ETA {eta:.0f}s]")
|
|
|
|
# Final batch
|
|
if batch_results:
|
|
save_batch(batch_results)
|
|
|
|
elapsed = time.time() - start
|
|
print(f"\nDone in {elapsed:.1f}s:")
|
|
print(f" Generated: {generated:,}")
|
|
print(f" Failed: {failed:,}")
|
|
print(f" Missing: {missing:,}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|