#!/usr/bin/env python3
"""
Migrate Immich assets into file_inventory and face_recognition_scans.

Connects to Immich PostgreSQL (via docker exec) and app PostgreSQL directly.
Idempotent — safe to re-run. Uses ON CONFLICT DO NOTHING for file_inventory
and checks for existing immich_import scans before inserting face data.

Path mapping:
    /mnt/media/evalongoria/ → /opt/immich/el/
    /mnt/media/elvideo/     → /opt/immich/elv/
    /mnt/media/md/          → SKIPPED (already in file_inventory)

Platform inference from subdirectories:
    evalongoria: IG→instagram, TT→tiktok, X→twitter, Discord→discord,
                 Flickr→flickr, rest→unknown
    elvideo:     YT→youtube, rest→unknown
"""

import subprocess
import sys
import time

import psycopg2
import psycopg2.extras

# ── Configuration ──────────────────────────────────────────────────────────

# NOTE(review): credentials embedded in source — consider moving the DSN to an
# environment variable or a .pgpass file before sharing this script.
APP_DB_DSN = "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
IMMICH_CONTAINER = "immich_postgres"
IMMICH_DB = "immich"
IMMICH_USER = "postgres"
BATCH_SIZE = 5000
# Immich person UUID for Eva Longoria; interpolated into SQL below — safe only
# because it is a hard-coded constant, never user input.
EVA_PERSON_UUID = "0154270a-8c30-4fb7-b73b-3fb3acc49483"

# Path prefix replacements (Immich → local)
PATH_MAP = {
    "/mnt/media/evalongoria/": "/opt/immich/el/",
    "/mnt/media/elvideo/": "/opt/immich/elv/",
}

# Subdirectory → platform mapping for evalongoria
EVALONGORIA_PLATFORM_MAP = {
    "IG": "instagram",
    "TT": "tiktok",
    "X": "twitter",
    "Discord": "discord",
    "Flickr": "flickr",
    "SC": "unknown",
    "Caps": "unknown",
    "Clips": "unknown",
    "CT": "unknown",
    "HQ": "unknown",
    "Misc": "unknown",
}

# Subdirectory → platform mapping for elvideo
ELVIDEO_PLATFORM_MAP = {
    "YT": "youtube",
    "Misc": "unknown",
}

# ── Immich DB helper ───────────────────────────────────────────────────────


def immich_query(sql):
    """Run a SQL query against Immich PostgreSQL via docker exec.

    Returns the stripped raw stdout of psql (tuples-only, unaligned, with
    the 0x1F unit-separator as the field delimiter). Exits the process on
    any psql error.
    """
    cmd = [
        "docker", "exec", IMMICH_CONTAINER,
        "psql", "-U", IMMICH_USER, "-d", IMMICH_DB,
        # tuples-only, unaligned, unit-separator delimiter — 0x1F cannot
        # appear in normal path/filename data, so splitting is unambiguous.
        "-t", "-A", "-F", "\x1f",
        "-c", sql,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
    if result.returncode != 0:
        print(f"ERROR running Immich query: {result.stderr}", file=sys.stderr)
        sys.exit(1)
    return result.stdout.strip()


def immich_query_rows(sql, columns):
    """Run query, parse into list of dicts with given column names.

    Rows whose field count does not match ``columns`` are silently dropped
    (defensive against stray psql output lines).
    """
    raw = immich_query(sql)
    if not raw:
        return []
    rows = []
    for line in raw.split("\n"):
        if not line.strip():
            continue
        fields = line.split("\x1f")
        if len(fields) != len(columns):
            continue
        rows.append(dict(zip(columns, fields)))
    return rows


# ── Path & platform helpers ────────────────────────────────────────────────


def map_path(immich_path):
    """Convert Immich path to local path.

    Returns None for paths outside PATH_MAP (e.g. /mnt/media/md/), which the
    callers treat as "skip this asset".
    """
    for immich_prefix, local_prefix in PATH_MAP.items():
        if immich_path.startswith(immich_prefix):
            return local_prefix + immich_path[len(immich_prefix):]
    return None  # md/ or unknown prefix — skip


def infer_platform(immich_path):
    """Infer platform from Immich path based on its first subdirectory.

    Files sitting directly in the library root (no subdirectory component)
    map to "unknown".
    """
    if immich_path.startswith("/mnt/media/evalongoria/"):
        remainder = immich_path[len("/mnt/media/evalongoria/"):]
        # First path component only counts as a directory if a "/" follows it.
        first_component = remainder.split("/")[0] if "/" in remainder else None
        if first_component and first_component in EVALONGORIA_PLATFORM_MAP:
            return EVALONGORIA_PLATFORM_MAP[first_component]
        return "unknown"
    elif immich_path.startswith("/mnt/media/elvideo/"):
        remainder = immich_path[len("/mnt/media/elvideo/"):]
        first_component = remainder.split("/")[0] if "/" in remainder else None
        if first_component and first_component in ELVIDEO_PLATFORM_MAP:
            return ELVIDEO_PLATFORM_MAP[first_component]
        return "unknown"
    return "unknown"


def infer_content_type(asset_type):
    """Map Immich asset type (IMAGE/VIDEO) to content_type."""
    if asset_type == "IMAGE":
        return "image"
    elif asset_type == "VIDEO":
        return "video"
    return "unknown"


# ── Batch insert helper ────────────────────────────────────────────────────


def _flush_batch(cur, conn, insert_sql, batch, template):
    """Execute one batched VALUES insert, commit, and return rows inserted.

    Returns cur.rowcount, which (with ON CONFLICT DO NOTHING) counts only
    rows actually inserted, not rows skipped by the conflict clause.
    """
    psycopg2.extras.execute_values(cur, insert_sql, batch, template=template)
    inserted = cur.rowcount
    conn.commit()
    return inserted


# ── Main migration ─────────────────────────────────────────────────────────


def migrate_assets(app_conn):
    """Fetch assets from Immich and insert into file_inventory.

    Returns the number of rows actually inserted (conflicts excluded).
    """
    print("=" * 60)
    print("Phase 1: Migrating Immich assets → file_inventory")
    print("=" * 60)

    # Fetch all evalongoria + elvideo assets from Immich
    sql = """
        SELECT a.id::text, a."originalPath", a."originalFileName", a.type,
               a."fileCreatedAt"::text, a."deletedAt"::text,
               a.width::text, a.height::text,
               encode(a.checksum, 'hex') as file_hash,
               COALESCE(e."fileSizeInByte"::text, '') as file_size
        FROM asset a
        LEFT JOIN asset_exif e ON a.id = e."assetId"
        WHERE (a."originalPath" LIKE '/mnt/media/evalongoria/%'
               OR a."originalPath" LIKE '/mnt/media/elvideo/%')
        ORDER BY a."fileCreatedAt"
    """
    print("Fetching assets from Immich...")
    columns = [
        "id", "originalPath", "originalFileName", "type", "fileCreatedAt",
        "deletedAt", "width", "height", "file_hash", "file_size",
    ]
    rows = immich_query_rows(sql, columns)
    total = len(rows)
    print(f"  Found {total:,} assets to process")

    # Prepare and batch-insert
    inserted = 0
    skipped = 0
    batch = []
    cur = app_conn.cursor()
    insert_sql = """
        INSERT INTO file_inventory
            (file_path, filename, platform, source, content_type, file_size,
             file_hash, width, height, location, created_date)
        VALUES %s
        ON CONFLICT (file_path) DO NOTHING
    """
    template = "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"

    for i, row in enumerate(rows):
        local_path = map_path(row["originalPath"])
        if local_path is None:
            skipped += 1
            continue

        platform = infer_platform(row["originalPath"])
        content_type = infer_content_type(row["type"])
        # Soft-deleted assets in Immich live in the recycle bin.
        location = "recycle" if row["deletedAt"] else "final"
        width = int(row["width"]) if row["width"] else None
        height = int(row["height"]) if row["height"] else None
        file_size = int(row["file_size"]) if row["file_size"] else None

        # Parse timestamp — strip timezone info for timestamp without time
        # zone column. Immich stores UTC, so only +00 / +00:00 suffixes occur.
        created_date = row["fileCreatedAt"]
        if created_date:
            created_date = created_date.replace("+00:00", "").replace("+00", "").strip()

        # NOTE(review): source is "evalongoria" even for /mnt/media/elvideo/
        # assets — confirm this is intentional and not meant to vary per tree.
        batch.append((
            local_path,
            row["originalFileName"],
            platform,
            "evalongoria",
            content_type,
            file_size,
            row["file_hash"],
            width,
            height,
            location,
            created_date if created_date else None,
        ))

        if len(batch) >= BATCH_SIZE:
            inserted += _flush_batch(cur, app_conn, insert_sql, batch, template)
            processed = i + 1
            print(f"  Progress: {processed:,}/{total:,} processed, {inserted:,} inserted")
            batch = []

    # Final batch
    if batch:
        inserted += _flush_batch(cur, app_conn, insert_sql, batch, template)

    cur.close()
    print(f"\n  DONE: {inserted:,} rows inserted, {skipped:,} skipped (md/ paths)")
    return inserted


def migrate_face_detections(app_conn):
    """Migrate Eva Longoria face detections from Immich → face_recognition_scans.

    Skips entirely (returns 0) if any 'immich_import' scans already exist, to
    keep the migration idempotent. Returns the number of scans inserted.
    """
    print("\n" + "=" * 60)
    print("Phase 2: Migrating face detections → face_recognition_scans")
    print("=" * 60)

    # First, check if we already ran this migration
    cur = app_conn.cursor()
    cur.execute("SELECT COUNT(*) FROM face_recognition_scans WHERE scan_type = 'immich_import'")
    existing = cur.fetchone()[0]
    if existing > 0:
        print(f"  Found {existing:,} existing immich_import scans — skipping face migration")
        print("  (Delete existing immich_import scans first if you want to re-run)")
        cur.close()
        return 0

    # Get distinct assets with Eva Longoria face + face count + path in one
    # query. EVA_PERSON_UUID is a trusted constant, so f-string interpolation
    # is safe here.
    print("Fetching face detection data with paths from Immich...")
    sql = f"""
        SELECT a."originalPath", COUNT(*) as eva_faces
        FROM asset_face af
        JOIN asset a ON af."assetId" = a.id
        WHERE af."personId" = '{EVA_PERSON_UUID}'
          AND af."deletedAt" IS NULL
          AND (a."originalPath" LIKE '/mnt/media/evalongoria/%'
               OR a."originalPath" LIKE '/mnt/media/elvideo/%')
        GROUP BY a."originalPath"
    """
    columns = ["originalPath", "face_count"]
    face_rows = immich_query_rows(sql, columns)
    print(f"  Found {len(face_rows):,} assets with Eva Longoria face detections")

    # Build file_path lookup from file_inventory (for /opt/immich/el/ and
    # /opt/immich/elv/ paths) so we only insert scans for known files.
    print("Building file_inventory lookup...")
    cur.execute("""
        SELECT file_path FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
    """)
    inventory_paths = {row[0] for row in cur.fetchall()}
    print(f"  {len(inventory_paths):,} paths in file_inventory for el/elv")

    # Prepare face scan inserts
    insert_sql = """
        INSERT INTO face_recognition_scans
            (file_path, has_match, matched_person, confidence, face_count, scan_type)
        VALUES %s
    """
    template = "(%s, %s, %s, %s, %s, %s)"
    batch = []
    inserted = 0
    skipped_not_in_inventory = 0
    total = len(face_rows)

    for i, row in enumerate(face_rows):
        local_path = map_path(row["originalPath"])
        if local_path is None:
            continue
        if local_path not in inventory_paths:
            skipped_not_in_inventory += 1
            continue

        face_count = int(row["face_count"])
        # Confidence is fixed at 1.0: Immich person assignments are treated
        # as ground truth for this import.
        batch.append((
            local_path,
            True,
            "Eva Longoria",
            1.0,
            face_count,
            "immich_import",
        ))

        if len(batch) >= BATCH_SIZE:
            inserted += _flush_batch(cur, app_conn, insert_sql, batch, template)
            print(f"  Progress: {i + 1:,}/{total:,} processed, {inserted:,} inserted")
            batch = []

    if batch:
        inserted += _flush_batch(cur, app_conn, insert_sql, batch, template)

    cur.close()
    print(f"\n  DONE: {inserted:,} face scans inserted")
    print(f"  Skipped: {skipped_not_in_inventory:,} (not in file_inventory)")
    return inserted


def verify(app_conn):
    """Print verification counts for both target tables."""
    print("\n" + "=" * 60)
    print("Verification")
    print("=" * 60)

    cur = app_conn.cursor()

    # file_inventory counts
    cur.execute("SELECT COUNT(*) FROM file_inventory WHERE file_path LIKE '/opt/immich/el/%'")
    el_count = cur.fetchone()[0]
    cur.execute("SELECT COUNT(*) FROM file_inventory WHERE file_path LIKE '/opt/immich/elv/%'")
    elv_count = cur.fetchone()[0]
    cur.execute("""
        SELECT location, COUNT(*) FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
        GROUP BY location
    """)
    location_counts = dict(cur.fetchall())
    cur.execute("""
        SELECT platform, COUNT(*) FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
        GROUP BY platform ORDER BY 2 DESC
    """)
    platform_counts = cur.fetchall()

    # face_recognition_scans counts
    cur.execute("SELECT COUNT(*) FROM face_recognition_scans WHERE scan_type = 'immich_import'")
    face_count = cur.fetchone()[0]
    cur.execute("SELECT COUNT(*) FROM face_recognition_scans")
    total_face_scans = cur.fetchone()[0]

    # Total file_inventory
    cur.execute("SELECT COUNT(*) FROM file_inventory")
    total_inventory = cur.fetchone()[0]
    cur.close()

    print(f"\n  file_inventory:")
    print(f"    /opt/immich/el/*  (evalongoria): {el_count:,}")
    print(f"    /opt/immich/elv/* (elvideo):     {elv_count:,}")
    print(f"    Total new: {el_count + elv_count:,}")
    # location_counts is already a dict; no second dict() conversion needed.
    print(f"    By location: {location_counts}")
    print(f"    By platform:")
    for platform, count in platform_counts:
        print(f"      {platform:12s}: {count:,}")
    print(f"\n  face_recognition_scans:")
    print(f"    immich_import: {face_count:,}")
    print(f"    Total scans:   {total_face_scans:,}")
    print(f"\n  Total file_inventory rows: {total_inventory:,}")


def main():
    start = time.time()
    print("Immich → file_inventory migration")
    print("=" * 60)

    # Test Immich connection
    print("Testing Immich database connection...")
    test = immich_query("SELECT COUNT(*) FROM asset")
    print(f"  Immich has {int(test):,} assets")

    # Connect to app database
    print("Connecting to app database...")
    app_conn = psycopg2.connect(APP_DB_DSN)
    try:
        assets_inserted = migrate_assets(app_conn)
        faces_inserted = migrate_face_detections(app_conn)
        verify(app_conn)
    finally:
        app_conn.close()

    elapsed = time.time() - start
    print(f"\nCompleted in {elapsed:.1f}s")


if __name__ == "__main__":
    main()