433
scripts/migrate_immich_to_gallery.py
Executable file
433
scripts/migrate_immich_to_gallery.py
Executable file
@@ -0,0 +1,433 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Migrate Immich assets into file_inventory and face_recognition_scans.
|
||||
|
||||
Connects to Immich PostgreSQL (via docker exec) and app PostgreSQL directly.
|
||||
Idempotent — safe to re-run. Uses ON CONFLICT DO NOTHING for file_inventory
|
||||
and checks for existing immich_import scans before inserting face data.
|
||||
|
||||
Path mapping:
|
||||
/mnt/media/evalongoria/ → /opt/immich/el/
|
||||
/mnt/media/elvideo/ → /opt/immich/elv/
|
||||
/mnt/media/md/ → SKIPPED (already in file_inventory)
|
||||
|
||||
Platform inference from subdirectories:
|
||||
evalongoria: IG→instagram, TT→tiktok, X→twitter, Discord→discord,
|
||||
Flickr→flickr, rest→unknown
|
||||
elvideo: YT→youtube, rest→unknown
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
# ── Configuration ──────────────────────────────────────────────────────────

# DSN for the app's own PostgreSQL database (connected to directly via psycopg2).
# NOTE(review): the password is hard-coded in source — consider loading it from
# an environment variable or a secrets file instead of committing it.
APP_DB_DSN = "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
# Immich's PostgreSQL is reached indirectly, via `docker exec` into this container.
IMMICH_CONTAINER = "immich_postgres"
IMMICH_DB = "immich"
IMMICH_USER = "postgres"

# Rows accumulated before each execute_values() bulk insert / commit.
BATCH_SIZE = 5000

# Immich person UUID used to select Eva Longoria face detections in Phase 2.
EVA_PERSON_UUID = "0154270a-8c30-4fb7-b73b-3fb3acc49483"

# Path prefix replacements (Immich → local)
PATH_MAP = {
    "/mnt/media/evalongoria/": "/opt/immich/el/",
    "/mnt/media/elvideo/": "/opt/immich/elv/",
}

# Subdirectory → platform mapping for evalongoria
EVALONGORIA_PLATFORM_MAP = {
    "IG": "instagram",
    "TT": "tiktok",
    "X": "twitter",
    "Discord": "discord",
    "Flickr": "flickr",
    # The remaining known subdirectories carry no platform information.
    "SC": "unknown",
    "Caps": "unknown",
    "Clips": "unknown",
    "CT": "unknown",
    "HQ": "unknown",
    "Misc": "unknown",
}

# Subdirectory → platform mapping for elvideo
ELVIDEO_PLATFORM_MAP = {
    "YT": "youtube",
    "Misc": "unknown",
}
|
||||
|
||||
|
||||
# ── Immich DB helper ───────────────────────────────────────────────────────
|
||||
|
||||
def immich_query(sql):
    """Execute *sql* inside the Immich PostgreSQL container and return raw output.

    Invokes psql through ``docker exec`` in tuples-only, unaligned mode with the
    ASCII unit separator (0x1f) as the field delimiter so rows can be split
    unambiguously later. On a non-zero psql exit status the error is reported
    on stderr and the whole script terminates.
    """
    command = [
        "docker", "exec", IMMICH_CONTAINER,
        "psql", "-U", IMMICH_USER, "-d", IMMICH_DB,
        "-t", "-A", "-F", "\x1f",  # tuples-only, unaligned, 0x1f delimiter
        "-c", sql,
    ]
    proc = subprocess.run(command, capture_output=True, text=True, timeout=300)
    if proc.returncode != 0:
        print(f"ERROR running Immich query: {proc.stderr}", file=sys.stderr)
        sys.exit(1)
    return proc.stdout.strip()
|
||||
|
||||
|
||||
def immich_query_rows(sql, columns):
    """Run *sql* against Immich and parse the output into a list of dicts.

    Each non-empty output line is split on the 0x1f delimiter produced by
    immich_query(); lines whose field count does not match *columns* are
    silently dropped.
    """
    raw = immich_query(sql)
    if not raw:
        return []
    parsed = []
    for record in raw.split("\n"):
        if not record.strip():
            continue
        values = record.split("\x1f")
        if len(values) == len(columns):
            parsed.append(dict(zip(columns, values)))
    return parsed
|
||||
|
||||
|
||||
# ── Path & platform helpers ────────────────────────────────────────────────
|
||||
|
||||
def map_path(immich_path):
    """Translate an Immich originalPath into its local filesystem path.

    Returns None when no prefix in PATH_MAP matches — notably /mnt/media/md/
    paths, which are already present in file_inventory and must be skipped.
    """
    for src_prefix, dst_prefix in PATH_MAP.items():
        if not immich_path.startswith(src_prefix):
            continue
        return dst_prefix + immich_path[len(src_prefix):]
    return None
|
||||
|
||||
|
||||
def infer_platform(immich_path):
    """Infer the source platform from an Immich path's first subdirectory.

    A path yields a platform only when its first component under a known root
    is a key of the corresponding platform map; bare filenames directly under
    the root (no "/" in the remainder) and unrecognized components both yield
    "unknown".
    """
    roots = (
        ("/mnt/media/evalongoria/", EVALONGORIA_PLATFORM_MAP),
        ("/mnt/media/elvideo/", ELVIDEO_PLATFORM_MAP),
    )
    for root, platform_map in roots:
        if not immich_path.startswith(root):
            continue
        tail = immich_path[len(root):]
        if "/" not in tail:
            # A file sitting directly under the root — no subdirectory to map.
            return "unknown"
        head = tail.split("/")[0]
        return platform_map.get(head, "unknown")
    return "unknown"
|
||||
|
||||
|
||||
def infer_content_type(asset_type):
    """Map an Immich asset type ("IMAGE"/"VIDEO") to the app's content_type."""
    type_map = {"IMAGE": "image", "VIDEO": "video"}
    return type_map.get(asset_type, "unknown")
|
||||
|
||||
|
||||
# ── Main migration ─────────────────────────────────────────────────────────
|
||||
|
||||
def migrate_assets(app_conn):
    """Fetch eligible assets from Immich and bulk-insert them into file_inventory.

    Selects every asset under /mnt/media/evalongoria/ or /mnt/media/elvideo/
    (left-joined to asset_exif for the file size), maps each path to its local
    equivalent via map_path(), and inserts in batches of BATCH_SIZE using
    ON CONFLICT (file_path) DO NOTHING, so re-runs are idempotent.

    Args:
        app_conn: open psycopg2 connection to the app database.

    Returns:
        Number of rows actually inserted (conflict-skipped rows excluded).
    """
    print("=" * 60)
    print("Phase 1: Migrating Immich assets → file_inventory")
    print("=" * 60)

    # Fetch all evalongoria + elvideo assets from Immich.
    sql = """
        SELECT
            a.id::text,
            a."originalPath",
            a."originalFileName",
            a.type,
            a."fileCreatedAt"::text,
            a."deletedAt"::text,
            a.width::text,
            a.height::text,
            encode(a.checksum, 'hex') as file_hash,
            COALESCE(e."fileSizeInByte"::text, '') as file_size
        FROM asset a
        LEFT JOIN asset_exif e ON a.id = e."assetId"
        WHERE (a."originalPath" LIKE '/mnt/media/evalongoria/%'
            OR a."originalPath" LIKE '/mnt/media/elvideo/%')
        ORDER BY a."fileCreatedAt"
    """

    print("Fetching assets from Immich...")
    columns = [
        "id", "originalPath", "originalFileName", "type",
        "fileCreatedAt", "deletedAt", "width", "height",
        "file_hash", "file_size",
    ]
    rows = immich_query_rows(sql, columns)
    total = len(rows)
    print(f" Found {total:,} assets to process")

    # Prepare and batch-insert.
    inserted = 0
    skipped = 0
    batch = []

    cur = app_conn.cursor()

    insert_sql = """
        INSERT INTO file_inventory
        (file_path, filename, platform, source, content_type,
        file_size, file_hash, width, height, location, created_date)
        VALUES %s
        ON CONFLICT (file_path) DO NOTHING
    """

    for i, row in enumerate(rows):
        local_path = map_path(row["originalPath"])
        if local_path is None:
            # md/ or unrecognized prefix — already in file_inventory, skip.
            skipped += 1
            continue

        platform = infer_platform(row["originalPath"])
        content_type = infer_content_type(row["type"])
        # A soft-deleted asset in Immich lives in the recycle location locally.
        location = "recycle" if row["deletedAt"] else "final"

        # psql returns everything as text; empty string means NULL.
        width = int(row["width"]) if row["width"] else None
        height = int(row["height"]) if row["height"] else None
        file_size = int(row["file_size"]) if row["file_size"] else None

        # Parse timestamp — strip timezone info for timestamp without time zone column.
        created_date = row["fileCreatedAt"]
        if created_date:
            # Remove timezone suffix like +00 or +00:00 for naive timestamp.
            # NOTE(review): only UTC (+00 / +00:00) suffixes are handled;
            # assumes Immich stores these columns in UTC — confirm.
            created_date = created_date.replace("+00:00", "").replace("+00", "").strip()

        batch.append((
            local_path,
            row["originalFileName"],
            platform,
            # NOTE(review): source is hard-coded to "evalongoria" for elvideo
            # assets too — confirm this is intentional.
            "evalongoria",
            content_type,
            file_size,
            row["file_hash"],
            width,
            height,
            location,
            created_date if created_date else None,
        ))

        if len(batch) >= BATCH_SIZE:
            # BUG FIX: page_size=len(batch). execute_values splits the batch
            # into pages (default 100 rows) and cur.rowcount only reflects the
            # last page, so the running "inserted" total was undercounted.
            psycopg2.extras.execute_values(
                cur, insert_sql, batch,
                template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                page_size=len(batch),
            )
            inserted += cur.rowcount
            app_conn.commit()
            processed = i + 1
            print(f" Progress: {processed:,}/{total:,} processed, {inserted:,} inserted")
            batch = []

    # Final (partial) batch.
    if batch:
        psycopg2.extras.execute_values(
            cur, insert_sql, batch,
            template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
            page_size=len(batch),  # same rowcount fix as above
        )
        inserted += cur.rowcount
        app_conn.commit()

    cur.close()
    print(f"\n DONE: {inserted:,} rows inserted, {skipped:,} skipped (md/ paths)")
    return inserted
|
||||
|
||||
|
||||
def migrate_face_detections(app_conn):
    """Migrate Eva Longoria face detections from Immich → face_recognition_scans.

    Aggregates Immich asset_face rows per asset for EVA_PERSON_UUID, maps each
    asset path to its local equivalent, and inserts one scan row per file that
    already exists in file_inventory. Skips the whole phase if any
    scan_type='immich_import' rows already exist (idempotence guard).

    Args:
        app_conn: open psycopg2 connection to the app database.

    Returns:
        Number of face_recognition_scans rows inserted (0 when skipped).
    """
    print("\n" + "=" * 60)
    print("Phase 2: Migrating face detections → face_recognition_scans")
    print("=" * 60)

    # First, check if we already ran this migration (idempotence guard).
    cur = app_conn.cursor()
    cur.execute("SELECT COUNT(*) FROM face_recognition_scans WHERE scan_type = 'immich_import'")
    existing = cur.fetchone()[0]
    if existing > 0:
        print(f" Found {existing:,} existing immich_import scans — skipping face migration")
        print(" (Delete existing immich_import scans first if you want to re-run)")
        cur.close()
        return 0

    # Get distinct assets with Eva Longoria face + face count + path in one query.
    # EVA_PERSON_UUID is a trusted module-level constant, so the f-string
    # interpolation below is not an injection risk.
    print("Fetching face detection data with paths from Immich...")
    sql = f"""
        SELECT
            a."originalPath",
            COUNT(*) as eva_faces
        FROM asset_face af
        JOIN asset a ON af."assetId" = a.id
        WHERE af."personId" = '{EVA_PERSON_UUID}'
        AND af."deletedAt" IS NULL
        AND (a."originalPath" LIKE '/mnt/media/evalongoria/%'
            OR a."originalPath" LIKE '/mnt/media/elvideo/%')
        GROUP BY a."originalPath"
    """
    # Output parsing is positional, so the SQL alias (eva_faces) need not
    # match the dict key chosen here (face_count).
    columns = ["originalPath", "face_count"]
    face_rows = immich_query_rows(sql, columns)
    print(f" Found {len(face_rows):,} assets with Eva Longoria face detections")

    # Build file_path lookup from file_inventory (for /opt/immich/el/ and /opt/immich/elv/ paths).
    print("Building file_inventory lookup...")
    cur.execute("""
        SELECT file_path FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
    """)
    inventory_paths = set(row[0] for row in cur.fetchall())
    print(f" {len(inventory_paths):,} paths in file_inventory for el/elv")

    # Prepare face scan inserts.
    insert_sql = """
        INSERT INTO face_recognition_scans
        (file_path, has_match, matched_person, confidence, face_count, scan_type)
        VALUES %s
    """

    batch = []
    inserted = 0
    skipped_not_in_inventory = 0
    total = len(face_rows)

    for i, row in enumerate(face_rows):
        local_path = map_path(row["originalPath"])
        if local_path is None:
            continue  # md/ or unknown prefix — not migrated in Phase 1

        if local_path not in inventory_paths:
            skipped_not_in_inventory += 1
            continue

        face_count = int(row["face_count"])

        batch.append((
            local_path,
            True,  # has_match
            "Eva Longoria",
            1.0,  # confidence: Immich person assignment is treated as certain
            face_count,
            "immich_import",
        ))

        if len(batch) >= BATCH_SIZE:
            # BUG FIX: page_size=len(batch). execute_values splits the batch
            # into pages (default 100 rows) and cur.rowcount only reflects the
            # last page, so the running "inserted" total was undercounted.
            psycopg2.extras.execute_values(
                cur, insert_sql, batch,
                template="(%s, %s, %s, %s, %s, %s)",
                page_size=len(batch),
            )
            inserted += cur.rowcount
            app_conn.commit()
            print(f" Progress: {i + 1:,}/{total:,} processed, {inserted:,} inserted")
            batch = []

    if batch:
        psycopg2.extras.execute_values(
            cur, insert_sql, batch,
            template="(%s, %s, %s, %s, %s, %s)",
            page_size=len(batch),  # same rowcount fix as above
        )
        inserted += cur.rowcount
        app_conn.commit()

    cur.close()
    print(f"\n DONE: {inserted:,} face scans inserted")
    print(f" Skipped: {skipped_not_in_inventory:,} (not in file_inventory)")
    return inserted
|
||||
|
||||
|
||||
def verify(app_conn):
    """Report post-migration row counts from the app database.

    Prints file_inventory totals for the two migrated path prefixes, their
    breakdowns by location and by platform, and face_recognition_scans counts.
    """
    print("\n" + "=" * 60)
    print("Verification")
    print("=" * 60)

    cursor = app_conn.cursor()

    def scalar(query):
        # Execute a single-value query and return the first column of row one.
        cursor.execute(query)
        return cursor.fetchone()[0]

    # file_inventory counts per migrated prefix.
    el_count = scalar("SELECT COUNT(*) FROM file_inventory WHERE file_path LIKE '/opt/immich/el/%'")
    elv_count = scalar("SELECT COUNT(*) FROM file_inventory WHERE file_path LIKE '/opt/immich/elv/%'")

    cursor.execute("""
        SELECT location, COUNT(*)
        FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
        GROUP BY location
    """)
    location_counts = dict(cursor.fetchall())

    cursor.execute("""
        SELECT platform, COUNT(*)
        FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
        GROUP BY platform
        ORDER BY 2 DESC
    """)
    platform_counts = cursor.fetchall()

    # face_recognition_scans counts, then the overall inventory total.
    face_count = scalar("SELECT COUNT(*) FROM face_recognition_scans WHERE scan_type = 'immich_import'")
    total_face_scans = scalar("SELECT COUNT(*) FROM face_recognition_scans")
    total_inventory = scalar("SELECT COUNT(*) FROM file_inventory")

    cursor.close()

    print(f"\n file_inventory:")
    print(f" /opt/immich/el/* (evalongoria): {el_count:,}")
    print(f" /opt/immich/elv/* (elvideo): {elv_count:,}")
    print(f" Total new: {el_count + elv_count:,}")
    print(f" By location: {dict(location_counts)}")
    print(f" By platform:")
    for platform, count in platform_counts:
        print(f" {platform:12s}: {count:,}")

    print(f"\n face_recognition_scans:")
    print(f" immich_import: {face_count:,}")
    print(f" Total scans: {total_face_scans:,}")

    print(f"\n Total file_inventory rows: {total_inventory:,}")
|
||||
|
||||
|
||||
def main():
    """Entry point: check connectivity, run both migration phases, verify, time it."""
    started = time.time()
    print("Immich → file_inventory migration")
    print("=" * 60)

    # Sanity-check that the Immich container and database are reachable.
    print("Testing Immich database connection...")
    asset_total = immich_query("SELECT COUNT(*) FROM asset")
    print(f" Immich has {int(asset_total):,} assets")

    # Connect to the app database.
    print("Connecting to app database...")
    app_conn = psycopg2.connect(APP_DB_DSN)

    try:
        migrate_assets(app_conn)
        migrate_face_detections(app_conn)
        verify(app_conn)
    finally:
        app_conn.close()

    print(f"\nCompleted in {time.time() - started:.1f}s")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user