434 lines
14 KiB
Python
Executable File
434 lines
14 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Migrate Immich assets into file_inventory and face_recognition_scans.
|
|
|
|
Connects to Immich PostgreSQL (via docker exec) and app PostgreSQL directly.
|
|
Idempotent — safe to re-run. Uses ON CONFLICT DO NOTHING for file_inventory
|
|
and checks for existing immich_import scans before inserting face data.
|
|
|
|
Path mapping:
|
|
/mnt/media/evalongoria/ → /opt/immich/el/
|
|
/mnt/media/elvideo/ → /opt/immich/elv/
|
|
/mnt/media/md/ → SKIPPED (already in file_inventory)
|
|
|
|
Platform inference from subdirectories:
|
|
evalongoria: IG→instagram, TT→tiktok, X→twitter, Discord→discord,
|
|
Flickr→flickr, rest→unknown
|
|
elvideo: YT→youtube, rest→unknown
|
|
"""
|
|
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
import psycopg2
|
|
import psycopg2.extras
|
|
|
|
# ── Configuration ──────────────────────────────────────────────────────────

# DSN for the app's own PostgreSQL (connected directly via psycopg2).
# NOTE(review): credentials are hard-coded — consider moving to an env var.
APP_DB_DSN = "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
# Immich's PostgreSQL is reached indirectly via `docker exec` into this container.
IMMICH_CONTAINER = "immich_postgres"
IMMICH_DB = "immich"
IMMICH_USER = "postgres"

# Rows accumulated per execute_values() call / commit in both migration phases.
BATCH_SIZE = 5000

# Immich person UUID used to select face detections in Phase 2.
EVA_PERSON_UUID = "0154270a-8c30-4fb7-b73b-3fb3acc49483"

# Path prefix replacements (Immich → local).  Paths with no matching prefix
# (e.g. /mnt/media/md/) are skipped by map_path().
PATH_MAP = {
    "/mnt/media/evalongoria/": "/opt/immich/el/",
    "/mnt/media/elvideo/": "/opt/immich/elv/",
}

# Subdirectory → platform mapping for evalongoria
EVALONGORIA_PLATFORM_MAP = {
    "IG": "instagram",
    "TT": "tiktok",
    "X": "twitter",
    "Discord": "discord",
    "Flickr": "flickr",
    # The remaining subdirectories carry no platform information.
    "SC": "unknown",
    "Caps": "unknown",
    "Clips": "unknown",
    "CT": "unknown",
    "HQ": "unknown",
    "Misc": "unknown",
}

# Subdirectory → platform mapping for elvideo
ELVIDEO_PLATFORM_MAP = {
    "YT": "youtube",
    "Misc": "unknown",
}
|
|
|
|
|
|
# ── Immich DB helper ───────────────────────────────────────────────────────
|
|
|
|
def immich_query(sql):
    """Execute *sql* against Immich PostgreSQL through `docker exec`.

    Runs psql inside the Immich container in tuples-only, unaligned mode
    with the unit-separator byte (0x1f) as the field delimiter, and returns
    the stripped stdout.  On a non-zero psql exit status the error is
    printed to stderr and the process exits with status 1.
    """
    command = [
        "docker", "exec", IMMICH_CONTAINER,
        "psql", "-U", IMMICH_USER, "-d", IMMICH_DB,
        # -t: tuples only, -A: unaligned, -F: field separator (0x1f)
        "-t", "-A", "-F", "\x1f",
        "-c", sql,
    ]
    proc = subprocess.run(command, capture_output=True, text=True, timeout=300)
    if proc.returncode != 0:
        print(f"ERROR running Immich query: {proc.stderr}", file=sys.stderr)
        sys.exit(1)
    return proc.stdout.strip()
|
|
|
|
|
|
def immich_query_rows(sql, columns):
    """Run *sql* via immich_query() and parse the output into dicts.

    Each non-blank output line is split on the 0x1f delimiter; lines whose
    field count does not match *columns* are silently dropped.  Returns a
    list of dicts keyed by the given column names (empty list on no output).
    """
    raw = immich_query(sql)
    if not raw:
        return []
    parsed = []
    for line in raw.split("\n"):
        if not line.strip():
            continue
        fields = line.split("\x1f")
        # Keep only well-formed rows (guards against stray psql output).
        if len(fields) == len(columns):
            parsed.append(dict(zip(columns, fields)))
    return parsed
|
|
|
|
|
|
# ── Path & platform helpers ────────────────────────────────────────────────
|
|
|
|
def map_path(immich_path):
    """Translate an Immich originalPath into its local equivalent.

    Applies the first matching prefix substitution from PATH_MAP.  Returns
    None when no prefix matches (e.g. /mnt/media/md/ paths, which are
    already present in file_inventory) so callers can skip the asset.
    """
    for remote_prefix, local_prefix in PATH_MAP.items():
        if immich_path.startswith(remote_prefix):
            suffix = immich_path[len(remote_prefix):]
            return local_prefix + suffix
    return None
|
|
|
|
|
|
def infer_platform(immich_path):
    """Infer the source platform from an Immich path's first subdirectory.

    evalongoria and elvideo trees each have their own subdirectory→platform
    table; anything else (unknown root, file directly under the root, or an
    unmapped subdirectory) yields "unknown".
    """
    roots = (
        ("/mnt/media/evalongoria/", EVALONGORIA_PLATFORM_MAP),
        ("/mnt/media/elvideo/", ELVIDEO_PLATFORM_MAP),
    )
    for root, subdir_map in roots:
        if not immich_path.startswith(root):
            continue
        rest = immich_path[len(root):]
        # Only a path that actually contains a subdirectory component can
        # carry platform information; a bare filename maps to "unknown".
        if "/" in rest:
            subdir = rest.split("/")[0]
            if subdir in subdir_map:
                return subdir_map[subdir]
        return "unknown"
    return "unknown"
|
|
|
|
|
|
def infer_content_type(asset_type):
    """Map an Immich asset type ("IMAGE"/"VIDEO") to a content_type string.

    Anything other than the two known types maps to "unknown".
    """
    return {"IMAGE": "image", "VIDEO": "video"}.get(asset_type, "unknown")
|
|
|
|
|
|
# ── Main migration ─────────────────────────────────────────────────────────
|
|
|
|
def migrate_assets(app_conn):
    """Fetch assets from Immich and insert into file_inventory.

    Phase 1 of the migration: pulls every asset under the evalongoria and
    elvideo trees from the Immich DB, remaps paths via map_path(), and
    batch-inserts with ON CONFLICT (file_path) DO NOTHING, so re-runs are
    idempotent.  Commits after every batch.  Returns the number of rows
    actually inserted (conflicting rows are not counted).
    """
    print("=" * 60)
    print("Phase 1: Migrating Immich assets → file_inventory")
    print("=" * 60)

    # Fetch all evalongoria + elvideo assets from Immich.  Everything is
    # cast to text because immich_query_rows() parses psql's text output.
    sql = """
        SELECT
            a.id::text,
            a."originalPath",
            a."originalFileName",
            a.type,
            a."fileCreatedAt"::text,
            a."deletedAt"::text,
            a.width::text,
            a.height::text,
            encode(a.checksum, 'hex') as file_hash,
            COALESCE(e."fileSizeInByte"::text, '') as file_size
        FROM asset a
        LEFT JOIN asset_exif e ON a.id = e."assetId"
        WHERE (a."originalPath" LIKE '/mnt/media/evalongoria/%'
            OR a."originalPath" LIKE '/mnt/media/elvideo/%')
        ORDER BY a."fileCreatedAt"
    """

    print("Fetching assets from Immich...")
    # Column order must match the SELECT list above.
    columns = [
        "id", "originalPath", "originalFileName", "type",
        "fileCreatedAt", "deletedAt", "width", "height",
        "file_hash", "file_size",
    ]
    rows = immich_query_rows(sql, columns)
    total = len(rows)
    print(f" Found {total:,} assets to process")

    # Prepare and batch-insert
    inserted = 0
    skipped = 0
    batch = []

    cur = app_conn.cursor()

    insert_sql = """
        INSERT INTO file_inventory
            (file_path, filename, platform, source, content_type,
             file_size, file_hash, width, height, location, created_date)
        VALUES %s
        ON CONFLICT (file_path) DO NOTHING
    """

    for i, row in enumerate(rows):
        local_path = map_path(row["originalPath"])
        if local_path is None:
            # md/ or unknown prefix — already in file_inventory, skip.
            skipped += 1
            continue

        platform = infer_platform(row["originalPath"])
        content_type = infer_content_type(row["type"])
        # Soft-deleted assets in Immich land in the 'recycle' location.
        location = "recycle" if row["deletedAt"] else "final"

        # psql emits empty strings for NULLs; convert to real None/int.
        width = int(row["width"]) if row["width"] else None
        height = int(row["height"]) if row["height"] else None
        file_size = int(row["file_size"]) if row["file_size"] else None

        # Parse timestamp — strip timezone info for timestamp without time zone column
        created_date = row["fileCreatedAt"]
        if created_date:
            # Remove timezone suffix like +00 or +00:00 for naive timestamp
            # NOTE(review): only UTC (+00 / +00:00) suffixes are stripped;
            # any other offset would pass through unchanged — confirm Immich
            # stores these timestamps in UTC.
            created_date = created_date.replace("+00:00", "").replace("+00", "").strip()

        batch.append((
            local_path,
            row["originalFileName"],
            platform,
            # NOTE(review): 'source' is hard-coded to "evalongoria" for BOTH
            # the el and elv trees — presumably intentional; verify.
            "evalongoria",
            content_type,
            file_size,
            row["file_hash"],
            width,
            height,
            location,
            created_date if created_date else None,
        ))

        if len(batch) >= BATCH_SIZE:
            psycopg2.extras.execute_values(
                cur, insert_sql, batch,
                template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
            )
            # rowcount reflects rows actually inserted (conflicts excluded).
            inserted += cur.rowcount
            app_conn.commit()
            processed = i + 1
            print(f" Progress: {processed:,}/{total:,} processed, {inserted:,} inserted")
            batch = []

    # Final batch
    if batch:
        psycopg2.extras.execute_values(
            cur, insert_sql, batch,
            template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
        )
        inserted += cur.rowcount
        app_conn.commit()

    cur.close()
    print(f"\n DONE: {inserted:,} rows inserted, {skipped:,} skipped (md/ paths)")
    return inserted
|
|
|
|
|
|
def migrate_face_detections(app_conn):
    """Migrate Eva Longoria face detections from Immich → face_recognition_scans.

    Phase 2 of the migration.  Idempotence is enforced by bailing out when
    any scan_type='immich_import' rows already exist (there is no unique
    constraint to lean on, unlike Phase 1).  Each qualifying asset gets one
    scan row with has_match=True, confidence=1.0, and its per-asset face
    count.  Returns the number of scan rows inserted.
    """
    print("\n" + "=" * 60)
    print("Phase 2: Migrating face detections → face_recognition_scans")
    print("=" * 60)

    # First, check if we already ran this migration
    cur = app_conn.cursor()
    cur.execute("SELECT COUNT(*) FROM face_recognition_scans WHERE scan_type = 'immich_import'")
    existing = cur.fetchone()[0]
    if existing > 0:
        print(f" Found {existing:,} existing immich_import scans — skipping face migration")
        print(" (Delete existing immich_import scans first if you want to re-run)")
        cur.close()
        return 0

    # Get distinct assets with Eva Longoria face + face count + path in one query
    print("Fetching face detection data with paths from Immich...")
    # NOTE: EVA_PERSON_UUID is a trusted module constant, so the f-string
    # interpolation below is not an injection vector in practice.
    sql = f"""
        SELECT
            a."originalPath",
            COUNT(*) as eva_faces
        FROM asset_face af
        JOIN asset a ON af."assetId" = a.id
        WHERE af."personId" = '{EVA_PERSON_UUID}'
          AND af."deletedAt" IS NULL
          AND (a."originalPath" LIKE '/mnt/media/evalongoria/%'
            OR a."originalPath" LIKE '/mnt/media/elvideo/%')
        GROUP BY a."originalPath"
    """
    # Second column is aliased eva_faces in SQL but consumed as face_count here.
    columns = ["originalPath", "face_count"]
    face_rows = immich_query_rows(sql, columns)
    print(f" Found {len(face_rows):,} assets with Eva Longoria face detections")

    # Build file_path lookup from file_inventory (for /opt/immich/el/ and /opt/immich/elv/ paths)
    print("Building file_inventory lookup...")
    cur.execute("""
        SELECT file_path FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
    """)
    inventory_paths = set(row[0] for row in cur.fetchall())
    print(f" {len(inventory_paths):,} paths in file_inventory for el/elv")

    # Prepare face scan inserts
    insert_sql = """
        INSERT INTO face_recognition_scans
            (file_path, has_match, matched_person, confidence, face_count, scan_type)
        VALUES %s
    """

    batch = []
    inserted = 0
    skipped_not_in_inventory = 0
    total = len(face_rows)

    for i, row in enumerate(face_rows):
        local_path = map_path(row["originalPath"])
        if local_path is None:
            # md/ or unknown prefix — asset wasn't migrated, skip silently.
            continue

        # Only insert scans for files Phase 1 actually placed in inventory.
        if local_path not in inventory_paths:
            skipped_not_in_inventory += 1
            continue

        face_count = int(row["face_count"])

        batch.append((
            local_path,
            True,            # has_match
            "Eva Longoria",  # matched_person
            1.0,             # confidence — Immich assignment treated as certain
            face_count,
            "immich_import",
        ))

        if len(batch) >= BATCH_SIZE:
            psycopg2.extras.execute_values(
                cur, insert_sql, batch,
                template="(%s, %s, %s, %s, %s, %s)",
            )
            inserted += cur.rowcount
            app_conn.commit()
            print(f" Progress: {i + 1:,}/{total:,} processed, {inserted:,} inserted")
            batch = []

    # Flush the final partial batch.
    if batch:
        psycopg2.extras.execute_values(
            cur, insert_sql, batch,
            template="(%s, %s, %s, %s, %s, %s)",
        )
        inserted += cur.rowcount
        app_conn.commit()

    cur.close()
    print(f"\n DONE: {inserted:,} face scans inserted")
    print(f" Skipped: {skipped_not_in_inventory:,} (not in file_inventory)")
    return inserted
|
|
|
|
|
|
def verify(app_conn):
    """Print verification counts.

    Read-only summary of both migration phases: per-tree file_inventory
    counts, location and platform breakdowns, and face_recognition_scans
    totals.  Makes no writes and returns nothing.
    """
    print("\n" + "=" * 60)
    print("Verification")
    print("=" * 60)

    cur = app_conn.cursor()

    # file_inventory counts
    cur.execute("SELECT COUNT(*) FROM file_inventory WHERE file_path LIKE '/opt/immich/el/%'")
    el_count = cur.fetchone()[0]

    cur.execute("SELECT COUNT(*) FROM file_inventory WHERE file_path LIKE '/opt/immich/elv/%'")
    elv_count = cur.fetchone()[0]

    cur.execute("""
        SELECT location, COUNT(*)
        FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
        GROUP BY location
    """)
    location_counts = dict(cur.fetchall())

    cur.execute("""
        SELECT platform, COUNT(*)
        FROM file_inventory
        WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
        GROUP BY platform
        ORDER BY 2 DESC
    """)
    platform_counts = cur.fetchall()

    # face_recognition_scans counts
    cur.execute("SELECT COUNT(*) FROM face_recognition_scans WHERE scan_type = 'immich_import'")
    face_count = cur.fetchone()[0]

    cur.execute("SELECT COUNT(*) FROM face_recognition_scans")
    total_face_scans = cur.fetchone()[0]

    # Total file_inventory
    cur.execute("SELECT COUNT(*) FROM file_inventory")
    total_inventory = cur.fetchone()[0]

    cur.close()

    print(f"\n file_inventory:")
    print(f" /opt/immich/el/* (evalongoria): {el_count:,}")
    print(f" /opt/immich/elv/* (elvideo): {elv_count:,}")
    print(f" Total new: {el_count + elv_count:,}")
    # NOTE(review): location_counts is already a dict — dict() here is redundant.
    print(f" By location: {dict(location_counts)}")
    print(f" By platform:")
    for platform, count in platform_counts:
        print(f" {platform:12s}: {count:,}")

    print(f"\n face_recognition_scans:")
    print(f" immich_import: {face_count:,}")
    print(f" Total scans: {total_face_scans:,}")

    print(f"\n Total file_inventory rows: {total_inventory:,}")
|
|
|
|
|
|
def main():
    """Entry point: run both migration phases, then print verification counts.

    Fails fast if the Immich connection test fails (immich_query exits the
    process on psql errors).  The app DB connection is always closed, even
    when a phase raises.
    """
    start = time.time()
    print("Immich → file_inventory migration")
    print("=" * 60)

    # Test Immich connection
    print("Testing Immich database connection...")
    test = immich_query("SELECT COUNT(*) FROM asset")
    print(f" Immich has {int(test):,} assets")

    # Connect to app database
    print("Connecting to app database...")
    app_conn = psycopg2.connect(APP_DB_DSN)

    try:
        # Return values are reported inside each phase; nothing to keep here
        # (the originals bound them to unused locals).
        migrate_assets(app_conn)
        migrate_face_detections(app_conn)
        verify(app_conn)
    finally:
        # Guarantee the connection is released even if a phase raises.
        app_conn.close()

    elapsed = time.time() - start
    print(f"\nCompleted in {elapsed:.1f}s")
|
|
|
|
|
|
# Script entry point — run the migration only when executed directly.
if __name__ == "__main__":
    main()
|