Initial commit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Todd
2026-03-29 22:42:55 -04:00
commit 0d7b2b1aab
389 changed files with 280296 additions and 0 deletions

View File

@@ -0,0 +1,433 @@
#!/usr/bin/env python3
"""
Migrate Immich assets into file_inventory and face_recognition_scans.
Connects to Immich PostgreSQL (via docker exec) and app PostgreSQL directly.
Idempotent — safe to re-run. Uses ON CONFLICT DO NOTHING for file_inventory
and checks for existing immich_import scans before inserting face data.
Path mapping:
/mnt/media/evalongoria/ → /opt/immich/el/
/mnt/media/elvideo/ → /opt/immich/elv/
/mnt/media/md/ → SKIPPED (already in file_inventory)
Platform inference from subdirectories:
evalongoria: IG→instagram, TT→tiktok, X→twitter, Discord→discord,
Flickr→flickr, rest→unknown
elvideo: YT→youtube, rest→unknown
"""
import subprocess
import sys
import time
import psycopg2
import psycopg2.extras
# ── Configuration ──────────────────────────────────────────────────────────
# DSN for the application (target) database, connected directly via psycopg2.
APP_DB_DSN = "postgresql://media_downloader:PNsihOXvvuPwWiIvGlsc9Fh2YmMmB@localhost/media_downloader"
# Immich's PostgreSQL is reached indirectly: psql runs inside this container
# via `docker exec` (see immich_query).
IMMICH_CONTAINER = "immich_postgres"
IMMICH_DB = "immich"
IMMICH_USER = "postgres"
# Rows accumulated before each execute_values() + commit.
BATCH_SIZE = 5000
# Immich person UUID used to select Eva Longoria face detections.
EVA_PERSON_UUID = "0154270a-8c30-4fb7-b73b-3fb3acc49483"
# Path prefix replacements (Immich → local); paths matching no prefix
# (e.g. /mnt/media/md/) are skipped by map_path().
PATH_MAP = {
    "/mnt/media/evalongoria/": "/opt/immich/el/",
    "/mnt/media/elvideo/": "/opt/immich/elv/",
}
# Subdirectory → platform mapping for evalongoria; the first path component
# under the root decides the platform, anything unmapped becomes "unknown".
EVALONGORIA_PLATFORM_MAP = {
    "IG": "instagram",
    "TT": "tiktok",
    "X": "twitter",
    "Discord": "discord",
    "Flickr": "flickr",
    "SC": "unknown",
    "Caps": "unknown",
    "Clips": "unknown",
    "CT": "unknown",
    "HQ": "unknown",
    "Misc": "unknown",
}
# Subdirectory → platform mapping for elvideo (same first-component rule).
ELVIDEO_PLATFORM_MAP = {
    "YT": "youtube",
    "Misc": "unknown",
}
# ── Immich DB helper ───────────────────────────────────────────────────────
def immich_query(sql):
    """Run *sql* against Immich PostgreSQL via ``docker exec`` and return raw stdout.

    psql is invoked with tuples-only (-t), unaligned (-A) output and the ASCII
    unit separator (0x1f) as the field delimiter, so callers can split fields
    unambiguously (see immich_query_rows).

    Exits the whole process on query failure or timeout — this is a one-shot
    migration script, so a hard stop beats partial, silent results.

    Args:
        sql: SQL text to execute (passed to psql -c).

    Returns:
        str: psql stdout with surrounding whitespace stripped ("" for no rows).
    """
    cmd = [
        "docker", "exec", IMMICH_CONTAINER,
        "psql", "-U", IMMICH_USER, "-d", IMMICH_DB,
        "-t", "-A", "-F", "\x1f",  # tuples-only, unaligned, unit-separator delimiter
        "-c", sql,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired:
        # Previously an uncaught TimeoutExpired dumped a raw traceback; fail
        # through the same clean exit path as a query error instead.
        print("ERROR running Immich query: timed out after 300s", file=sys.stderr)
        sys.exit(1)
    if result.returncode != 0:
        print(f"ERROR running Immich query: {result.stderr}", file=sys.stderr)
        sys.exit(1)
    return result.stdout.strip()
def immich_query_rows(sql, columns):
    """Run *sql* via immich_query and parse its output into a list of dicts.

    Each non-empty output line is split on the 0x1f delimiter and zipped with
    *columns*.  Lines whose field count does not match ``len(columns)`` (e.g.
    a value that itself contained a newline) are skipped — but the drop count
    is now reported to stderr so the loss is visible instead of silent.

    Args:
        sql: SQL text to execute.
        columns: column names, in SELECT order, used as dict keys.

    Returns:
        list[dict]: one dict per well-formed output row (values are all str).
    """
    raw = immich_query(sql)
    if not raw:
        return []
    expected = len(columns)
    rows = []
    dropped = 0
    for line in raw.split("\n"):
        if not line.strip():
            continue
        fields = line.split("\x1f")
        if len(fields) != expected:
            dropped += 1
            continue
        rows.append(dict(zip(columns, fields)))
    if dropped:
        print(f"WARNING: dropped {dropped} malformed Immich row(s)", file=sys.stderr)
    return rows
# ── Path & platform helpers ────────────────────────────────────────────────
def map_path(immich_path):
    """Translate an Immich originalPath into its local filesystem path.

    Returns None when no PATH_MAP prefix matches (notably /mnt/media/md/
    paths), signalling the caller to skip the asset.
    """
    return next(
        (
            local_prefix + immich_path[len(immich_prefix):]
            for immich_prefix, local_prefix in PATH_MAP.items()
            if immich_path.startswith(immich_prefix)
        ),
        None,  # md/ or unknown prefix — skip
    )
def infer_platform(immich_path):
    """Infer the platform for an Immich path from its first subdirectory.

    Each known root has its own subdirectory→platform table; a path under a
    known root whose first component is not in that table — or a path under no
    known root at all — yields "unknown".
    """
    roots = (
        ("/mnt/media/evalongoria/", EVALONGORIA_PLATFORM_MAP),
        ("/mnt/media/elvideo/", ELVIDEO_PLATFORM_MAP),
    )
    for root, subdir_map in roots:
        if not immich_path.startswith(root):
            continue
        rest = immich_path[len(root):]
        # Only look up a subdirectory when there is one (a bare filename
        # directly under the root has no platform component).
        if "/" in rest:
            head = rest.split("/", 1)[0]
            if head in subdir_map:
                return subdir_map[head]
        return "unknown"
    return "unknown"
def infer_content_type(asset_type):
    """Map an Immich asset type ("IMAGE"/"VIDEO") to a content_type string."""
    return {"IMAGE": "image", "VIDEO": "video"}.get(asset_type, "unknown")
# ── Main migration ─────────────────────────────────────────────────────────
def migrate_assets(app_conn):
    """Fetch assets from Immich and insert into file_inventory.

    Idempotent: the INSERT uses ON CONFLICT (file_path) DO NOTHING, so re-runs
    only add rows that are missing.  Commits after every BATCH_SIZE rows so an
    interrupted run keeps its progress.

    Args:
        app_conn: open psycopg2 connection to the application database.

    Returns:
        int: number of rows actually inserted (conflict-skipped rows excluded).
    """
    print("=" * 60)
    print("Phase 1: Migrating Immich assets → file_inventory")
    print("=" * 60)
    # Fetch all evalongoria + elvideo assets from Immich.  md/ paths are
    # excluded by the LIKE filters (already present in file_inventory).
    sql = """
    SELECT
        a.id::text,
        a."originalPath",
        a."originalFileName",
        a.type,
        a."fileCreatedAt"::text,
        a."deletedAt"::text,
        a.width::text,
        a.height::text,
        encode(a.checksum, 'hex') as file_hash,
        COALESCE(e."fileSizeInByte"::text, '') as file_size
    FROM asset a
    LEFT JOIN asset_exif e ON a.id = e."assetId"
    WHERE (a."originalPath" LIKE '/mnt/media/evalongoria/%'
        OR a."originalPath" LIKE '/mnt/media/elvideo/%')
    ORDER BY a."fileCreatedAt"
    """
    print("Fetching assets from Immich...")
    # Column names must match the SELECT order above (see immich_query_rows).
    columns = [
        "id", "originalPath", "originalFileName", "type",
        "fileCreatedAt", "deletedAt", "width", "height",
        "file_hash", "file_size",
    ]
    rows = immich_query_rows(sql, columns)
    total = len(rows)
    print(f" Found {total:,} assets to process")
    # Prepare and batch-insert
    inserted = 0
    skipped = 0
    batch = []
    cur = app_conn.cursor()
    insert_sql = """
    INSERT INTO file_inventory
        (file_path, filename, platform, source, content_type,
         file_size, file_hash, width, height, location, created_date)
    VALUES %s
    ON CONFLICT (file_path) DO NOTHING
    """
    for i, row in enumerate(rows):
        local_path = map_path(row["originalPath"])
        if local_path is None:
            # md/ or unknown prefix — intentionally not migrated here.
            skipped += 1
            continue
        platform = infer_platform(row["originalPath"])
        content_type = infer_content_type(row["type"])
        # Soft-deleted Immich assets land in "recycle"; live ones in "final".
        location = "recycle" if row["deletedAt"] else "final"
        # psql returned everything as strings; coerce numerics, "" → None.
        width = int(row["width"]) if row["width"] else None
        height = int(row["height"]) if row["height"] else None
        file_size = int(row["file_size"]) if row["file_size"] else None
        # Parse timestamp — strip timezone info for timestamp without time zone column
        created_date = row["fileCreatedAt"]
        if created_date:
            # Remove timezone suffix like +00 or +00:00 for naive timestamp.
            # NOTE(review): only UTC suffixes are handled — presumably Immich
            # stores these columns in UTC; confirm no other offsets appear.
            created_date = created_date.replace("+00:00", "").replace("+00", "").strip()
        batch.append((
            local_path,
            row["originalFileName"],
            platform,
            # NOTE(review): source is 'evalongoria' for both roots (el and
            # elv) — looks intentional since both libraries cover the same
            # subject; confirm.
            "evalongoria",
            content_type,
            file_size,
            row["file_hash"],
            width,
            height,
            location,
            created_date if created_date else None,
        ))
        if len(batch) >= BATCH_SIZE:
            psycopg2.extras.execute_values(
                cur, insert_sql, batch,
                template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
            )
            # rowcount counts only rows actually inserted (conflicts excluded).
            inserted += cur.rowcount
            # Commit per batch so an interrupted run keeps its progress.
            app_conn.commit()
            processed = i + 1
            print(f" Progress: {processed:,}/{total:,} processed, {inserted:,} inserted")
            batch = []
    # Final batch (anything left under BATCH_SIZE)
    if batch:
        psycopg2.extras.execute_values(
            cur, insert_sql, batch,
            template="(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
        )
        inserted += cur.rowcount
        app_conn.commit()
    cur.close()
    print(f"\n DONE: {inserted:,} rows inserted, {skipped:,} skipped (md/ paths)")
    return inserted
def migrate_face_detections(app_conn):
    """Migrate Eva Longoria face detections from Immich → face_recognition_scans.

    Idempotence differs from Phase 1: instead of ON CONFLICT, this phase
    refuses to run at all if any scan_type='immich_import' rows already exist.
    Only faces on assets that made it into file_inventory are inserted.

    Args:
        app_conn: open psycopg2 connection to the application database.

    Returns:
        int: number of face-scan rows inserted (0 when skipped as already run).
    """
    print("\n" + "=" * 60)
    print("Phase 2: Migrating face detections → face_recognition_scans")
    print("=" * 60)
    # First, check if we already ran this migration
    cur = app_conn.cursor()
    cur.execute("SELECT COUNT(*) FROM face_recognition_scans WHERE scan_type = 'immich_import'")
    existing = cur.fetchone()[0]
    if existing > 0:
        print(f" Found {existing:,} existing immich_import scans — skipping face migration")
        print(" (Delete existing immich_import scans first if you want to re-run)")
        cur.close()
        return 0
    # Get distinct assets with Eva Longoria face + face count + path in one query.
    # The f-string interpolation is safe only because EVA_PERSON_UUID is a
    # module constant, never user input.
    print("Fetching face detection data with paths from Immich...")
    sql = f"""
    SELECT
        a."originalPath",
        COUNT(*) as eva_faces
    FROM asset_face af
    JOIN asset a ON af."assetId" = a.id
    WHERE af."personId" = '{EVA_PERSON_UUID}'
        AND af."deletedAt" IS NULL
        AND (a."originalPath" LIKE '/mnt/media/evalongoria/%'
            OR a."originalPath" LIKE '/mnt/media/elvideo/%')
    GROUP BY a."originalPath"
    """
    columns = ["originalPath", "face_count"]
    face_rows = immich_query_rows(sql, columns)
    print(f" Found {len(face_rows):,} assets with Eva Longoria face detections")
    # Build file_path lookup from file_inventory (for /opt/immich/el/ and /opt/immich/elv/ paths)
    # so we can cheaply skip faces whose asset never got migrated in Phase 1.
    print("Building file_inventory lookup...")
    cur.execute("""
    SELECT file_path FROM file_inventory
    WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
    """)
    inventory_paths = set(row[0] for row in cur.fetchall())
    print(f" {len(inventory_paths):,} paths in file_inventory for el/elv")
    # Prepare face scan inserts
    insert_sql = """
    INSERT INTO face_recognition_scans
        (file_path, has_match, matched_person, confidence, face_count, scan_type)
    VALUES %s
    """
    batch = []
    inserted = 0
    skipped_not_in_inventory = 0
    total = len(face_rows)
    for i, row in enumerate(face_rows):
        local_path = map_path(row["originalPath"])
        if local_path is None:
            continue
        if local_path not in inventory_paths:
            skipped_not_in_inventory += 1
            continue
        face_count = int(row["face_count"])
        # Immich's cluster assignment is treated as a certain match (confidence 1.0).
        batch.append((
            local_path,
            True,
            "Eva Longoria",
            1.0,
            face_count,
            "immich_import",
        ))
        if len(batch) >= BATCH_SIZE:
            psycopg2.extras.execute_values(
                cur, insert_sql, batch,
                template="(%s, %s, %s, %s, %s, %s)",
            )
            inserted += cur.rowcount
            app_conn.commit()
            print(f" Progress: {i + 1:,}/{total:,} processed, {inserted:,} inserted")
            batch = []
    # Final batch (anything left under BATCH_SIZE)
    if batch:
        psycopg2.extras.execute_values(
            cur, insert_sql, batch,
            template="(%s, %s, %s, %s, %s, %s)",
        )
        inserted += cur.rowcount
        app_conn.commit()
    cur.close()
    print(f"\n DONE: {inserted:,} face scans inserted")
    print(f" Skipped: {skipped_not_in_inventory:,} (not in file_inventory)")
    return inserted
def verify(app_conn):
    """Print verification counts for both migration phases (read-only).

    Args:
        app_conn: open psycopg2 connection to the application database.
    """
    print("\n" + "=" * 60)
    print("Verification")
    print("=" * 60)
    cur = app_conn.cursor()
    # file_inventory counts, split by the two migrated roots
    cur.execute("SELECT COUNT(*) FROM file_inventory WHERE file_path LIKE '/opt/immich/el/%'")
    el_count = cur.fetchone()[0]
    cur.execute("SELECT COUNT(*) FROM file_inventory WHERE file_path LIKE '/opt/immich/elv/%'")
    elv_count = cur.fetchone()[0]
    # Breakdown by location (final/recycle) for the migrated rows
    cur.execute("""
    SELECT location, COUNT(*)
    FROM file_inventory
    WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
    GROUP BY location
    """)
    location_counts = dict(cur.fetchall())
    # Breakdown by inferred platform, largest first
    cur.execute("""
    SELECT platform, COUNT(*)
    FROM file_inventory
    WHERE file_path LIKE '/opt/immich/el/%' OR file_path LIKE '/opt/immich/elv/%'
    GROUP BY platform
    ORDER BY 2 DESC
    """)
    platform_counts = cur.fetchall()
    # face_recognition_scans counts
    cur.execute("SELECT COUNT(*) FROM face_recognition_scans WHERE scan_type = 'immich_import'")
    face_count = cur.fetchone()[0]
    cur.execute("SELECT COUNT(*) FROM face_recognition_scans")
    total_face_scans = cur.fetchone()[0]
    # Total file_inventory
    cur.execute("SELECT COUNT(*) FROM file_inventory")
    total_inventory = cur.fetchone()[0]
    cur.close()
    print(f"\n file_inventory:")
    print(f" /opt/immich/el/* (evalongoria): {el_count:,}")
    print(f" /opt/immich/elv/* (elvideo): {elv_count:,}")
    print(f" Total new: {el_count + elv_count:,}")
    print(f" By location: {dict(location_counts)}")
    print(f" By platform:")
    for platform, count in platform_counts:
        print(f" {platform:12s}: {count:,}")
    print(f"\n face_recognition_scans:")
    print(f" immich_import: {face_count:,}")
    print(f" Total scans: {total_face_scans:,}")
    print(f"\n Total file_inventory rows: {total_inventory:,}")
def main():
    """Run the full migration: connectivity checks, both phases, verification."""
    start = time.time()
    print("Immich → file_inventory migration")
    print("=" * 60)
    # Test Immich connection — immich_query() exits the process on failure,
    # so reaching the next line means docker exec + psql both work.
    print("Testing Immich database connection...")
    test = immich_query("SELECT COUNT(*) FROM asset")
    print(f" Immich has {int(test):,} assets")
    # Connect to app database
    print("Connecting to app database...")
    app_conn = psycopg2.connect(APP_DB_DSN)
    try:
        assets_inserted = migrate_assets(app_conn)
        faces_inserted = migrate_face_detections(app_conn)
        verify(app_conn)
    finally:
        # Always release the connection, even if a phase raised.
        app_conn.close()
    elapsed = time.time() - start
    print(f"\nCompleted in {elapsed:.1f}s")
if __name__ == "__main__":
    main()