#!/usr/bin/env python3
"""
Retroactive Face Recognition Scanner

Scans existing files in a directory and moves unmatched files to review queue
while storing their original destination paths for later restoration.
"""

import os
import sys
import json
import shutil
from pathlib import Path

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Bootstrap PostgreSQL adapter before any database imports
from modules.db_bootstrap import bootstrap_database
bootstrap_database()

from modules.face_recognition_module import FaceRecognitionModule
from modules.unified_database import UnifiedDatabase
from modules.settings_manager import SettingsManager

# Configuration
SCAN_BASE_DIR = "/opt/immich/md"
REVIEW_DIR = "/opt/immich/review"
DATABASE_PATH = "/opt/media-downloader/database/media_downloader.db"

# Supported file extensions
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.heic'}
VIDEO_EXTENSIONS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.m4v'}
SUPPORTED_EXTENSIONS = IMAGE_EXTENSIONS | VIDEO_EXTENSIONS


class RetroactiveFaceScanner:
    """Run face recognition over existing media files and queue non-matches.

    Every supported file under the scanned directory is passed to the face
    recognition module. Files without a match are moved under REVIEW_DIR
    unless ``scan_only`` is set, in which case they are only counted.
    """

    def __init__(self, scan_only: bool = False):
        """Create the database, settings and face recognition helpers.

        Args:
            scan_only: When true, unmatched files are reported and counted
                but never moved to the review queue.
        """
        self.db = UnifiedDatabase()
        self.settings_manager = SettingsManager(DATABASE_PATH)
        self.face_module = FaceRecognitionModule(unified_db=self.db)
        self.scan_only = scan_only
        # Running counters, reported by print_stats().
        self.stats = {
            'total_files': 0,
            'matched': 0,
            'unmatched': 0,
            'errors': 0,
            'skipped': 0
        }

    def get_relative_path(self, full_path):
        """Get path relative to SCAN_BASE_DIR.

        Falls back to returning ``full_path`` unchanged when
        ``os.path.relpath`` raises ValueError.
        """
        try:
            return os.path.relpath(full_path, SCAN_BASE_DIR)
        except ValueError:
            return full_path

    def scan_directory(self, directory):
        """Recursively scan ``directory`` for media files and process each one.

        Files whose lowercased extension is not in SUPPORTED_EXTENSIONS are
        ignored and not counted. Statistics are printed once the walk ends.
        """
        print(f"\n🔍 Scanning directory: {directory}")
        print("=" * 70)

        for root, _dirs, files in os.walk(directory):
            for filename in files:
                file_ext = os.path.splitext(filename)[1].lower()
                if file_ext not in SUPPORTED_EXTENSIONS:
                    continue

                file_path = os.path.join(root, filename)
                self.stats['total_files'] += 1
                self.process_file(file_path, file_ext in VIDEO_EXTENSIONS)

        self.print_stats()

    def process_file(self, file_path, is_video):
        """Process a single file with face recognition.

        Updates ``self.stats`` according to the outcome. Any exception raised
        while checking or moving the file is reported and counted under
        ``errors`` so that one bad file does not abort the whole scan.

        Args:
            file_path: Path of the media file to check.
            is_video: Forwarded to the face module's ``check_image`` call.
        """
        filename = os.path.basename(file_path)
        relative_path = self.get_relative_path(os.path.dirname(file_path))

        print(f"\n[{self.stats['total_files']}] {filename}")
        print(f" Location: {relative_path}")

        try:
            # Get face recognition settings
            settings = self.settings_manager.get('face_recognition', {})
            if not settings.get('enabled', False):
                print(" ⚠ Face recognition is disabled in settings")
                self.stats['skipped'] += 1
                return

            tolerance = settings.get('tolerance', 0.6)

            # Check for faces
            print(f" 🔍 Checking for faces (tolerance: {tolerance})...")
            result = self.face_module.check_image(
                file_path, tolerance=tolerance, is_video=is_video
            )

            # Log scan result to database. A logging failure is only a
            # warning: the scan outcome below is still acted upon.
            try:
                self.db.log_face_recognition_scan(
                    file_path=file_path,
                    has_match=result.get('has_match', False),
                    matched_person=result.get('person_name'),
                    confidence=result.get('confidence'),
                    face_count=result.get('face_count', 0),
                    scan_type='retroactive'
                )
            except Exception as db_err:
                print(f" ⚠ Warning: Failed to log to database: {db_err}")

            if result.get('has_match'):
                # `or` rather than a .get() default: a key that is present
                # with a None value would otherwise break the % formatting
                # and turn a successful match into a counted error.
                person_name = result.get('person_name') or 'Unknown'
                confidence = result.get('confidence') or 0
                print(f" ✓ MATCH: {person_name} (confidence: {confidence:.2%})")
                self.stats['matched'] += 1
            elif self.scan_only:
                print(" ✗ NO MATCH (scan-only mode, not moving file)")
                self.stats['unmatched'] += 1
            else:
                print(" ✗ NO MATCH - Moving to review queue...")
                # Pass full path as original path
                self.move_to_review(file_path, file_path)
                self.stats['unmatched'] += 1

        except Exception as e:
            print(f" ✗ ERROR: {str(e)}")
            self.stats['errors'] += 1

    @staticmethod
    def _unique_destination(path):
        """Return ``path``, or a ``name_N.ext`` sibling if ``path`` exists."""
        if not path.exists():
            return path

        counter = 1
        while True:
            candidate = path.with_name(f"{path.stem}_{counter}{path.suffix}")
            if not candidate.exists():
                return candidate
            counter += 1

    def _record_review_move(self, file_path, review_path, original_path):
        """Point the download row for ``file_path`` at ``review_path``.

        Stores ``original_path`` under the ``intended_path`` metadata key.

        Returns:
            True when a matching ``downloads`` row was updated, False when no
            row has ``file_path`` as its ``file_path`` column.
        """
        with self.db.get_connection(for_write=True) as conn:
            cursor = conn.cursor()

            # Find the download entry for this file
            cursor.execute(
                'SELECT id, metadata FROM downloads WHERE file_path = ?',
                (file_path,)
            )
            row = cursor.fetchone()
            if not row:
                return False

            metadata = json.loads(row['metadata']) if row['metadata'] else {}
            metadata['intended_path'] = str(original_path)

            cursor.execute('''
                UPDATE downloads
                SET file_path = ?, metadata = ?
                WHERE id = ?
            ''', (str(review_path), json.dumps(metadata), row['id']))
            return True

    def move_to_review(self, file_path, original_path):
        """Move file to review queue and update database with intended_path.

        The directory structure below SCAN_BASE_DIR is recreated under
        REVIEW_DIR; files outside SCAN_BASE_DIR land directly in REVIEW_DIR.
        An existing file in the review queue is never overwritten: a numeric
        suffix is appended to the new file's name instead.

        If the database update raises, the file is moved back to
        ``file_path`` so disk and database stay consistent. A missing
        database row is only a warning and the file stays in review.

        Args:
            file_path: Current location of the file.
            original_path: Path stored as ``intended_path`` in the metadata.

        Raises:
            Exception: Any error from the move or the database update is
                reported and re-raised to the caller.
        """
        try:
            # Maintain directory structure in review queue
            base_path = Path(SCAN_BASE_DIR)
            file_path_obj = Path(file_path)

            if file_path_obj.is_relative_to(base_path):
                # Recreate the path relative to the base under review dir
                review_path = Path(REVIEW_DIR) / file_path_obj.relative_to(base_path)
            else:
                # Fallback to flat structure if not under base path
                review_path = Path(REVIEW_DIR) / file_path_obj.name

            # Ensure parent directory exists
            review_path.parent.mkdir(parents=True, exist_ok=True)

            # Avoid clobbering a file that is already waiting for review.
            review_path = self._unique_destination(review_path)

            # Move file
            shutil.move(file_path, str(review_path))

            try:
                tracked = self._record_review_move(
                    file_path, review_path, original_path
                )
            except Exception:
                # The original location would otherwise be lost: undo the
                # move before reporting the failure.
                try:
                    shutil.move(str(review_path), file_path)
                except OSError as undo_err:
                    print(
                        f" ⚠ Warning: Could not restore {review_path} "
                        f"to {file_path}: {undo_err}"
                    )
                raise

            if tracked:
                print(f" → Moved to: {review_path}")
                print(f" → Original path stored in database: {original_path}")
            else:
                print(f" ⚠ Warning: No database entry found for {file_path}")
                print(f" → Moved to: {review_path} (not tracked in database)")

        except Exception as e:
            print(f" ✗ Failed to move file: {e}")
            raise

    def print_stats(self):
        """Print final statistics."""
        # Unmatched files are only moved when not in scan-only mode.
        unmatched_label = (
            "Unmatched (not moved)" if self.scan_only else "Unmatched (moved)"
        )

        print("\n" + "=" * 70)
        print("📊 SCAN COMPLETE")
        print("=" * 70)
        print(f"Total files scanned: {self.stats['total_files']}")
        print(f"✓ Matched: {self.stats['matched']}")
        print(f"✗ {unmatched_label}: {self.stats['unmatched']}")
        print(f"⚠ Errors: {self.stats['errors']}")
        print(f"⊘ Skipped: {self.stats['skipped']}")
        print("=" * 70)


def main():
    """Command-line entry point: parse arguments, confirm, and run the scan.

    Exits with status 1 when ``--dry-run`` is requested (not implemented) or
    when the scan directory is missing or not a directory, and with status 0
    when the user declines the confirmation prompt.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Retroactively scan existing files with face recognition',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Scan social media directory
  python3 scripts/retroactive_face_scan.py "social media"

  # Scan specific subdirectory
  python3 scripts/retroactive_face_scan.py "social media/instagram"

  # Scan with full path
  python3 scripts/retroactive_face_scan.py "/opt/immich/md/social media"

Note: Original paths are stored in the database metadata as 'intended_path'.
      Use the Review UI to keep/delete/add reference to moved files.
"""
    )
    parser.add_argument(
        'directory',
        help='Directory to scan (relative to /opt/immich/md or absolute path)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be done without moving files'
    )
    parser.add_argument(
        '--scan-only',
        action='store_true',
        help='Scan and update database only - do not move unmatched files to review'
    )

    args = parser.parse_args()

    # Fail fast: dry run is not implemented, so do not prompt the user or
    # build the scanner only to give up afterwards.
    if args.dry_run:
        # TODO: Implement dry run mode
        print("\n⚠ Dry run mode not yet implemented")
        sys.exit(1)

    # Determine scan directory
    if os.path.isabs(args.directory):
        scan_dir = args.directory
    else:
        scan_dir = os.path.join(SCAN_BASE_DIR, args.directory)

    if not os.path.exists(scan_dir):
        print(f"✗ Error: Directory does not exist: {scan_dir}")
        sys.exit(1)

    if not os.path.isdir(scan_dir):
        print(f"✗ Error: Not a directory: {scan_dir}")
        sys.exit(1)

    print("\n🎯 Retroactive Face Recognition Scan")
    print(f"Scan directory: {scan_dir}")
    print(f"Review queue: {REVIEW_DIR}")

    # Confirm
    response = input("\nContinue? (y/n): ")
    if response.strip().lower() != 'y':
        print("Cancelled.")
        sys.exit(0)

    # Run scan
    scanner = RetroactiveFaceScanner(scan_only=args.scan_only)

    if args.scan_only:
        print("🔍 SCAN-ONLY MODE - Files will NOT be moved to review")
        print("=" * 70)

    scanner.scan_directory(scan_dir)

    print("\n✓ Scan complete!")
    if args.scan_only:
        print("\n📝 Scan-only mode: Database updated with face recognition results.")
        print("No files were moved. Use the GUI to filter by 'Not Scanned' or 'No Match'.")
    else:
        print(f"\nUnmatched files have been moved to: {REVIEW_DIR}")
        print("Use the Review UI at http://your-server:5173/review to process them.")
        print("\nOriginal paths stored in database metadata.")


if __name__ == '__main__':
    main()