Files
media-downloader/modules/move_module.py
Todd 0d7b2b1aab Initial commit
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00

1714 lines
79 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Move Module - Handles file moving and timestamp management
"""
import os
import shutil
import time
import pwd
import grp
import gc
import sqlite3
from pathlib import Path
from datetime import datetime
from typing import Optional, List, Dict, Union, Any
from modules.base_module import LoggingMixin
from modules.universal_logger import get_logger
logger = get_logger('MoveManager') # For module-level functions
# Import UnifiedDatabase for file hash deduplication
try:
from .unified_database import UnifiedDatabase
except ImportError:
try:
from unified_database import UnifiedDatabase
except ImportError:
UnifiedDatabase = None
logger.warning("UnifiedDatabase not available - file hash deduplication disabled")
# Import date utilities for EXIF timestamp updates
try:
from modules.date_utils import DateHandler
DATE_UTILS_AVAILABLE = True
except ImportError:
try:
from .date_utils import DateHandler
DATE_UTILS_AVAILABLE = True
except ImportError:
DATE_UTILS_AVAILABLE = False
logger.debug("DateHandler not available - EXIF updates disabled")
def _extract_exif_date(filepath: Path) -> Optional[datetime]:
"""Extract date from EXIF metadata using exiftool
Checks DateTimeOriginal, CreateDate, then DateCreated in order of preference.
Returns None if no valid date found.
"""
import subprocess
try:
result = subprocess.run([
'exiftool', '-s', '-s', '-s',
'-DateTimeOriginal', '-CreateDate', '-DateCreated',
str(filepath)
], capture_output=True, text=True, timeout=10)
if result.returncode == 0 and result.stdout.strip():
# exiftool returns dates in format "YYYY:MM:DD HH:MM:SS"
for line in result.stdout.strip().split('\n'):
date_str = line.strip()
if date_str and date_str != '-':
try:
# Try parsing EXIF date format
return datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S")
except ValueError:
try:
# Try alternate format without time
return datetime.strptime(date_str, "%Y:%m:%d")
except ValueError:
continue
except (subprocess.TimeoutExpired, FileNotFoundError):
pass
except Exception as e:
logger.debug(f"EXIF date extraction failed for {filepath}: {e}")
return None
# Pushover notifier handled by scheduler/notification system
class MoveManager(LoggingMixin):
"""Manages file moves with proper timestamp handling and notifications"""
def __init__(self, log_callback=None, notifier=None, unified_db=None, face_recognition_enabled=True, on_download_complete=None, event_emitter=None):
    """
    Initialize the MoveManager

    Args:
        log_callback: Optional callback function for logging (tag, level, message)
        notifier: Optional PushoverNotifier instance for batch notifications
        unified_db: Optional UnifiedDatabase instance for file hash deduplication
        face_recognition_enabled: Enable face recognition filtering (default: True)
        on_download_complete: Optional callback when downloads complete (platform, source, count)
        event_emitter: Optional ScraperEventEmitter instance for real-time scraping monitor
    """
    # Initialize logging via mixin
    self._init_logger('MoveManager', log_callback, default_module='Move')
    self.notifier = notifier
    self.unified_db = unified_db
    self.face_recognition_enabled = face_recognition_enabled
    self.on_download_complete = on_download_complete
    self.event_emitter = event_emitter
    self.current_session = {}  # Store session context for event emission
    self.face_module = None
    # Per-run counters, reported after batch moves complete.
    self.stats = {
        'moved': 0,
        'skipped': 0,
        'failed': 0,
        'duplicates': 0,  # Track files skipped due to duplicate content
        'review_queue': 0  # Track files moved to review queue (no face match)
    }
    # Track moved files for batch notifications
    self.batch_context = None
    self.moved_files = []
    self.review_queue_files = []  # Separate tracking for review queue items
    self.repost_queue = []  # Queue for repost detection processing (done after moves complete)
    self._last_move_had_face_recognition = False  # Tracks if last move_file ran face recognition
    # Background thread pool for non-blocking post-processing (thumbnails, dimensions)
    from concurrent.futures import ThreadPoolExecutor
    self._bg_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix='move_bg')
    # Initialize face recognition if enabled
    if self.face_recognition_enabled and self.unified_db:
        try:
            from modules.face_recognition_module import FaceRecognitionModule
            self.face_module = FaceRecognitionModule(
                unified_db=self.unified_db,
                log_callback=lambda msg, lvl: self.log(msg, lvl)
            )
            self.log("Face recognition module initialized", "info")
        except Exception as e:
            # Face filtering is optional: degrade gracefully to moving everything.
            self.log(f"Failed to initialize face recognition: {e}", "warning")
            self.face_module = None
    # Cache face recognition settings (read once from DB, reused per file)
    self._face_recognition_tolerance = None
    self._review_path = None
    self._face_recognition_settings_cache = None  # Raw JSON settings blob
    self._video_face_settings_cache = None  # Parsed video settings dict
    # Initialize activity status manager for real-time updates
    from modules.activity_status import get_activity_manager
    self.activity_manager = get_activity_manager(unified_db)
    # Log database status for debugging hash deduplication
    if self.unified_db:
        self.log("MoveManager initialized with UnifiedDatabase - hash deduplication ENABLED", "info")
    else:
        self.log("MoveManager initialized WITHOUT UnifiedDatabase - hash deduplication DISABLED", "warning")
        # NOTE(review): nesting of this check reconstructed from context
        # (source indentation was stripped) - it only fires when no DB was passed.
        if not UnifiedDatabase:
            self.log("UnifiedDatabase class not imported - hash deduplication DISABLED", "warning")
def set_session_context(self, platform: str, account: str, session_id: str):
"""Set context for current scraping session for event emission
Args:
platform: Platform name (e.g., 'instagram', 'snapchat')
account: Account/username being scraped
session_id: Unique session identifier for this scraping run
"""
self.current_session = {
'platform': platform,
'account': account,
'session_id': session_id
}
self.log(f"Session context set: {platform}/{account} ({session_id})", "debug")
def release_models(self):
"""
Release ML models to free memory.
Call this after batch processing to prevent OOM in long-running services.
Models will be lazy-loaded again when needed.
"""
if self.face_module is not None:
self.face_module.release_model()
self.log("Face recognition model released to free memory", "info")
gc.collect()
def _is_image_file(self, file_path: Path) -> bool:
"""Check if file is an image (not video)"""
image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.heic'}
return file_path.suffix.lower() in image_extensions
def _is_video_file(self, file_path: Path) -> bool:
"""Check if file is a video"""
video_extensions = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.m4v'}
return file_path.suffix.lower() in video_extensions
def _generate_thumbnail_cache(self, file_path: Path, content_hash: str = None) -> None:
    """
    Pre-generate and cache thumbnail for a file.

    This speeds up page loading by having thumbnails ready in the database.
    Uses content hash (SHA256 of file content) as cache key so thumbnails
    survive file moves (e.g., to recycle bin).

    Best-effort: every failure path is swallowed and logged at debug level,
    so callers never see an exception from this method.

    Args:
        file_path: Path to the media file
        content_hash: Optional pre-computed SHA256 content hash
    """
    try:
        import hashlib
        import io
        import subprocess
        from PIL import Image
        # NOTE(review): assumes the thumbnails table already exists in this
        # DB file - if not, the query below raises and is logged at debug.
        thumb_db_path = Path('/opt/media-downloader/database/thumbnails.db')
        # Calculate content hash for cache key (survives file moves)
        if content_hash:
            file_hash = content_hash
        else:
            # Calculate SHA256 of file content, streamed in 64 KiB chunks
            # so large videos are not read into memory at once.
            sha256 = hashlib.sha256()
            with open(file_path, 'rb') as f:
                for chunk in iter(lambda: f.read(65536), b''):
                    sha256.update(chunk)
            file_hash = sha256.hexdigest()
        file_mtime = file_path.stat().st_mtime
        # Check if already cached
        conn = sqlite3.connect(str(thumb_db_path), timeout=10.0)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT 1 FROM thumbnails WHERE file_hash = ?", (file_hash,))
            if cursor.fetchone():
                return  # Already cached
            # Generate thumbnail (None means "could not generate - skip insert")
            thumbnail_data = None
            max_size = (300, 300)
            if self._is_video_file(file_path):
                # Video thumbnail using ffmpeg: grab a single frame at t=1s
                # and pipe it out as MJPEG rather than writing a temp file.
                try:
                    result = subprocess.run([
                        'ffmpeg', '-i', str(file_path),
                        '-ss', '00:00:01.000', '-vframes', '1',
                        '-f', 'image2pipe', '-vcodec', 'mjpeg', '-'
                    ], capture_output=True, timeout=10)
                    if result.returncode == 0 and result.stdout:
                        img = Image.open(io.BytesIO(result.stdout))
                        img.thumbnail(max_size, Image.Resampling.LANCZOS)
                        if img.mode != 'RGB':
                            img = img.convert('RGB')
                        buffer = io.BytesIO()
                        img.save(buffer, format='JPEG', quality=85)
                        thumbnail_data = buffer.getvalue()
                except Exception as e:
                    self.log(f"Video thumbnail generation failed: {e}", "debug")
            elif self._is_image_file(file_path):
                # Image thumbnail
                try:
                    img = Image.open(file_path)
                    img.thumbnail(max_size, Image.Resampling.LANCZOS)
                    # JPEG has no alpha: composite transparent images onto
                    # a white background before saving.
                    if img.mode in ('RGBA', 'LA', 'P'):
                        background = Image.new('RGB', img.size, (255, 255, 255))
                        if img.mode == 'P':
                            img = img.convert('RGBA')
                        background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
                        img = background
                    buffer = io.BytesIO()
                    img.save(buffer, format='JPEG', quality=85)
                    thumbnail_data = buffer.getvalue()
                except Exception as e:
                    self.log(f"Image thumbnail generation failed: {e}", "debug")
            # Cache the thumbnail
            if thumbnail_data:
                from datetime import datetime
                cursor.execute("""
                    INSERT OR REPLACE INTO thumbnails
                    (file_hash, file_path, thumbnail_data, created_at, file_mtime)
                    VALUES (?, ?, ?, ?, ?)
                """, (file_hash, str(file_path), thumbnail_data, datetime.now().isoformat(), file_mtime))
                conn.commit()
                self.log(f"Cached thumbnail: {file_path.name}", "debug")
        finally:
            conn.close()
    except Exception as e:
        self.log(f"Thumbnail cache generation failed for {file_path.name}: {e}", "debug")
def _record_file_inventory_bg(self, destination: Path, source_name_file: str,
                              platform: str, source_name: str,
                              moved_to_review: bool, file_hash: str = None,
                              timestamp: 'datetime' = None) -> None:
    """Record file in inventory table with dimensions (runs in background thread).

    Best-effort: all failures are caught and logged at debug level so the
    background executor never propagates exceptions.

    Args:
        destination: Final destination path
        source_name_file: Original source filename
        platform: Platform name
        source_name: Source/username
        moved_to_review: Whether file was sent to review queue
        file_hash: Pre-computed file hash (or None)
        timestamp: Post date extracted from filename (or None to use current time)
    """
    try:
        # 'review' rows live in the review queue; 'final' rows are published media.
        location = 'review' if moved_to_review else 'final'
        # Anything without a known image extension is recorded as video here.
        content_type_val = 'image' if self._is_image_file(destination) else 'video'
        from datetime import datetime, timezone
        file_stat = destination.stat()
        if timestamp:
            # Preserve the post date from the filename.
            # NOTE(review): assumed naive and formatted as if UTC - confirm.
            inventory_created = timestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
        else:
            inventory_created = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        # Look up download method from downloads table
        method = None
        if platform == 'instagram':
            try:
                with self.unified_db.get_connection() as conn:
                    cursor = conn.cursor()
                    cursor.execute('''
                        SELECT method FROM downloads
                        WHERE filename = ? AND platform = 'instagram'
                        LIMIT 1
                    ''', (source_name_file,))
                    row = cursor.fetchone()
                    if row and row[0]:
                        method = row[0]
            except Exception:
                # Best-effort lookup; a missing method value is acceptable.
                pass
        # Extract media dimensions (PIL for images, ffprobe for videos)
        width, height = None, None
        try:
            if content_type_val == 'image':
                from PIL import Image
                with Image.open(destination) as img:
                    width, height = img.size
            elif content_type_val == 'video':
                import subprocess
                result = subprocess.run(
                    ['ffprobe', '-v', 'error', '-select_streams', 'v:0',
                     '-show_entries', 'stream=width,height', '-of', 'csv=p=0',
                     str(destination)],
                    capture_output=True, text=True, timeout=10
                )
                if result.returncode == 0 and result.stdout.strip():
                    parts = result.stdout.strip().split(',')
                    if len(parts) >= 2:
                        width, height = int(parts[0]), int(parts[1])
        except Exception as e:
            self.log(f"Could not extract dimensions for {destination.name}: {e}", "debug")
        self.unified_db.upsert_file_inventory(
            file_path=str(destination),
            filename=destination.name,
            platform=platform,
            source=source_name,
            content_type=content_type_val,
            file_size=file_stat.st_size,
            file_hash=file_hash if file_hash else None,
            width=width,
            height=height,
            location=location,
            created_date=inventory_created,
            method=method
        )
        self.log(f"Recorded in file_inventory: {destination.name} (location={location})", "debug")
        # Queue for discovery scans (embedding) for files going to final
        if location == 'final':
            try:
                self.unified_db.queue_file_for_discovery(
                    str(destination),
                    scan_types=['embedding'],
                    priority=5
                )
                self.log(f"Queued {destination.name} for discovery scans", "debug")
            except Exception as e:
                self.log(f"Failed to queue for discovery: {e}", "debug")
    except Exception as e:
        self.log(f"Failed to record in file_inventory: {e}", "debug")
def _get_face_recognition_settings(self) -> dict:
"""Get face recognition settings from database (cached per session).
Returns:
dict: Face recognition settings, or empty dict if unavailable.
"""
if self._face_recognition_settings_cache is not None:
return self._face_recognition_settings_cache
if self.unified_db:
try:
import json
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT value FROM settings WHERE key = 'face_recognition'")
result = cursor.fetchone()
if result:
self._face_recognition_settings_cache = json.loads(result[0])
return self._face_recognition_settings_cache
except Exception as e:
self.log(f"Failed to read face recognition settings: {e}", "debug")
self._face_recognition_settings_cache = {}
return self._face_recognition_settings_cache
def _get_face_recognition_tolerance(self, is_video: bool = False, source: str = None) -> float:
"""Get face recognition tolerance from database settings
Args:
is_video: If True, get video_tolerance instead of image tolerance
source: Optional source username (e.g., 'evalongoria') for source-based tolerance
Returns:
float: Tolerance value (0.0-1.0), defaults to 0.15 for images, 0.30 for videos
"""
settings = self._get_face_recognition_settings()
if settings:
# Get base tolerance (video or image)
if is_video:
base_tolerance = float(settings.get('video_tolerance', settings.get('tolerance', 0.30)))
else:
base_tolerance = float(settings.get('tolerance', 0.15))
# Check for source-specific tolerance
if source:
source_tolerances = settings.get('source_tolerances', {})
if source in source_tolerances:
source_tolerance = float(source_tolerances[source])
if is_video:
tolerance = max(source_tolerance, base_tolerance)
self.log(f"Using max of source ({source_tolerance}) and video ({base_tolerance}) tolerance for '{source}': {tolerance}", "debug")
else:
tolerance = source_tolerance
self.log(f"Using source-based tolerance for '{source}': {tolerance}", "debug")
return tolerance
return base_tolerance
# Default: 0.15 for images, 0.30 for videos
return 0.30 if is_video else 0.15
def _get_review_path(self) -> str:
"""Get review path from database settings
Returns:
str: Review path, defaults to '/opt/immich/review' if not set
"""
# Return cached value if available
if self._review_path is not None:
return self._review_path
# Try to read from database
if self.unified_db:
try:
import json
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT value FROM settings WHERE key = 'face_recognition'")
result = cursor.fetchone()
if result:
settings = json.loads(result[0])
review_path = settings.get('review_path', '/opt/immich/review')
self._review_path = review_path
self.log(f"Using review path: {review_path}", "debug")
return review_path
except Exception as e:
self.log(f"Failed to read review path from database: {e}", "debug")
# Default to /opt/immich/review if not found
self._review_path = '/opt/immich/review'
return '/opt/immich/review'
def _get_video_face_recognition_settings(self) -> dict:
"""Get video face recognition settings from database (cached per session).
Returns:
dict: Settings with enable_video_recognition, video_face_frames, frame_positions
"""
if self._video_face_settings_cache is not None:
return self._video_face_settings_cache
default_settings = {
'enable_video_recognition': False,
'video_face_frames': 3,
'frame_positions': [0.1, 0.5, 0.9]
}
settings = self._get_face_recognition_settings()
if settings:
self._video_face_settings_cache = {
'enable_video_recognition': bool(settings.get('enable_video_recognition', False)),
'video_face_frames': int(settings.get('video_face_frames', 3)),
'frame_positions': settings.get('frame_positions', [0.1, 0.5, 0.9])
}
else:
self._video_face_settings_cache = default_settings
return self._video_face_settings_cache
def _apply_ownership(self, file_path: Path) -> None:
"""Apply configured file ownership to moved files
Args:
file_path: Path to file to change ownership
"""
if not self.unified_db:
return
try:
# Get ownership settings from database
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT value FROM settings WHERE key = 'file_ownership'")
result = cursor.fetchone()
if not result:
return # No ownership configured
import json
settings = json.loads(result[0])
# Check if ownership is enabled
if not settings.get('enabled', False):
return
owner = settings.get('owner', '').strip()
group = settings.get('group', '').strip()
if not owner and not group:
return # Nothing to change
# Get current file ownership
stat_info = file_path.stat()
uid = stat_info.st_uid
gid = stat_info.st_gid
# Resolve owner username to UID
if owner:
try:
uid = pwd.getpwnam(owner).pw_uid
except KeyError:
self.log(f"Warning: User '{owner}' not found, skipping ownership change", "warning")
return
# Resolve group name to GID
if group:
try:
gid = grp.getgrnam(group).gr_gid
except KeyError:
self.log(f"Warning: Group '{group}' not found, skipping ownership change", "warning")
return
# Change ownership
os.chown(file_path, uid, gid)
self.log(f"Changed ownership: {file_path.name}{owner}:{group}", "debug")
except PermissionError:
self.log(f"Permission denied changing ownership of {file_path.name} (run as root/sudo)", "warning")
except Exception as e:
self.log(f"Failed to apply ownership to {file_path.name}: {e}", "debug")
def _apply_ownership_to_path(self, dir_path: Path) -> None:
"""Apply configured file ownership to a directory and all parent directories
Args:
dir_path: Path to directory to change ownership (and parents)
"""
if not self.unified_db or not dir_path.exists():
return
try:
# Get ownership settings from database
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT value FROM settings WHERE key = 'file_ownership'")
result = cursor.fetchone()
if not result:
return # No ownership configured
import json
settings = json.loads(result[0])
# Check if ownership is enabled
if not settings.get('enabled', False):
return
owner = settings.get('owner', '').strip()
group = settings.get('group', '').strip()
if not owner and not group:
return # Nothing to change
# Resolve owner username to UID
uid = -1 # -1 means don't change
gid = -1 # -1 means don't change
if owner:
try:
uid = pwd.getpwnam(owner).pw_uid
except KeyError:
self.log(f"Warning: User '{owner}' not found", "warning")
return
if group:
try:
gid = grp.getgrnam(group).gr_gid
except KeyError:
self.log(f"Warning: Group '{group}' not found", "warning")
return
# Apply ownership to this directory and all parents up to /opt/immich/md
base_path = Path("/opt/immich/md")
current_path = dir_path
while current_path != current_path.parent: # Stop at filesystem root
# Stop if we've gone above the base path
if not current_path.is_relative_to(base_path):
break
# Change ownership of this directory
try:
os.chown(current_path, uid, gid)
self.log(f"Changed directory ownership: {current_path}{owner}:{group}", "debug")
except PermissionError:
self.log(f"Permission denied changing ownership of {current_path}", "debug")
break
except Exception as e:
self.log(f"Failed to change ownership of {current_path}: {e}", "debug")
break
# Move to parent directory
current_path = current_path.parent
except Exception as e:
self.log(f"Failed to apply ownership to directories: {e}", "debug")
def move_file(self,
source: Union[str, Path],
destination: Union[str, Path],
timestamp: Optional[datetime] = None,
preserve_if_no_timestamp: bool = True,
content_type: str = None) -> bool:
"""
Move a single file with optional timestamp setting
Args:
source: Source file path
destination: Destination file path
timestamp: Optional datetime to set on the file
preserve_if_no_timestamp: If True and no timestamp provided, preserve original
content_type: Optional content type for tracking
Returns:
True if successful, False otherwise
"""
source = Path(source)
destination = Path(destination)
if not source.exists():
self.log(f"Source file not found: {source}", "error")
self.stats['failed'] += 1
return False
if destination.exists():
self.log(f"Skipping existing file: {destination.name}", "info")
self.stats['skipped'] += 1
# Update database even when skipping to prevent re-downloads
if self.unified_db and self.batch_context:
platform = self.batch_context.get('platform')
source_name = self.batch_context.get('source')
if platform and source_name:
try:
# Update database with final path (using existing destination)
updated = self.unified_db.update_file_location_by_filename(
filename=source.name,
platform=platform,
source=source_name,
final_path=str(destination)
)
if updated:
self.log(f"Updated database for skipped file: {destination.name}", "debug")
except Exception as e:
self.log(f"Failed to update database for skipped file: {e}", "debug")
return False
# Initialize file_hash before conditional block (used later in upsert_file_inventory)
file_hash = None
# File hash deduplication check (checks downloads, recycle_bin, and file_inventory)
if self.unified_db and UnifiedDatabase:
try:
self.activity_manager.update_status(f"Checking file hash: {source.name}")
file_hash = UnifiedDatabase.get_file_hash(str(source))
self.log(f"[HASH_CHECK] Calculated hash for {source.name}: {file_hash[:16] if file_hash else 'None'}...", "debug")
if file_hash:
# Check if this file hash already exists anywhere (downloads, recycle_bin, file_inventory)
is_duplicate = self.unified_db.is_file_hash_downloaded(file_hash)
self.log(f"[HASH_CHECK] Hash exists in system: {is_duplicate}", "debug")
if is_duplicate:
# Get details from downloads table first (most common case)
existing = self.unified_db.get_download_by_file_hash(file_hash)
if existing:
existing_path = existing.get('file_path', 'unknown')
existing_filename = existing.get('filename', 'unknown')
existing_platform = existing.get('platform', 'unknown')
existing_source = existing.get('source', 'unknown')
self.log(f"[HASH_CHECK] Existing file: {existing_filename} at {existing_path}", "debug")
self.log(f"[HASH_CHECK] Source file: {source.name} at {source}", "debug")
self.log(f"[HASH_CHECK] Paths match: {str(source) == existing_path}", "debug")
if existing_path and str(source) != existing_path:
# Check if existing file actually exists at that path
existing_file_exists = Path(existing_path).exists() if existing_path != 'unknown' else False
self.log(f"[HASH_CHECK] Existing file exists on disk: {existing_file_exists}", "debug")
if existing_file_exists:
# Duplicate found - keep first file, delete new
self.log(
f"Skipping duplicate file (same content): {source.name} "
f"[Already exists: {existing_filename} from {existing_platform}/{existing_source} at {existing_path}]",
"warning"
)
self.stats['duplicates'] += 1
# Delete the duplicate source file to save space
try:
source.unlink()
self.log(f"Deleted duplicate: {source.name}", "debug")
except Exception as e:
self.log(f"Failed to delete duplicate {source.name}: {e}", "warning")
return False
else:
# Existing file path in DB but file doesn't exist - allow move and update DB
self.log(f"[HASH_CHECK] Existing record found but file missing at {existing_path}, allowing move to proceed", "debug")
else:
self.log(f"[HASH_CHECK] Source and existing paths match or no existing path, allowing move", "debug")
else:
# Hash exists in recycle_bin or file_inventory but not downloads
self.log(f"Skipping duplicate file: {source.name} [Hash exists in recycle bin or review queue]", "warning")
self.stats['duplicates'] += 1
# Delete the duplicate source file
try:
source.unlink()
self.log(f"Deleted duplicate: {source.name}", "debug")
except Exception as e:
self.log(f"Failed to delete duplicate {source.name}: {e}", "warning")
return False
except Exception as e:
# Don't fail the move if hash check fails, just log and continue
self.log(f"File hash check failed for {source.name}: {e}", "debug")
# Instagram perceptual duplicate detection (visually similar content with overlays)
if self.batch_context:
platform = self.batch_context.get('platform', '')
source_name = self.batch_context.get('source', '')
content_type = self.batch_context.get('content_type', '')
# Check if this is an Instagram downloader (instagram, fastdl, imginn, instaloader, toolzu)
platform_lower = platform.lower()
is_instagram = any(ig_platform in platform_lower for ig_platform in ['instagram', 'fastdl', 'imginn', 'instaloader', 'toolzu'])
if is_instagram:
self.activity_manager.update_status(f"Checking perceptual hash: {source.name}")
perceptual_result = self._check_perceptual_duplicate(str(source), platform, source_name, content_type)
if perceptual_result == "skip":
# This file is a lower quality perceptual duplicate - skip it
self.log(f"Skipping perceptual duplicate (has overlays or lower quality): {source.name}", "info")
return False
# If perceptual_result is None or file_path, continue processing
# Track if file is being moved to review queue
moved_to_review = False
original_intended_path = str(destination)
queued_for_repost_check = False # Track if we added this file to repost queue
# Instagram story repost detection (only if enabled in settings)
if self._is_instagram_story(source) and self.batch_context:
source_username = self.batch_context.get('source', '')
platform = self.batch_context.get('platform', '')
# Check if this is an Instagram downloader (instagram, fastdl, imginn, instaloader, toolzu)
platform_lower = platform.lower()
is_instagram = any(ig_platform in platform_lower for ig_platform in ['instagram', 'fastdl', 'imginn', 'instaloader', 'toolzu'])
if is_instagram:
self.activity_manager.update_status(f"Checking repost detection: {source.name}")
result = self._check_repost_and_replace(str(source), source_username, str(destination))
if result == "queued":
queued_for_repost_check = True
elif result:
source = Path(result)
self.log(f"Replaced repost with original: {source.name}", "info")
# Update batch_context to reflect the ORIGINAL source
# This ensures database records it correctly as from the original user
replacement_filename = source.name.lower()
# Extract original username from filename (e.g., globalgiftfoundation_20251109_...)
import re
match = re.match(r'^([a-z0-9._]+)_\d{8}', replacement_filename)
if match:
original_source = match.group(1)
self.batch_context['source'] = original_source
# Update content_type based on filename
if 'story' in replacement_filename:
self.batch_context['content_type'] = 'story'
elif 'post' in replacement_filename:
self.batch_context['content_type'] = 'post'
elif 'reel' in replacement_filename:
self.batch_context['content_type'] = 'reel'
self.log(f"Updated batch_context: source={original_source}", "debug")
# Face recognition check
if self.face_module:
# Check if video face recognition is enabled
video_settings = self._get_video_face_recognition_settings()
# Process videos with multi-frame face recognition (if enabled)
if self._is_video_file(source):
if video_settings['enable_video_recognition']:
try:
# Get source username from batch context for source-based tolerance
source_username = self.batch_context.get('source') if self.batch_context else None
tolerance = self._get_face_recognition_tolerance(is_video=True, source=source_username)
frame_positions = video_settings['frame_positions']
self.log(f"Checking video with {len(frame_positions)} frames (tolerance: {tolerance}): {source.name}", "info")
self.activity_manager.update_status(f"Checking facial recognition: {source.name}")
# Use multi-frame checking for videos
result = self.face_module.check_video_multiframe(
str(source),
tolerance=tolerance,
positions=frame_positions
)
# Store result for event emission
self._last_face_result = result
# Log face recognition result to database
# Use best_candidate for matched_person when no match found (to show who was closest)
if self.unified_db:
try:
self.unified_db.log_face_recognition_scan(
file_path=str(destination),
has_match=result['has_match'],
matched_person=result.get('person_name') or result.get('best_candidate'),
confidence=result.get('confidence'),
face_count=result.get('face_count', 0),
scan_type='auto'
)
except Exception as log_err:
self.log(f"Failed to log face recognition result: {log_err}", "debug")
if not result['has_match']:
# No face match - move to review queue
review_path = self._get_review_path()
base_path = Path("/opt/immich/md")
if destination.is_relative_to(base_path):
relative_path = destination.relative_to(base_path)
review_dest = Path(review_path) / relative_path
else:
review_dest = Path(review_path) / source.name
best_info = f" (best: {result.get('best_candidate')} at {result.get('confidence', 0):.1%})" if result.get('best_candidate') else ""
self.log(f"No face match in video {source.name} (checked {result['frames_checked']} frames){best_info} - moving to review queue", "info")
destination = review_dest
moved_to_review = True
self.stats['review_queue'] += 1
else:
# Face matched - continue to original destination
self.log(f"Face match in video: {result['person_name']} ({result['confidence']:.1%}, frame {result['best_frame_index']+1}/{result['frames_checked']}) - proceeding to final destination", "info")
# Track that face recognition ran (batch loop handles pacing)
self._last_move_had_face_recognition = True
except Exception as e:
self.log(f"Video face recognition failed for {source.name}: {e} - moving to review queue", "warning")
# Re-check file existence - if file was deleted, skip the move
if not source.exists():
self.log(f"Source file no longer exists after video face check failure: {source.name}", "warning")
self.stats['failed'] += 1
return False
# On error, move to review queue as safety measure
review_path = self._get_review_path()
base_path = Path("/opt/immich/md")
if destination.is_relative_to(base_path):
relative_path = destination.relative_to(base_path)
review_dest = Path(review_path) / relative_path
else:
review_dest = Path(review_path) / source.name
destination = review_dest
moved_to_review = True
self.stats['review_queue'] += 1
else:
# Video face recognition disabled - skip and move to review queue
review_path = self._get_review_path()
base_path = Path("/opt/immich/md")
if destination.is_relative_to(base_path):
relative_path = destination.relative_to(base_path)
review_dest = Path(review_path) / relative_path
else:
review_dest = Path(review_path) / source.name
self.log(f"Video face recognition disabled - moving {source.name} to review queue", "debug")
destination = review_dest
moved_to_review = True
self.stats['review_queue'] += 1
# Process images with face recognition
elif self._is_image_file(source):
try:
is_video = False # Only processing images now
# Get source username from batch context for source-based tolerance
source_username = self.batch_context.get('source') if self.batch_context else None
tolerance = self._get_face_recognition_tolerance(is_video=False, source=source_username)
self.log(f"Checking image (tolerance: {tolerance}): {source.name}", "debug")
result = self.face_module.check_image(str(source), tolerance=tolerance, is_video=is_video)
# Store result for event emission
self._last_face_result = result
# Log face recognition result to database
# Use best_candidate for matched_person when no match found (to show who was closest)
if self.unified_db:
try:
self.unified_db.log_face_recognition_scan(
file_path=str(destination), # Use final destination path
has_match=result['has_match'],
matched_person=result.get('person_name') or result.get('best_candidate'),
confidence=result.get('confidence'),
face_count=result.get('face_count', 0),
scan_type='auto'
)
except Exception as log_err:
self.log(f"Failed to log face recognition result: {log_err}", "debug")
if not result['has_match']:
# No face match - move to review queue instead
# Maintain folder structure in review queue
review_path = self._get_review_path()
base_path = Path("/opt/immich/md")
if destination.is_relative_to(base_path):
# Get relative path from base
relative_path = destination.relative_to(base_path)
# Recreate under review directory
review_dest = Path(review_path) / relative_path
else:
# Fallback to flat structure if not under base path
review_dest = Path(review_path) / source.name
file_type = "video" if is_video else "image"
best_info = f" (best: {result.get('best_candidate')} at {result.get('confidence', 0):.1%})" if result.get('best_candidate') else ""
self.log(f"No face match for {file_type} {source.name}{best_info} - moving to review queue at {review_dest}", "info")
# Update destination to review path
destination = review_dest
moved_to_review = True
self.stats['review_queue'] += 1
else:
# Face matched - continue to original destination
file_type = "video" if is_video else "image"
self.log(f"Face match in {file_type}: {result['person_name']} ({result['confidence']:.1%}) - proceeding to final destination", "debug")
except Exception as e:
# Don't fail the move if face check fails, just log and continue
self.log(f"Face recognition check failed for {source.name}: {e}", "debug")
# Re-check file existence - if file was deleted, skip the move
if not source.exists():
self.log(f"Source file no longer exists after face check failure: {source.name}", "warning")
self.stats['failed'] += 1
return False
finally:
# Track that face recognition ran
self._last_move_had_face_recognition = True
try:
# Ensure destination directory exists
destination.parent.mkdir(parents=True, exist_ok=True)
# Apply ownership to created directories
self._apply_ownership_to_path(destination.parent)
if preserve_if_no_timestamp and not timestamp:
# Try to extract EXIF date from source file first
# This fixes issues where filesystem mtime is wrong but EXIF has correct date
exif_date = None
if self._is_image_file(source):
exif_date = _extract_exif_date(source)
if exif_date:
self.log(f"Extracted EXIF date for {source.name}: {exif_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
# Copy file with metadata preserved (single pass)
shutil.copy2(str(source), str(destination))
if exif_date:
# Override timestamps with EXIF date (more accurate than filesystem)
if DATE_UTILS_AVAILABLE:
DateHandler.update_file_timestamps(destination, exif_date)
self.log(f"Set timestamps from EXIF: {exif_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
else:
timestamp_unix = exif_date.timestamp()
os.utime(destination, (timestamp_unix, timestamp_unix))
self.log(f"Set filesystem timestamp from EXIF: {exif_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
else:
# Copy file content
shutil.copy(str(source), str(destination))
# Set timestamp if provided - use DateHandler for comprehensive update
# This sets EXIF metadata (including MetadataDate for Immich) AND filesystem times
if timestamp:
if DATE_UTILS_AVAILABLE:
# Use centralized date handler for EXIF + filesystem timestamps
DateHandler.update_file_timestamps(destination, timestamp)
self.log(f"Set all timestamps to {timestamp.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
else:
# Fallback to filesystem-only timestamps
timestamp_unix = timestamp.timestamp()
os.utime(destination, (timestamp_unix, timestamp_unix))
self.log(f"Set filesystem timestamp to {timestamp.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
self.log(f"Moved: {source.name}{destination.name}", "info")
self.stats['moved'] += 1
self.activity_manager.update_status(f"Moving images: {source.name}")
# Pre-generate thumbnail in background (non-blocking)
self._bg_executor.submit(self._generate_thumbnail_cache, destination, file_hash)
# Apply file ownership if configured
self._apply_ownership(destination)
# Update database with final file location and hash
if self.unified_db and self.batch_context:
platform = self.batch_context.get('platform')
source_name = self.batch_context.get('source')
content_type_ctx = self.batch_context.get('content_type')
# For tagged content, extract actual poster from filename (e.g., "rtlliving_20251124_..." -> "rtlliving")
if content_type_ctx == 'tagged' and source.name:
import re
# Use date pattern to correctly extract usernames that may contain underscores
# Pattern: username_YYYYMMDD_...
date_pattern = re.match(r'^(.+?)_(\d{8})_', source.name)
if date_pattern:
extracted_source = date_pattern.group(1).lower()
# Validate: Instagram usernames are 1-30 chars, alphanumeric + underscore + period
if extracted_source and re.match(r'^[a-z0-9_.]{1,30}$', extracted_source):
if extracted_source != source_name:
self.log(f"Tagged content: using poster @{extracted_source} instead of @{source_name}", "debug")
source_name = extracted_source
else:
self.log(f"Tagged content: extracted '{extracted_source}' doesn't look like valid username, keeping @{source_name}", "debug")
if platform and source_name:
try:
# Update database with final path and hash
updated = self.unified_db.update_file_location_by_filename(
filename=source.name,
platform=platform,
source=source_name,
final_path=str(destination)
)
# Batch all path updates in a single transaction
with self.unified_db.get_connection(for_write=True) as conn:
cursor = conn.cursor()
# If moved to review queue, add intended_path to metadata
if updated and moved_to_review:
import json
cursor.execute('''
SELECT metadata FROM downloads
WHERE filename = ? AND platform = ? AND source = ?
''', (source.name, platform, source_name))
row = cursor.fetchone()
if row:
metadata = json.loads(row['metadata']) if row['metadata'] else {}
metadata['intended_path'] = original_intended_path
cursor.execute('''
UPDATE downloads
SET metadata = ?
WHERE filename = ? AND platform = ? AND source = ?
''', (json.dumps(metadata), source.name, platform, source_name))
self.log(f"Saved intended destination to metadata: {original_intended_path}", "debug")
# Update perceptual hash path
try:
cursor.execute('''
UPDATE instagram_perceptual_hashes
SET file_path = ?
WHERE filename = ? AND platform = ? AND source = ?
''', (str(destination), source.name, platform, source_name))
if cursor.rowcount > 0:
self.log(f"Updated perceptual hash path: {destination}", "debug")
except Exception as e:
self.log(f"Failed to update perceptual hash path: {e}", "debug")
# Update face recognition scans path
try:
cursor.execute('''
UPDATE face_recognition_scans
SET file_path = ?
WHERE file_path = ?
''', (str(destination), original_intended_path))
if cursor.rowcount > 0:
self.log(f"Updated face recognition scan path: {original_intended_path} -> {destination}", "debug")
except Exception as e:
self.log(f"Failed to update face recognition scan path: {e}", "debug")
if updated:
self.log(f"Updated database with final location: {destination}", "debug")
else:
self.log(f"No database record found to update for {source.name}", "debug")
except Exception as e:
self.log(f"Failed to update database location: {e}", "debug")
# Record in file_inventory in background (dimensions + discovery queue)
# This avoids blocking the move pipeline on ffprobe/PIL calls
self._bg_executor.submit(
self._record_file_inventory_bg,
destination, source.name, platform, source_name,
moved_to_review, file_hash, timestamp
)
# Track for batch notification if in batch mode
# Auto-detect content type from path if not provided
if not content_type and self.batch_context:
# Prefer the batch-level content_type (set by caller who knows what's being downloaded)
# This prevents path-based inference from overriding e.g. 'media' with 'post'
# just because the temp dir contains "posts/"
batch_ct = self.batch_context.get('content_type')
if batch_ct:
# Normalize plural forms to singular for proper notification grammar
_SINGULAR = {'posts': 'post', 'stories': 'story', 'reels': 'reel',
'videos': 'video', 'images': 'image', 'items': 'item'}
content_type = _SINGULAR.get(batch_ct, batch_ct)
else:
# No batch content_type set, try to infer from source path
path_str = str(source).lower()
if 'story' in path_str or 'stories' in path_str:
content_type = 'story'
elif 'reel' in path_str:
content_type = 'reel'
elif 'post' in path_str:
content_type = 'post'
elif 'video' in path_str:
content_type = 'video'
elif 'image' in path_str or 'photo' in path_str:
content_type = 'image'
# Track with full path for image attachment in notifications
self.track_moved_file(str(destination), content_type=content_type, is_review=moved_to_review)
# Update repost queue with actual final destination (in case it changed due to review queue)
if queued_for_repost_check and self.repost_queue:
# Find the most recent queue entry for this file and update with actual final destination
for entry in reversed(self.repost_queue):
if Path(entry['file_path']).name == destination.name:
# Update with actual final path (might be review queue)
entry['file_path'] = str(destination)
self.log(f"Updated repost queue entry with final destination: {destination.name}", "debug")
break
# Emit WebSocket event for real-time scraping monitor
if self.event_emitter and self.current_session:
try:
# Determine media type
media_type = 'video' if self._is_video_file(destination) else 'image'
# Determine destination type (media/review/recycle)
dest_str = str(destination).lower()
if moved_to_review or '/review' in dest_str:
destination_type = 'review'
elif '/recycle' in dest_str:
destination_type = 'recycle'
else:
destination_type = 'media'
# Generate thumbnail URL
import urllib.parse
thumbnail_url = f"/api/files/thumbnail?path={urllib.parse.quote(str(destination))}"
# Get face match info if available
face_match = {'matched': False}
if hasattr(self, '_last_face_result') and self._last_face_result:
if self._last_face_result.get('has_match'):
face_match = {
'matched': True,
'person_name': self._last_face_result.get('person_name'),
'confidence': self._last_face_result.get('confidence')
}
# Emit file_moved event
self.event_emitter.emit_file_moved(
session_id=self.current_session.get('session_id', 'unknown'),
platform=self.current_session.get('platform', 'unknown'),
account=self.current_session.get('account', 'unknown'),
filename=destination.name,
media_type=media_type,
destination_type=destination_type,
destination_path=str(destination),
thumbnail_url=thumbnail_url,
face_match=face_match
)
except Exception as emit_err:
self.log(f"Failed to emit file_moved event: {emit_err}", "debug")
return True
except Exception as e:
self.log(f"Failed to move {source.name}: {e}", "error")
self.stats['failed'] += 1
return False
def move_files_batch(self,
                     source_dir: Union[str, Path],
                     dest_dir: Union[str, Path],
                     file_timestamps: Dict[str, Optional[datetime]] = None,
                     extensions: List[str] = None,
                     preserve_if_no_timestamp: bool = True) -> Dict:
    """
    Move multiple files from source to destination with timestamp management

    Args:
        source_dir: Source directory
        dest_dir: Destination directory
        file_timestamps: Dict mapping filename to datetime (or None to preserve)
        extensions: List of file extensions to move (e.g., ['.jpg', '.mp4']);
            matching is case-insensitive, so '.JPG' files match '.jpg'
        preserve_if_no_timestamp: If True, preserve timestamp for files not in dict

    Returns:
        Statistics dictionary
    """
    source_dir = Path(source_dir)
    dest_dir = Path(dest_dir)
    file_timestamps = file_timestamps or {}
    # Skip if source and destination are the same
    if source_dir.resolve() == dest_dir.resolve():
        self.log("Source and destination are the same, skipping move", "info")
        return {"moved": 0, "failed": 0, "errors": []}
    if not source_dir.exists():
        self.log(f"Source directory not found: {source_dir}", "error")
        return self.stats
    # Collect files in a single recursive walk.
    # BUG FIX: the previous code globbed f'*{ext}' and then re-filtered on
    # f.suffix.lower(); files with upper-case extensions (e.g. 'IMG.JPG')
    # never matched the lowercase glob and were silently skipped, and
    # overlapping patterns could list a file twice.
    ext_filter = {ext.lower() for ext in extensions} if extensions else None
    files_to_move = [
        f for f in source_dir.rglob('*')
        if f.is_file() and (ext_filter is None or f.suffix.lower() in ext_filter)
    ]
    self.log(f"Found {len(files_to_move)} files to move", "info")
    total_files = len(files_to_move)
    for file_idx, source_file in enumerate(files_to_move):
        # Determine destination path (mirror the source tree layout)
        relative_path = source_file.relative_to(source_dir)
        dest_file = dest_dir / relative_path
        # Get timestamp for this file (None -> preserve behavior below)
        timestamp = file_timestamps.get(source_file.name)
        # Update file-level progress
        self.activity_manager.update_status(
            f"Processing {source_file.name}",
            progress_current=file_idx + 1,
            progress_total=total_files
        )
        # Move the file; move_file sets this flag when face recognition ran
        self._last_move_had_face_recognition = False
        self.move_file(
            source_file,
            dest_file,
            timestamp=timestamp,
            preserve_if_no_timestamp=preserve_if_no_timestamp
        )
        # Periodic GC after face recognition batches to free ML tensors
        if self._last_move_had_face_recognition and (file_idx + 1) % 10 == 0:
            gc.collect()
    return self.stats
def move_with_metadata(self, file_info: Dict) -> bool:
    """
    Move a file described by a metadata dictionary.

    Args:
        file_info: Dictionary with keys:
            - source: Source file path (required)
            - destination: Destination file path (required)
            - timestamp: Optional datetime to stamp onto the moved file
            - preserve_original: If True and no timestamp, keep original times

    Returns:
        True on success; False when required keys are missing or the move fails
    """
    src = file_info.get('source')
    dst = file_info.get('destination')
    if not (src and dst):
        self.log("Missing source or destination in file_info", "error")
        return False
    return self.move_file(
        src,
        dst,
        file_info.get('timestamp'),
        file_info.get('preserve_original', True)
    )
def get_stats(self) -> Dict:
    """Return a snapshot (shallow copy) of the movement statistics."""
    return dict(self.stats)
def reset_stats(self):
    """Reset movement statistics to zero.

    BUG FIX: includes 'review_queue', which move_file increments
    (self.stats['review_queue'] += 1) when a file is diverted to the
    review queue; omitting the key here made the first increment after
    a reset raise KeyError.
    """
    self.stats = {
        'moved': 0,
        'skipped': 0,
        'failed': 0,
        'duplicates': 0,
        'review_queue': 0
    }
def start_batch(self, platform: str, source: str = None, content_type: str = None, search_term: str = None):
    """
    Start a batch move operation for notifications

    Args:
        platform: Platform name (instagram, tiktok, forum, etc.)
        source: Source/username
        content_type: Type of content (post, story, reel, thread, etc.)
        search_term: Optional search term (for forum searches)
    """
    self.batch_context = {
        'platform': platform,
        'source': source,
        'content_type': content_type,
        'search_term': search_term
    }
    self.moved_files = []
    # BUG FIX: also reset the review-queue tracker. Previously only
    # end_batch cleared it, so a batch that ended abnormally leaked its
    # review files into the next batch's notifications.
    self.review_queue_files = []
    # Clear per-batch settings caches so DB changes take effect between batches
    self._face_recognition_settings_cache = None
    self._video_face_settings_cache = None
    self._review_path = None
    self.log(f"Started batch move for {platform}/{source or 'unknown'}/{content_type or 'items'}", "debug")
def end_batch(self):
    """
    End batch move operation and send notification

    Sequence (order matters): flush background work, notify for matched
    files, notify for review-queue files, process the Instagram repost
    queue, fire the download-complete callback, then clear batch state.

    Returns:
        Number of files moved in this batch
    """
    # No-op when start_batch was never called (or batch already ended)
    if not self.batch_context:
        return 0
    # Wait for background tasks (thumbnails, inventory) to complete
    # before sending notifications
    self._bg_executor.shutdown(wait=True)
    from concurrent.futures import ThreadPoolExecutor
    # Recreate the executor so later batches can keep submitting background work
    self._bg_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix='move_bg')
    moved_count = len(self.moved_files)
    review_count = len(self.review_queue_files)
    platform = self.batch_context.get('platform', 'unknown')
    source = self.batch_context.get('source')
    content_type = self.batch_context.get('content_type') or 'item' # Handle None explicitly
    search_term = self.batch_context.get('search_term')
    # Send batch notification for matched files (moved to final destination)
    if self.notifier and moved_count > 0:
        try:
            # Prepare download list for notification
            # Use individual file content types if available, otherwise fall back to batch content type
            downloads = []
            for file_info in self.moved_files:
                downloads.append({
                    'source': source,
                    'content_type': file_info.get('content_type') or content_type,
                    'filename': file_info.get('filename'),
                    'file_path': file_info.get('file_path') # Full path for image attachment
                })
            # Send batch notification
            success = self.notifier.notify_batch_download(
                platform=platform,
                downloads=downloads,
                search_term=search_term
            )
            if success:
                self.log(f"Sent notification: {moved_count} {content_type}(s) from {source or platform}", "info")
            else:
                self.log("Failed to send notification", "warning")
        except Exception as e:
            # Notification failure must never break the move pipeline
            self.log(f"Failed to send batch notification: {e}", "error")
    # Send separate notification for review queue items (no face match)
    if self.notifier and review_count > 0:
        try:
            # Prepare review queue list for notification
            downloads = []
            for file_info in self.review_queue_files:
                downloads.append({
                    'source': source,
                    'content_type': file_info.get('content_type') or content_type,
                    'filename': file_info.get('filename'),
                    'file_path': file_info.get('file_path') # Full path for image attachment
                })
            # Send review queue notification with lower priority
            success = self.notifier.notify_batch_download(
                platform=platform,
                downloads=downloads,
                search_term=search_term,
                is_review_queue=True # Flag to indicate this is review queue notification
            )
            if success:
                self.log(f"Sent review queue notification: {review_count} {content_type}(s) from {source or platform}", "info")
            else:
                self.log("Review queue notification not sent (may be disabled in settings)", "debug")
        except Exception as e:
            self.log(f"Failed to send review queue notification: {e}", "error")
    # Process repost queue (download originals, find matches, replace)
    # This happens after all files are moved and notifications sent
    if platform in ['instagram', 'fastdl', 'imginn', 'toolzu', 'instaloader']:
        self.process_repost_queue()
    # Call download complete callback to trigger UI updates
    total_count = moved_count + review_count
    if total_count > 0 and self.on_download_complete:
        try:
            self.on_download_complete(platform, source, total_count)
        except Exception as e:
            # Callback is external code; isolate its failures
            self.log(f"Error in download complete callback: {e}", "error")
    # Clear batch context so a stray second end_batch returns 0
    self.batch_context = None
    self.moved_files = []
    self.review_queue_files = []
    return moved_count + review_count
def _is_instagram_story(self, file_path: Path) -> bool:
"""Check if file is an Instagram story based on path"""
path_str = str(file_path).lower()
return 'story' in path_str or 'stories' in path_str
def _is_repost_detection_enabled(self) -> bool:
"""Check if repost detection is enabled in settings"""
try:
if not self.unified_db:
return False
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT value FROM settings WHERE key = 'repost_detection'")
result = cursor.fetchone()
if result:
import json
settings = json.loads(result[0])
return settings.get('enabled', False) # Default: DISABLED
except (sqlite3.Error, json.JSONDecodeError, KeyError) as e:
self.log(f"Error checking repost detection setting: {e}", "debug")
return False # Default: DISABLED
def _check_repost_and_replace(self, file_path: str, source_username: str, destination_path: str = None) -> Optional[str]:
"""
Queue file for repost detection (processed after batch completes)
Args:
file_path: Current temp file path (for OCR scanning)
source_username: Username who posted this
destination_path: Final permanent storage path (used in queue)
Returns None (queueing only, no immediate replacement)
"""
# Check if feature is enabled
if not self._is_repost_detection_enabled():
return None # Feature disabled - skip
try:
from modules.instagram_repost_detector import InstagramRepostDetector
# Initialize detector for OCR check only
detector = InstagramRepostDetector(
unified_db=self.unified_db,
log_callback=lambda msg, lvl: self.log(msg, lvl)
)
# Quick OCR check to see if it's a repost (using temp file)
original_username = detector._extract_username_from_repost(file_path)
if original_username and original_username.lower() != source_username.lower():
# Queue for processing after moves complete
# IMPORTANT: Store destination_path (permanent location) NOT file_path (temp)
path_to_queue = destination_path if destination_path else file_path
self.repost_queue.append({
'file_path': path_to_queue,
'source_username': source_username,
'detected_username': original_username
})
self.log(f"Queued repost detection: {Path(path_to_queue).name} → @{original_username}", "info")
return "queued" # Signal that file was queued
return None # No repost detected, no immediate replacement
except Exception as e:
self.log(f"Repost queue check failed: {e}", "debug")
return None
def process_repost_queue(self):
    """Process all queued reposts (called after batch completes)"""
    if not self.repost_queue:
        return
    self.log(f"Processing {len(self.repost_queue)} queued reposts...", "info")
    try:
        from modules.instagram_repost_detector import InstagramRepostDetector
        repost_detector = InstagramRepostDetector(
            unified_db=self.unified_db,
            log_callback=lambda msg, lvl: self.log(msg, lvl)
        )
        # Bucket queued entries by detected username so each original
        # poster's profile is only fetched once per batch
        by_user = {}
        for entry in self.repost_queue:
            by_user.setdefault(entry['detected_username'], []).append(entry)
        checked = 0
        swapped = 0
        for username, items in by_user.items():
            self.log(f"Processing {len(items)} repost(s) from @{username}", "info")
            for entry in items:
                try:
                    # Download the original if needed, locate a match, replace
                    replacement = repost_detector.check_and_replace_repost(
                        entry['file_path'],
                        entry['source_username']
                    )
                    if replacement:
                        self.log(f"✓ Replaced repost with original from @{username}", "success")
                        swapped += 1
                    checked += 1
                except Exception as e:
                    self.log(f"Failed to process repost for @{username}: {e}", "error")
        self.log(f"Repost queue processed: {checked} checked, {swapped} replaced", "info")
    except Exception as e:
        self.log(f"Repost queue processing failed: {e}", "error")
    finally:
        # Always drain the queue, even after failure
        self.repost_queue = []
def _check_perceptual_duplicate(self, file_path: str, platform: str, source: str, content_type: str = None) -> Optional[str]:
"""
Check if file is a perceptual duplicate (visually similar with overlays) for Instagram
Returns:
- None if not a duplicate or feature disabled
- "skip" if this file should be skipped (lower quality duplicate)
- file_path if this file should be kept
"""
try:
from modules.instagram_perceptual_duplicate_detector import InstagramPerceptualDuplicateDetector
# Initialize detector (uses its own universal logger now)
detector = InstagramPerceptualDuplicateDetector(
unified_db=self.unified_db
)
# Run detection
result = detector.check_and_handle_duplicate(file_path, platform, source, content_type)
return result
except Exception as e:
self.log(f"Perceptual duplicate detection failed: {e}", "error")
import traceback
self.log(traceback.format_exc(), "error")
return None
def track_moved_file(self, file_path: str, metadata: Dict[str, Any] = None, content_type: str = None, is_review: bool = False):
    """
    Track a moved file for batch notification.

    Args:
        file_path: Full path to the moved file (for image attachment)
        metadata: Optional metadata dictionary
        content_type: Optional specific content type for this file (overrides batch default)
        is_review: True if file was moved to review queue (no face match)
    """
    # Only meaningful while a batch is active
    if not self.batch_context:
        return
    record = {
        'file_path': file_path,
        'filename': Path(file_path).name,  # Extracted filename for logging
        'metadata': metadata or {},
        'content_type': content_type  # Per-file content type
    }
    bucket = self.review_queue_files if is_review else self.moved_files
    bucket.append(record)
def move_files_simple(source_dir: Union[str, Path],
                      dest_dir: Union[str, Path],
                      extensions: List[str] = None,
                      file_timestamps: Dict[str, datetime] = None,
                      log_callback=None) -> Dict:
    """
    Convenience wrapper around MoveManager for one-off batch moves.

    Args:
        source_dir: Source directory
        dest_dir: Destination directory
        extensions: List of file extensions to move
        file_timestamps: Optional dict mapping filenames to timestamps
        log_callback: Optional logging callback

    Returns:
        Statistics dict
    """
    return MoveManager(log_callback=log_callback).move_files_batch(
        source_dir,
        dest_dir,
        file_timestamps=file_timestamps,
        extensions=extensions
    )
if __name__ == "__main__":
    # Manual smoke test: move two files, one with an explicit timestamp,
    # one preserving its original timestamp.
    import tempfile
    print("Testing MoveManager...")
    with tempfile.TemporaryDirectory() as tmpdir:
        workspace = Path(tmpdir)
        source_root = workspace / "source"
        dest_root = workspace / "destination"
        source_root.mkdir()
        # Create test files
        first = source_root / "test1.jpg"
        second = source_root / "test2.mp4"
        first.write_text("content1")
        second.write_text("content2")
        manager = MoveManager()
        # Move with a specific timestamp applied
        stamp = datetime(2025, 8, 26, 17, 2, 24)
        manager.move_file(first, dest_root / "test1.jpg", timestamp=stamp)
        # Move preserving the original timestamp
        manager.move_file(second, dest_root / "test2.mp4", timestamp=None)
        # Verify the timestamped move landed
        moved = dest_root / "test1.jpg"
        if moved.exists():
            mtime = datetime.fromtimestamp(os.stat(moved).st_mtime)
            print(f"✅ File 1 moved with timestamp: {mtime}")
        print(f"Stats: {manager.get_stats()}")