Files
media-downloader/modules/reddit_community_monitor.py
Todd 0d7b2b1aab Initial commit
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00

1613 lines
63 KiB
Python

#!/usr/bin/env python3
"""
Reddit Community Monitor Module
Monitors specified Reddit communities (subreddits) for new posts,
downloads all media (including imgur/redgifs attachments via gallery-dl),
and automatically creates private gallery posts for mapped persons.
Design:
- Each community (subreddit) is mapped to a person in the private gallery
- Uses gallery-dl for downloading with full Reddit/imgur/redgifs support
- All new posts are tagged with a "reddit" tag
- Configurable check intervals and lookback periods
- Supports cookie authentication for age-gated content
"""
import asyncio
import hashlib
import json
import mimetypes
import os
import re
import sqlite3
import subprocess
import tempfile
import uuid
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Any, Set
from modules.universal_logger import get_logger
logger = get_logger('RedditMonitor')
# Key file path for background crypto access
REDDIT_MONITOR_KEY_FILE = '/opt/immich/private/.reddit_monitor_key'
class RedditCommunityMonitor:
"""
Background monitor for Reddit communities.
Downloads media from subreddits and imports to private gallery.
"""
def __init__(self, db_path: str, activity_manager=None):
    """
    Args:
        db_path: Path to the SQLite database file used by all methods.
        activity_manager: Optional progress reporter with
            start/update/stop_background_task methods; may be None.
    """
    self.db_path = db_path
    self.activity_manager = activity_manager
    # Fixed path to the gallery-dl executable inside the service venv
    self.gallery_dl_path = '/opt/media-downloader/venv/bin/gallery-dl'
def _get_connection(self) -> sqlite3.Connection:
"""Get a database connection with row factory."""
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA foreign_keys = ON")
return conn
# =========================================================================
# SETTINGS METHODS
# =========================================================================
def get_settings(self) -> Dict:
"""Get Reddit monitor settings from private_media_config."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute(
"SELECT key, value FROM private_media_config WHERE key LIKE 'reddit_monitor_%'"
)
rows = cursor.fetchall()
settings = {}
for row in rows:
key = row['key'].replace('reddit_monitor_', '')
value = row['value']
if value == 'true':
value = True
elif value == 'false':
value = False
elif value and value.isdigit():
value = int(value)
settings[key] = value
# Defaults
return {
'enabled': settings.get('enabled', False),
'check_interval_hours': settings.get('check_interval_hours', 4),
'lookback_days': settings.get('lookback_days', 3),
'last_checked': settings.get('last_checked', None),
}
finally:
conn.close()
def update_settings(self, **kwargs) -> bool:
"""Update Reddit monitor settings."""
conn = self._get_connection()
try:
cursor = conn.cursor()
for key, value in kwargs.items():
db_key = f'reddit_monitor_{key}'
if isinstance(value, bool):
db_value = 'true' if value else 'false'
else:
db_value = str(value) if value is not None else ''
cursor.execute('''
INSERT OR REPLACE INTO private_media_config (key, value, updated_at)
VALUES (?, ?, CURRENT_TIMESTAMP)
''', (db_key, db_value))
conn.commit()
return True
except Exception as e:
logger.error(f"Failed to update settings: {e}")
return False
finally:
conn.close()
# =========================================================================
# COOKIE MANAGEMENT
# =========================================================================
def save_cookies(self, crypto, cookies_json: str) -> bool:
    """Encrypt and persist the raw cookies JSON in private_media_config.

    Returns True on success, False if encryption or the DB write fails.
    """
    conn = self._get_connection()
    try:
        ciphertext = crypto.encrypt_field(cookies_json)
        conn.execute('''
            INSERT OR REPLACE INTO private_media_config (key, value, updated_at)
            VALUES ('reddit_monitor_encrypted_cookies', ?, CURRENT_TIMESTAMP)
        ''', (ciphertext,))
        conn.commit()
        logger.info("Reddit cookies saved (encrypted)")
        return True
    except Exception as e:
        logger.error(f"Failed to save cookies: {e}")
        return False
    finally:
        conn.close()
def has_cookies(self, crypto) -> bool:
"""Check if encrypted cookies exist and are valid."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute(
"SELECT value FROM private_media_config WHERE key = 'reddit_monitor_encrypted_cookies'"
)
row = cursor.fetchone()
if not row or not row['value']:
return False
# Try to decrypt to verify they're valid
try:
decrypted = crypto.decrypt_field(row['value'])
return bool(decrypted and decrypted.strip())
except Exception:
return False
finally:
conn.close()
def delete_cookies(self) -> bool:
    """Remove the stored (encrypted) Reddit cookies, if any.

    Returns True on success even when no row existed; False on DB error.
    """
    conn = self._get_connection()
    try:
        conn.execute(
            "DELETE FROM private_media_config WHERE key = 'reddit_monitor_encrypted_cookies'"
        )
        conn.commit()
        logger.info("Reddit cookies deleted")
        return True
    except Exception as e:
        logger.error(f"Failed to delete cookies: {e}")
        return False
    finally:
        conn.close()
def _get_cookies_json(self, crypto) -> Optional[str]:
"""Load and decrypt stored cookies JSON."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute(
"SELECT value FROM private_media_config WHERE key = 'reddit_monitor_encrypted_cookies'"
)
row = cursor.fetchone()
if not row or not row['value']:
return None
try:
return crypto.decrypt_field(row['value'])
except Exception as e:
logger.error(f"Failed to decrypt cookies: {e}")
return None
finally:
conn.close()
def _write_netscape_cookie_file(self, cookies_json: str, output_path: str) -> bool:
"""Convert JSON cookies array to Netscape cookie file format for gallery-dl."""
try:
cookies = json.loads(cookies_json)
if not isinstance(cookies, list):
logger.error("Cookies is not a JSON array")
return False
with open(output_path, 'w') as f:
f.write("# Netscape HTTP Cookie File\n")
f.write("# https://curl.haxx.se/docs/http-cookies.html\n\n")
for cookie in cookies:
domain = cookie.get('domain', '')
# Ensure domain starts with . for domain-wide cookies
include_subdomains = 'TRUE' if domain.startswith('.') else 'FALSE'
path = cookie.get('path', '/')
secure = 'TRUE' if cookie.get('secure', False) else 'FALSE'
# Convert expiry - use 0 for session cookies
expires = cookie.get('expirationDate', cookie.get('expiry', cookie.get('expires', 0)))
if expires is None:
expires = 0
expires = str(int(float(expires)))
name = cookie.get('name', '')
value = cookie.get('value', '')
f.write(f"{domain}\t{include_subdomains}\t{path}\t{secure}\t{expires}\t{name}\t{value}\n")
return True
except Exception as e:
logger.error(f"Failed to write Netscape cookie file: {e}")
return False
# =========================================================================
# COMMUNITY MAPPING METHODS
# =========================================================================
def get_all_communities(self) -> List[Dict]:
"""Get all community mappings with person info and live media count."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT c.*, p.encrypted_name as person_encrypted_name,
r.encrypted_name as relationship_encrypted_name,
r.color as relationship_color,
(SELECT COUNT(*) FROM private_media m
WHERE m.person_id = c.person_id AND m.source_type = 'reddit') as actual_media_count
FROM private_media_reddit_communities c
LEFT JOIN private_media_persons p ON c.person_id = p.id
LEFT JOIN private_media_relationships r ON p.relationship_id = r.id
ORDER BY c.subreddit_name
''')
communities = []
for row in cursor.fetchall():
d = dict(row)
d['total_media_found'] = d.pop('actual_media_count', 0)
communities.append(d)
return communities
finally:
conn.close()
def get_community(self, community_id: int) -> Optional[Dict]:
"""Get a single community mapping."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT c.*, p.encrypted_name as person_encrypted_name,
r.encrypted_name as relationship_encrypted_name,
r.color as relationship_color
FROM private_media_reddit_communities c
LEFT JOIN private_media_persons p ON c.person_id = p.id
LEFT JOIN private_media_relationships r ON p.relationship_id = r.id
WHERE c.id = ?
''', (community_id,))
row = cursor.fetchone()
return dict(row) if row else None
finally:
conn.close()
def add_community(self, subreddit_name: str, person_id: int) -> int:
"""Add a new community mapping. Returns the new ID."""
# Strip r/ prefix if present
subreddit_name = re.sub(r'^r/', '', subreddit_name.strip())
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
INSERT INTO private_media_reddit_communities (subreddit_name, person_id)
VALUES (?, ?)
''', (subreddit_name, person_id))
conn.commit()
return cursor.lastrowid
finally:
conn.close()
def update_community(self, community_id: int, **kwargs) -> bool:
"""Update a community mapping."""
conn = self._get_connection()
try:
cursor = conn.cursor()
updates = []
values = []
for key, value in kwargs.items():
if key == 'subreddit_name' and value is not None:
value = re.sub(r'^r/', '', value.strip())
if key == 'enabled':
value = 1 if value else 0
updates.append(f'{key} = ?')
values.append(value)
if not updates:
return False
updates.append("updated_at = CURRENT_TIMESTAMP")
values.append(community_id)
cursor.execute(
f"UPDATE private_media_reddit_communities SET {', '.join(updates)} WHERE id = ?",
values
)
conn.commit()
return cursor.rowcount > 0
except Exception as e:
logger.error(f"Failed to update community {community_id}: {e}")
return False
finally:
conn.close()
def delete_community(self, community_id: int) -> bool:
"""Delete a community mapping."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute(
"DELETE FROM private_media_reddit_communities WHERE id = ?",
(community_id,)
)
conn.commit()
return cursor.rowcount > 0
finally:
conn.close()
def get_communities_for_person(self, person_id: int) -> List[Dict]:
"""Get all communities mapped to a person."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute(
"SELECT * FROM private_media_reddit_communities WHERE person_id = ?",
(person_id,)
)
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
def get_history(self, community_id: int) -> List[Dict]:
"""Get download history for a community."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute('''
SELECT * FROM private_media_reddit_history
WHERE community_id = ?
ORDER BY processed_at DESC
LIMIT 100
''', (community_id,))
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
# =========================================================================
# CRYPTO ACCESS
# =========================================================================
def _get_crypto(self):
    """Load the gallery field-encryption helper from the on-disk key file.

    Returns:
        The crypto object, or None when the key file is missing/invalid
        (callers then skip all background work for this run).
    """
    # Imported lazily so this module can load without the crypto package
    from modules.private_gallery_crypto import load_key_from_file
    crypto = load_key_from_file(REDDIT_MONITOR_KEY_FILE)
    if crypto is None:
        logger.warning("Reddit monitor crypto unavailable - key file missing or invalid")
    return crypto
# =========================================================================
# DOWNLOAD METHODS
# =========================================================================
async def check_all_now(self, from_scheduler: bool = False) -> int:
    """
    Check all enabled communities for new posts.

    Flow: load settings/crypto/cookies, iterate enabled communities with
    crash-recovery checkpointing, download + import each, then run a
    duplicate sweep for every person that received new media.

    Args:
        from_scheduler: Whether this was triggered by the scheduler.
            Scheduler runs honor the 'enabled' setting; manual runs do not.
    Returns:
        Total count of new media items imported
    """
    settings = self.get_settings()
    # Only scheduler-triggered runs respect the enabled flag
    if from_scheduler and not settings.get('enabled'):
        logger.debug("Reddit monitor is disabled")
        return 0
    crypto = self._get_crypto()
    if crypto is None:
        logger.warning("Skipping Reddit check: encryption key not available")
        return 0
    # Get enabled communities
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            "SELECT * FROM private_media_reddit_communities WHERE enabled = 1"
        )
        communities = [dict(row) for row in cursor.fetchall()]
    finally:
        conn.close()
    if not communities:
        logger.debug("No enabled Reddit communities to check")
        return 0
    # Start background task tracking
    if self.activity_manager:
        self.activity_manager.start_background_task(
            'reddit_monitor',
            'reddit_community_monitor',
            'Reddit Community Monitor',
            'Running',
            {'total_communities': len(communities), 'media_found': 0}
        )
    total_media = 0
    affected_person_ids: Set[int] = set()  # persons to dedup afterwards
    lookback_days = settings.get('lookback_days', 3)
    # Load cookies from encrypted storage
    cookies_json = self._get_cookies_json(crypto)
    # Crash recovery checkpoint: if a previous run died mid-sweep, the
    # checkpoint lets us skip communities already handled by that run
    from modules.task_checkpoint import TaskCheckpoint
    checkpoint = TaskCheckpoint('reddit_monitor', 'background')
    checkpoint.start(total_items=len(communities))
    if checkpoint.is_recovering():
        logger.info("Reddit monitor: recovering — skipping already-checked communities")
    try:
        for idx, community in enumerate(communities):
            subreddit = community['subreddit_name']
            person_id = community['person_id']
            community_id = community['id']
            if checkpoint.is_completed(str(community_id)):
                continue
            checkpoint.set_current(str(community_id))
            try:
                # Use longer lookback for communities that have never imported anything
                effective_lookback = lookback_days
                if community.get('total_media_found', 0) == 0 and not community.get('last_checked'):
                    effective_lookback = 30
                    logger.info(f"First check for r/{subreddit}, using 30-day lookback")
                media_count = await self._check_community(
                    community_id, subreddit, person_id,
                    effective_lookback, cookies_json, crypto,
                    community_idx=idx, total_communities=len(communities),
                    running_media_total=total_media
                )
                total_media += media_count
                if media_count > 0:
                    affected_person_ids.add(person_id)
                    if self.activity_manager:
                        self.activity_manager.update_background_task(
                            'reddit_monitor',
                            f'Found {media_count} new media in r/{subreddit}',
                            idx + 1, len(communities),
                            {'total_communities': len(communities), 'media_found': total_media, 'current_community': f'r/{subreddit}', 'phase': 'found', 'last_found': media_count}
                        )
            except Exception as e:
                # A failure in one community must not abort the whole sweep
                logger.error(f"Error checking r/{subreddit}: {e}")
                import traceback
                logger.debug(f"Traceback: {traceback.format_exc()}")
            # Marked even on error so recovery does not retry a bad community
            checkpoint.mark_completed(str(community_id))
        # Checkpoint complete
        checkpoint.finish()
        # Update last_checked timestamp
        self.update_settings(last_checked=datetime.now().isoformat())
        # Auto-dedup for persons that received new media
        if affected_person_ids:
            if self.activity_manager:
                self.activity_manager.update_background_task(
                    'reddit_monitor', 'Deduplicating...',
                    len(communities), len(communities),
                    {'phase': 'deduplicating', 'media_found': total_media}
                )
            dedup_deleted = self._run_dedup_for_persons(affected_person_ids, crypto)
            if dedup_deleted > 0:
                logger.info(f"Reddit monitor: auto-dedup removed {dedup_deleted} duplicates")
    finally:
        # Always clear the background-task entry, even on error
        if self.activity_manager:
            self.activity_manager.stop_background_task('reddit_monitor')
    if total_media > 0:
        logger.info(f"Reddit monitor: imported {total_media} new media items")
    else:
        logger.debug("Reddit monitor: no new media found")
    return total_media
async def download_full_community(self, community_id: int) -> int:
    """
    Download all available media from a community (no date filter).

    Unlike the scheduled check, this passes lookback_days=None so no date
    cutoff is applied (the downloader's own post-range cap still applies).

    Args:
        community_id: ID of the community to download
    Returns:
        Count of new media items imported
    """
    community = self.get_community(community_id)
    if not community:
        logger.error(f"Community {community_id} not found")
        return 0
    crypto = self._get_crypto()
    if crypto is None:
        logger.warning("Cannot download: encryption key not available")
        return 0
    cookies_json = self._get_cookies_json(crypto)
    subreddit = community['subreddit_name']
    if self.activity_manager:
        self.activity_manager.start_background_task(
            'reddit_monitor',
            'reddit_community_monitor',
            'Reddit Community Monitor',
            'Running',
            {'total_communities': 1, 'media_found': 0, 'current_community': f'r/{subreddit}', 'full_download': True}
        )
    try:
        # lookback_days=None disables the date filter entirely
        media_count = await self._check_community(
            community_id, subreddit,
            community['person_id'], None, cookies_json, crypto,
            community_idx=0, total_communities=1, running_media_total=0
        )
        return media_count
    finally:
        # Always clear the background-task entry, even on error
        if self.activity_manager:
            self.activity_manager.stop_background_task('reddit_monitor')
async def check_single_community(self, community_id: int) -> int:
    """
    Check a single community for new posts (using lookback_days filter).

    First-time communities (no media, never checked) get a 30-day
    lookback instead of the configured one.

    Args:
        community_id: ID of the community to check
    Returns:
        Count of new media items imported
    """
    community = self.get_community(community_id)
    if not community:
        logger.error(f"Community {community_id} not found")
        return 0
    crypto = self._get_crypto()
    if crypto is None:
        logger.warning("Cannot check: encryption key not available")
        return 0
    settings = self.get_settings()
    lookback_days = settings.get('lookback_days', 3)
    cookies_json = self._get_cookies_json(crypto)
    subreddit = community['subreddit_name']
    if self.activity_manager:
        self.activity_manager.start_background_task(
            'reddit_monitor',
            'reddit_community_monitor',
            'Reddit Community Monitor',
            'Running',
            {'total_communities': 1, 'media_found': 0, 'current_community': f'r/{subreddit}'}
        )
    try:
        # Use longer lookback for communities that have never imported anything
        effective_lookback = lookback_days
        if community.get('total_media_found', 0) == 0 and not community.get('last_checked'):
            effective_lookback = 30
            logger.info(f"First check for r/{subreddit}, using 30-day lookback")
        media_count = await self._check_community(
            community_id, subreddit,
            community['person_id'], effective_lookback, cookies_json, crypto,
            community_idx=0, total_communities=1, running_media_total=0
        )
        return media_count
    finally:
        # Always clear the background-task entry, even on error
        if self.activity_manager:
            self.activity_manager.stop_background_task('reddit_monitor')
async def check_communities_by_person(self, person_id: int) -> int:
    """
    Check all enabled communities for a given person.

    Same per-community flow as check_all_now, but restricted to one
    person's communities and without crash-recovery checkpointing.

    Args:
        person_id: ID of the person whose communities to check
    Returns:
        Total count of new media items imported
    """
    crypto = self._get_crypto()
    if crypto is None:
        logger.warning("Cannot check: encryption key not available")
        return 0
    # Get enabled communities for this person
    conn = self._get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            "SELECT * FROM private_media_reddit_communities WHERE person_id = ? AND enabled = 1",
            (person_id,)
        )
        communities = [dict(row) for row in cursor.fetchall()]
    finally:
        conn.close()
    if not communities:
        logger.debug(f"No enabled communities for person {person_id}")
        return 0
    settings = self.get_settings()
    lookback_days = settings.get('lookback_days', 3)
    cookies_json = self._get_cookies_json(crypto)
    if self.activity_manager:
        self.activity_manager.start_background_task(
            'reddit_monitor',
            'reddit_community_monitor',
            'Reddit Community Monitor',
            'Running',
            {'total_communities': len(communities), 'media_found': 0}
        )
    total_media = 0
    try:
        for idx, community in enumerate(communities):
            subreddit = community['subreddit_name']
            community_id = community['id']
            try:
                # Use longer lookback for communities that have never imported anything
                effective_lookback = lookback_days
                if community.get('total_media_found', 0) == 0 and not community.get('last_checked'):
                    effective_lookback = 30
                    logger.info(f"First check for r/{subreddit}, using 30-day lookback")
                media_count = await self._check_community(
                    community_id, subreddit, community['person_id'],
                    effective_lookback, cookies_json, crypto,
                    community_idx=idx, total_communities=len(communities),
                    running_media_total=total_media
                )
                total_media += media_count
                if media_count > 0 and self.activity_manager:
                    self.activity_manager.update_background_task(
                        'reddit_monitor',
                        f'Found {media_count} new media in r/{subreddit}',
                        idx + 1, len(communities),
                        {'total_communities': len(communities), 'media_found': total_media,
                         'current_community': f'r/{subreddit}', 'phase': 'found', 'last_found': media_count}
                    )
            except Exception as e:
                # A failure in one community must not abort the rest
                logger.error(f"Error checking r/{subreddit}: {e}")
                import traceback
                logger.debug(f"Traceback: {traceback.format_exc()}")
        # Auto-dedup for this person if new media was imported
        if total_media > 0:
            if self.activity_manager:
                self.activity_manager.update_background_task(
                    'reddit_monitor', 'Deduplicating...',
                    len(communities), len(communities),
                    {'phase': 'deduplicating', 'media_found': total_media}
                )
            dedup_deleted = self._run_dedup_for_persons({person_id}, crypto)
            if dedup_deleted > 0:
                logger.info(f"Reddit person check: auto-dedup removed {dedup_deleted} duplicates")
    finally:
        # Always clear the background-task entry, even on error
        if self.activity_manager:
            self.activity_manager.stop_background_task('reddit_monitor')
    if total_media > 0:
        logger.info(f"Reddit person check: imported {total_media} new media items")
    return total_media
def _update_status(self, status_text: str, community_idx: int, total_communities: int, extra: Dict = None):
"""Helper to update background task status with detailed info."""
if not self.activity_manager:
return
data = {
'total_communities': total_communities,
'media_found': extra.get('media_found', 0) if extra else 0,
}
if extra:
data.update(extra)
self.activity_manager.update_background_task(
'reddit_monitor', status_text,
community_idx, total_communities, data
)
async def _check_community(
    self, community_id: int, subreddit: str, person_id: int,
    lookback_days: Optional[int], cookies_json: Optional[str], crypto,
    community_idx: int = 0, total_communities: int = 1, running_media_total: int = 0
) -> int:
    """Check a single community and import new media.

    Pipeline: download via gallery-dl into a temp dir -> group files by
    Reddit post -> skip posts already in history -> import the rest as
    gallery posts -> update the community row's counters/last_checked.

    Args:
        community_id: Row id of the community mapping.
        subreddit: Subreddit name without the 'r/' prefix.
        person_id: Gallery person the imported media belongs to.
        lookback_days: Date filter for the download; None means no filter.
        cookies_json: Optional cookies JSON for authenticated downloads.
        crypto: Field-encryption helper for gallery data.
        community_idx / total_communities / running_media_total:
            progress bookkeeping forwarded to the activity manager.

    Returns:
        Number of media files imported for this community.
    """
    # Temp dir is removed automatically, including all downloaded files
    with tempfile.TemporaryDirectory(prefix=f'reddit_{subreddit}_') as temp_dir:
        # Phase: Downloading
        self._update_status(
            f'Downloading from r/{subreddit}...', community_idx, total_communities,
            {'current_community': f'r/{subreddit}', 'phase': 'downloading',
             'media_found': running_media_total}
        )
        # Run gallery-dl
        files = await self._run_gallery_dl(
            subreddit, temp_dir, lookback_days, cookies_json
        )
        if not files:
            logger.debug(f"No files downloaded from r/{subreddit}")
            self._update_status(
                f'No new files in r/{subreddit}', community_idx, total_communities,
                {'current_community': f'r/{subreddit}', 'phase': 'done',
                 'media_found': running_media_total}
            )
            # Still update last_checked so we know we tried
            conn = self._get_connection()
            try:
                cursor = conn.cursor()
                cursor.execute('''
                    UPDATE private_media_reddit_communities
                    SET last_checked = CURRENT_TIMESTAMP
                    WHERE id = ?
                ''', (community_id,))
                conn.commit()
            finally:
                conn.close()
            return 0
        # Phase: Processing
        self._update_status(
            f'Downloaded {len(files)} files from r/{subreddit}, grouping by post...',
            community_idx, total_communities,
            {'current_community': f'r/{subreddit}', 'phase': 'processing',
             'files_downloaded': len(files), 'media_found': running_media_total}
        )
        # Group files by Reddit post ID
        posts = self._group_files_by_post(files, temp_dir)
        # Get or create "reddit" tag
        reddit_tag_id = self._ensure_reddit_tag(crypto)
        # Filter out already-processed posts
        new_posts = {}
        for reddit_post_id, post_data in posts.items():
            if not self._is_post_processed(community_id, reddit_post_id):
                new_posts[reddit_post_id] = post_data
        if not new_posts:
            self._update_status(
                f'No new posts in r/{subreddit} ({len(posts)} already imported)',
                community_idx, total_communities,
                {'current_community': f'r/{subreddit}', 'phase': 'done',
                 'files_downloaded': len(files), 'media_found': running_media_total}
            )
            # Still need to update total_media = 0 path below
            posts_to_import = {}
        else:
            posts_to_import = new_posts
        # Import each post
        total_media = 0
        for post_num, (reddit_post_id, post_data) in enumerate(posts_to_import.items(), 1):
            num_files = len(post_data['files'])
            self._update_status(
                f'Importing post {post_num}/{len(posts_to_import)} from r/{subreddit} ({num_files} files)',
                community_idx, total_communities,
                {'current_community': f'r/{subreddit}', 'phase': 'importing',
                 'files_downloaded': len(files), 'posts_imported': post_num,
                 'posts_total': len(posts_to_import),
                 'media_found': running_media_total + total_media}
            )
            media_count = self._import_post_to_gallery(
                post_data, person_id, reddit_tag_id, crypto,
                subreddit, community_id, reddit_post_id,
                community_idx, total_communities, running_media_total + total_media
            )
            total_media += media_count
        # Update community stats
        if total_media > 0:
            # New media: bump the cached counter and both timestamps
            conn = self._get_connection()
            try:
                cursor = conn.cursor()
                cursor.execute('''
                    UPDATE private_media_reddit_communities
                    SET total_media_found = total_media_found + ?,
                        last_checked = CURRENT_TIMESTAMP,
                        updated_at = CURRENT_TIMESTAMP
                    WHERE id = ?
                ''', (total_media, community_id))
                conn.commit()
            finally:
                conn.close()
        else:
            # Nothing imported: record the check time only
            conn = self._get_connection()
            try:
                cursor = conn.cursor()
                cursor.execute('''
                    UPDATE private_media_reddit_communities
                    SET last_checked = CURRENT_TIMESTAMP
                    WHERE id = ?
                ''', (community_id,))
                conn.commit()
            finally:
                conn.close()
        return total_media
# =========================================================================
# HELPER METHODS
# =========================================================================
async def _run_gallery_dl(
    self, subreddit: str, temp_dir: str,
    lookback_days: Optional[int] = None, cookies_json: Optional[str] = None
) -> List[Path]:
    """
    Run gallery-dl to download media from a subreddit.

    Args:
        subreddit: Subreddit name without the 'r/' prefix.
        temp_dir: Directory to download into (also holds the temp cookie file).
        lookback_days: If set, filter out posts older than this many days.
        cookies_json: Optional JSON cookie array for authenticated access.

    Returns:
        List of downloaded media file paths (metadata sidecars and hidden
        files excluded). Empty list on timeout or subprocess failure.
    """
    # Use a persistent download archive so gallery-dl skips already-downloaded URLs
    archive_dir = os.path.join(os.path.dirname(self.db_path) if '/' in self.db_path else '/opt/media-downloader/data', 'cache')
    os.makedirs(archive_dir, exist_ok=True)
    archive_path = os.path.join(archive_dir, 'reddit_gallery_dl_archive.db')
    cmd = [
        self.gallery_dl_path,
        '--write-metadata',
        '--download-archive', archive_path,
        '-d', temp_dir,
    ]
    # Use REST API mode instead of OAuth API to avoid shared rate limits.
    # The default OAuth client-id is shared by all gallery-dl users globally,
    # causing 429 rate limits with many subreddits. REST mode uses www.reddit.com
    # directly with cookies for auth, bypassing OAuth rate limits entirely.
    cmd.extend(['-o', 'extractor.reddit.api=rest'])
    # Limit to 200 most recent posts per subreddit to avoid timeout from full history pagination
    cmd.extend(['--range', '1-200'])
    if lookback_days:
        cutoff = (datetime.now() - timedelta(days=lookback_days)).strftime('%Y-%m-%d')
        cmd.extend(['--filter', f"date >= datetime.strptime('{cutoff}', '%Y-%m-%d')"])
    # Write JSON cookies to a temp Netscape cookie file.
    # Ensure temp_dir exists (can be cleaned by systemd-tmpfiles or race conditions)
    if cookies_json:
        os.makedirs(temp_dir, exist_ok=True)
        temp_cookie_file = os.path.join(temp_dir, '.cookies.txt')
        if self._write_netscape_cookie_file(cookies_json, temp_cookie_file):
            cmd.extend(['--cookies', temp_cookie_file])
    # Keep the positional URL last so every option precedes it
    cmd.append(f'https://www.reddit.com/r/{subreddit}/new/')
    logger.info(f"Running gallery-dl for r/{subreddit}")
    logger.debug(f"Command: {' '.join(cmd)}")
    try:
        # We are inside a coroutine, so a running loop is guaranteed
        # (get_event_loop() is deprecated in this context).
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            lambda: subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=600  # 10 minute timeout
            )
        )
        # gallery-dl exit codes are bitflags: 1=some errors, 4=some skipped, 8=all skipped
        # Code 4 (skipped) and 5 (skipped+errors) are normal when files already exist
        if result.returncode not in (0, 1, 4, 5):
            logger.warning(f"gallery-dl returned code {result.returncode} for r/{subreddit}")
            if result.stderr:
                logger.debug(f"gallery-dl stderr: {result.stderr[:500]}")
    except subprocess.TimeoutExpired:
        logger.error(f"gallery-dl timed out for r/{subreddit}")
        return []
    except Exception as e:
        logger.error(f"gallery-dl failed for r/{subreddit}: {e}")
        return []
    # Collect downloaded media, skipping metadata sidecars and hidden files
    # (the '.cookies.txt' temp file starts with '.', so it is excluded too)
    downloaded = []
    for root, _dirs, filenames in os.walk(temp_dir):
        for fname in filenames:
            if fname.startswith('.') or fname.endswith('.json'):
                continue
            downloaded.append(Path(root) / fname)
    logger.info(f"Downloaded {len(downloaded)} files from r/{subreddit}")
    return downloaded
def _group_files_by_post(
self, files: List[Path], temp_dir: str
) -> Dict[str, Dict]:
"""
Group downloaded files by their Reddit post ID using metadata JSON sidecars.
Returns:
Dict mapping reddit_post_id -> {
'files': [Path],
'title': str,
'date': str,
'source_url': str
}
"""
posts: Dict[str, Dict] = {}
for file_path in files:
# Look for matching metadata JSON sidecar
json_path = file_path.with_suffix(file_path.suffix + '.json')
if not json_path.exists():
# Try without double extension
json_path = file_path.with_suffix('.json')
metadata = {}
if json_path.exists():
try:
with open(json_path, 'r', encoding='utf-8') as f:
metadata = json.load(f)
except (json.JSONDecodeError, Exception) as e:
logger.debug(f"Failed to parse metadata for {file_path.name}: {e}")
# Extract Reddit post ID - gallery-dl uses various field names
reddit_post_id = None
for key in ('id', 'reddit_id', 'parent_id'):
if key in metadata:
reddit_post_id = str(metadata[key])
break
if not reddit_post_id:
# Use filename-based grouping as fallback
# gallery-dl typically names files like: subreddit_postid_num.ext
parts = file_path.stem.split('_')
if len(parts) >= 2:
reddit_post_id = parts[-2] if len(parts) >= 3 else parts[-1]
else:
reddit_post_id = file_path.stem
# Extract post date (ensure ISO format in local time for frontend)
# gallery-dl stores Reddit dates in UTC — convert to local time
post_date = None
if 'date' in metadata:
date_val = metadata['date']
if isinstance(date_val, str):
try:
from datetime import timezone as tz
for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d'):
try:
utc_dt = datetime.strptime(date_val, fmt).replace(tzinfo=tz.utc)
post_date = utc_dt.astimezone().strftime('%Y-%m-%dT%H:%M:%S')
break
except ValueError:
continue
if not post_date:
post_date = date_val # fallback to raw string
except Exception:
post_date = date_val
elif isinstance(date_val, (int, float)):
try:
post_date = datetime.fromtimestamp(date_val).isoformat()
except (ValueError, OSError):
pass
if not post_date and 'created_utc' in metadata:
try:
post_date = datetime.fromtimestamp(metadata['created_utc']).isoformat()
except (ValueError, OSError):
pass
if not post_date:
post_date = datetime.now().isoformat()
# Extract title
title = metadata.get('title', metadata.get('description', ''))
# Build source URL
subreddit = metadata.get('subreddit', '')
source_url = f"https://www.reddit.com/r/{subreddit}/comments/{reddit_post_id}" if subreddit else ''
if reddit_post_id not in posts:
posts[reddit_post_id] = {
'files': [],
'title': title,
'date': post_date,
'source_url': source_url,
}
posts[reddit_post_id]['files'].append(file_path)
return posts
def _is_post_processed(self, community_id: int, reddit_post_id: str) -> bool:
"""Check if a Reddit post has already been processed."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute(
"SELECT id FROM private_media_reddit_history WHERE community_id = ? AND reddit_post_id = ?",
(community_id, reddit_post_id)
)
return cursor.fetchone() is not None
finally:
conn.close()
def _ensure_reddit_tag(self, crypto) -> int:
"""Find or create a 'reddit' tag in private_gallery_tags."""
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute("SELECT id, encrypted_name FROM private_gallery_tags")
for row in cursor.fetchall():
try:
name = crypto.decrypt_field(row['encrypted_name'])
if name.lower() == 'reddit':
return row['id']
except Exception:
continue
# Create the tag
encrypted_name = crypto.encrypt_field('Reddit')
cursor.execute('''
INSERT INTO private_gallery_tags (encrypted_name, color)
VALUES (?, '#ff4500')
''', (encrypted_name,))
conn.commit()
tag_id = cursor.lastrowid
logger.info(f"Created 'Reddit' tag with ID {tag_id}")
return tag_id
finally:
conn.close()
    def _import_post_to_gallery(
        self, post_data: Dict, person_id: int, reddit_tag_id: int,
        crypto, subreddit: str, community_id: int, reddit_post_id: str,
        community_idx: int = 0, total_communities: int = 1, running_media_total: int = 0
    ) -> int:
        """
        Import a Reddit post's media files into the private gallery.

        Creates one private_media_posts row, then encrypts and inserts each
        downloaded file as a private_media row (skipping per-person SHA-256
        duplicates). On success the post is tagged with the 'reddit' tag and
        recorded in private_media_reddit_history; if nothing imports, the
        empty post row is deleted again.

        Args:
            post_data: Dict with 'files' (list of Path), 'title', 'date', and
                optional 'source_url'.
            person_id: Gallery person the post belongs to.
            reddit_tag_id: ID of the 'reddit' tag to apply to the post.
            crypto: Crypto helper; assumed to provide encrypt_field() and
                encrypt_file() (encrypt_file returning truthy on success).
            subreddit: Subreddit name, used for status/log messages only.
            community_id: Monitored-community row ID, for history tracking.
            reddit_post_id: Reddit's post ID, for history tracking.
            community_idx: Index of the current community (status UI only).
            total_communities: Total communities this run (status UI only).
            running_media_total: Media imported so far this run (status UI only).

        Returns:
            Number of media files successfully imported
        """
        files = post_data['files']
        title = post_data['title']
        post_date = post_data['date']
        source_url = post_data.get('source_url', '')
        if not files:
            return 0
        # Get storage path from config
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT value FROM private_media_config WHERE key = 'storage_path'")
            row = cursor.fetchone()
            storage_path = Path(row['value']) if row else Path('/opt/immich/private')
        finally:
            conn.close()
        data_path = storage_path / 'data'
        thumbs_path = storage_path / 'thumbs'
        data_path.mkdir(parents=True, exist_ok=True)
        thumbs_path.mkdir(parents=True, exist_ok=True)
        # Create a post; description and date are stored encrypted at rest
        encrypted_desc = crypto.encrypt_field(title) if title else None
        encrypted_date = crypto.encrypt_field(post_date) if post_date else crypto.encrypt_field(datetime.now().isoformat())
        now_iso = datetime.now().isoformat()
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute('''
                INSERT INTO private_media_posts (person_id, encrypted_description, encrypted_media_date, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?)
            ''', (person_id, encrypted_desc, encrypted_date, now_iso, now_iso))
            conn.commit()
            post_id = cursor.lastrowid
        finally:
            conn.close()
        media_count = 0
        # NOTE(review): media_ids is appended to but never read afterwards —
        # confirm whether it can be removed.
        media_ids = []
        total_files = len(files)
        for file_idx, file_path in enumerate(files, 1):
            try:
                # Skip missing or zero-byte downloads (failed/partial files).
                if not file_path.exists() or file_path.stat().st_size == 0:
                    continue
                # Update status: encrypting/importing file
                self._update_status(
                    f'Encrypting file {file_idx}/{total_files} from r/{subreddit}',
                    community_idx, total_communities,
                    {'current_community': f'r/{subreddit}', 'phase': 'encrypting',
                     'current_file': file_idx, 'total_files': total_files,
                     'media_found': running_media_total + media_count}
                )
                # Calculate file hash, streamed in 64 KiB chunks
                sha256 = hashlib.sha256()
                with open(file_path, 'rb') as f:
                    for chunk in iter(lambda: f.read(65536), b''):
                        sha256.update(chunk)
                file_hash = sha256.hexdigest()
                # Check for duplicates (scoped by person)
                conn = self._get_connection()
                try:
                    cursor = conn.cursor()
                    cursor.execute(
                        'SELECT id FROM private_media WHERE file_hash = ? AND person_id = ?',
                        (file_hash, person_id)
                    )
                    if cursor.fetchone():
                        logger.debug(f"Duplicate file skipped: {file_path.name}")
                        continue
                finally:
                    conn.close()
                # Get file info (type, mime, dimensions, duration)
                file_info = self._get_file_info(file_path)
                file_size = file_path.stat().st_size
                # Compute perceptual hash (used later for near-duplicate dedup)
                perceptual_hash = self._compute_perceptual_hash(file_path)
                # Generate storage ID (used as the on-disk encrypted filename)
                storage_id = str(uuid.uuid4())
                # Generate thumbnail in a temp location before encrypting it
                temp_thumb = Path(tempfile.gettempdir()) / f"pg_thumb_{storage_id}.jpg"
                self._generate_thumbnail(file_path, temp_thumb, file_info['file_type'])
                # Encrypt the file
                encrypted_file = data_path / f"{storage_id}.enc"
                if not crypto.encrypt_file(file_path, encrypted_file):
                    logger.error(f"Encryption failed for {file_path.name}")
                    continue
                # Encrypt thumbnail (best-effort; media is usable without one)
                if temp_thumb.exists():
                    encrypted_thumb = thumbs_path / f"{storage_id}.enc"
                    crypto.encrypt_file(temp_thumb, encrypted_thumb)
                    try:
                        temp_thumb.unlink()
                    except Exception:
                        pass
                # Insert media record
                encrypted_filename = crypto.encrypt_field(file_path.name)
                encrypted_source = crypto.encrypt_field(source_url)
                conn = self._get_connection()
                try:
                    cursor = conn.cursor()
                    cursor.execute('''
                        INSERT INTO private_media (
                            post_id, storage_id, encrypted_filename, encrypted_description,
                            file_hash, file_size, file_type, mime_type,
                            width, height, duration, person_id,
                            encrypted_media_date, source_type, encrypted_source_path,
                            perceptual_hash, created_at
                        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    ''', (
                        post_id,
                        storage_id,
                        encrypted_filename,
                        None,
                        file_hash,
                        file_size,
                        file_info['file_type'],
                        file_info['mime_type'],
                        file_info['width'],
                        file_info['height'],
                        file_info['duration'],
                        person_id,
                        encrypted_date,
                        'reddit',
                        encrypted_source,
                        perceptual_hash,
                        now_iso
                    ))
                    media_id = cursor.lastrowid
                    media_ids.append(media_id)
                    conn.commit()
                finally:
                    conn.close()
                media_count += 1
            except Exception as e:
                # Per-file failures are logged and skipped so one bad file
                # doesn't abort the whole post import.
                logger.error(f"Failed to import {file_path.name}: {e}")
                import traceback
                logger.debug(f"Traceback: {traceback.format_exc()}")
        # Apply reddit tag to the post
        if media_count > 0:
            conn = self._get_connection()
            try:
                cursor = conn.cursor()
                cursor.execute('''
                    INSERT OR IGNORE INTO private_media_post_tags (post_id, tag_id)
                    VALUES (?, ?)
                ''', (post_id, reddit_tag_id))
                conn.commit()
            finally:
                conn.close()
        else:
            # Delete the empty post
            conn = self._get_connection()
            try:
                cursor = conn.cursor()
                cursor.execute("DELETE FROM private_media_posts WHERE id = ?", (post_id,))
                conn.commit()
            finally:
                conn.close()
        # Record in history only if we successfully imported media, so a
        # failed post gets retried on the next monitor pass.
        if media_count > 0:
            conn = self._get_connection()
            try:
                cursor = conn.cursor()
                cursor.execute('''
                    INSERT OR IGNORE INTO private_media_reddit_history
                    (community_id, reddit_post_id, media_count)
                    VALUES (?, ?, ?)
                ''', (community_id, reddit_post_id, media_count))
                conn.commit()
            finally:
                conn.close()
        if media_count > 0:
            logger.info(f"Imported {media_count} files from r/{subreddit} post {reddit_post_id}")
        return media_count
def _get_file_info(self, file_path: Path) -> Dict[str, Any]:
"""Get file type, mime type, and dimensions."""
ext = file_path.suffix.lower().lstrip('.')
mime_type, _ = mimetypes.guess_type(str(file_path))
if not mime_type:
mime_type = 'application/octet-stream'
image_exts = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff', 'heic', 'heif', 'avif'}
video_exts = {'mp4', 'mov', 'avi', 'mkv', 'webm', 'm4v', 'wmv', 'flv'}
if ext in image_exts:
file_type = 'image'
elif ext in video_exts:
file_type = 'video'
else:
file_type = 'other'
info = {
'file_type': file_type,
'mime_type': mime_type,
'width': None,
'height': None,
'duration': None
}
if file_type == 'image':
try:
from PIL import Image
with Image.open(file_path) as img:
info['width'], info['height'] = img.size
except Exception:
pass
if file_type == 'video':
try:
result = subprocess.run([
'ffprobe', '-v', 'quiet', '-print_format', 'json',
'-show_streams', '-show_format', str(file_path)
], capture_output=True, text=True, timeout=30)
if result.returncode == 0:
data = json.loads(result.stdout)
for stream in data.get('streams', []):
if stream.get('codec_type') == 'video':
info['width'] = stream.get('width')
info['height'] = stream.get('height')
break
if 'format' in data:
duration = data['format'].get('duration')
if duration:
info['duration'] = float(duration)
except Exception:
pass
return info
def _run_dedup_for_persons(self, person_ids: Set[int], crypto) -> int:
"""
Run perceptual dedup for the given person IDs, auto-deleting duplicates.
Uses the same algorithm as the dashboard's dedup scanner.
Returns total number of duplicates deleted.
"""
if not person_ids:
return 0
# Read config for threshold and storage path
conn = self._get_connection()
try:
cursor = conn.cursor()
cursor.execute("SELECT key, value FROM private_media_config WHERE key IN ('duplicate_auto_select_distance', 'storage_path')")
config = {row['key']: row['value'] for row in cursor.fetchall()}
finally:
conn.close()
threshold = int(config.get('duplicate_auto_select_distance', '2'))
storage_path = Path(config.get('storage_path', '/opt/immich/private'))
data_path = storage_path / 'data'
thumbs_path = storage_path / 'thumbs'
total_deleted = 0
for person_id in person_ids:
try:
total_deleted += self._dedup_person(person_id, crypto, threshold, data_path, thumbs_path, storage_path)
except Exception as e:
logger.error(f"Dedup failed for person {person_id}: {e}")
import traceback
logger.debug(f"Dedup traceback: {traceback.format_exc()}")
return total_deleted
    def _dedup_person(self, person_id: int, crypto, threshold: int, data_path: Path, thumbs_path: Path, storage_path: Path) -> int:
        """Run dedup for a single person. Returns number of duplicates deleted.

        Groups media whose perceptual hashes are within `threshold` Hamming
        distance using union-find, keeps the highest-resolution item of each
        group, and deletes the rest (encrypted file, thumbnail, DB row).

        Args:
            person_id: Person whose media is scanned.
            crypto: Crypto helper (only used by the post-cleanup step).
            threshold: Max Hamming distance to treat two items as duplicates.
            data_path: Directory holding encrypted media files.
            thumbs_path: Directory holding encrypted thumbnails.
            storage_path: Gallery storage root (forwarded to cleanup).
        """
        # Fetch all media with perceptual hashes for this person
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT id, post_id, storage_id, file_type, perceptual_hash, width, height
                FROM private_media
                WHERE post_id IN (SELECT id FROM private_media_posts WHERE person_id = ?)
                AND file_type IN ('image', 'video')
                AND perceptual_hash IS NOT NULL
                AND perceptual_hash != ''
            ''', (person_id,))
            all_media = [dict(row) for row in cursor.fetchall()]
        finally:
            conn.close()
        if len(all_media) < 2:
            return 0
        # Pre-compute integer values for fast XOR-based hamming distance
        hash_ints = {}
        for m in all_media:
            try:
                hash_ints[m['id']] = int(m['perceptual_hash'], 16)
            except (ValueError, TypeError):
                # Malformed hash: item simply can't participate in matching.
                pass
        # Union-Find for grouping duplicates
        parent = {m['id']: m['id'] for m in all_media}
        def find(x):
            # Path-halving find: flattens the tree as it walks up.
            while parent[x] != x:
                parent[x] = parent[parent[x]]
                x = parent[x]
            return x
        def union(x, y):
            px, py = find(x), find(y)
            if px != py:
                parent[px] = py
        # Compare all pairs (O(n^2) over this person's hashed media)
        for i in range(len(all_media)):
            id_i = all_media[i]['id']
            if id_i not in hash_ints:
                continue
            hi = hash_ints[id_i]
            for j in range(i + 1, len(all_media)):
                id_j = all_media[j]['id']
                if id_j not in hash_ints:
                    continue
                # Hamming distance = population count of the XOR.
                dist = bin(hi ^ hash_ints[id_j]).count('1')
                if dist <= threshold:
                    union(id_i, id_j)
        # Group by root
        groups: Dict[int, list] = {}
        for m in all_media:
            root = find(m['id'])
            if root not in groups:
                groups[root] = []
            groups[root].append(m)
        # Filter to actual duplicate groups (size > 1)
        duplicate_groups = [g for g in groups.values() if len(g) > 1]
        if not duplicate_groups:
            return 0
        # In each group: keep highest resolution, mark rest for deletion
        to_delete = []
        for group in duplicate_groups:
            # Sort by resolution (width * height) descending, keep first
            group.sort(key=lambda m: (m['width'] or 0) * (m['height'] or 0), reverse=True)
            to_delete.extend(group[1:])  # All except the highest resolution
        if not to_delete:
            return 0
        # Delete duplicate files and DB records
        deleted = 0
        conn = self._get_connection()
        try:
            cursor = conn.cursor()
            for media in to_delete:
                storage_id = media['storage_id']
                # Delete encrypted data file
                data_file = data_path / f"{storage_id}.enc"
                if data_file.exists():
                    data_file.unlink()
                # Delete thumbnail file
                thumb_file = thumbs_path / f"{storage_id}.enc"
                if thumb_file.exists():
                    thumb_file.unlink()
                # Delete DB record
                cursor.execute('DELETE FROM private_media WHERE id = ?', (media['id'],))
                deleted += 1
            # Single commit covers all deletions for this person.
            conn.commit()
        finally:
            conn.close()
        # Clean up empty reddit-tagged posts
        self._cleanup_empty_reddit_posts_after_dedup(crypto, storage_path)
        logger.info(f"Dedup: deleted {deleted} duplicates across {len(duplicate_groups)} groups for person {person_id}")
        return deleted
    def _cleanup_empty_reddit_posts_after_dedup(self, crypto, storage_path: Path):
        """Delete reddit-tagged posts that have no remaining media after dedup.

        Best-effort: any failure is logged and swallowed so dedup itself
        still succeeds.

        Args:
            crypto: Crypto helper used to decrypt tag names when locating
                the 'reddit' tag.
            storage_path: NOTE(review): currently unused in this method —
                confirm with callers before removing the parameter.
        """
        try:
            conn = self._get_connection()
            try:
                # Find the reddit tag ID (tag names are stored encrypted,
                # so each row must be decrypted to compare).
                cursor = conn.cursor()
                cursor.execute("SELECT id, encrypted_name FROM private_gallery_tags")
                reddit_tag_id = None
                for row in cursor.fetchall():
                    try:
                        name = crypto.decrypt_field(row['encrypted_name'])
                        if name and name.lower() == 'reddit':
                            reddit_tag_id = row['id']
                            break
                    except Exception:
                        continue
            finally:
                conn.close()
            if reddit_tag_id is None:
                # No reddit tag exists yet, so there is nothing to clean up.
                return
            # Find empty reddit-tagged posts
            conn = self._get_connection()
            try:
                cursor = conn.cursor()
                cursor.execute('''
                    SELECT p.id FROM private_media_posts p
                    JOIN private_media_post_tags pt ON pt.post_id = p.id
                    WHERE pt.tag_id = ?
                    AND NOT EXISTS (SELECT 1 FROM private_media m WHERE m.post_id = p.id)
                ''', (reddit_tag_id,))
                empty_posts = [row['id'] for row in cursor.fetchall()]
            finally:
                conn.close()
            if not empty_posts:
                return
            # Delete empty posts (tag links first, then the post rows)
            conn = self._get_connection()
            try:
                cursor = conn.cursor()
                for post_id in empty_posts:
                    cursor.execute('DELETE FROM private_media_post_tags WHERE post_id = ?', (post_id,))
                    cursor.execute('DELETE FROM private_media_posts WHERE id = ?', (post_id,))
                conn.commit()
            finally:
                conn.close()
            logger.info(f"Dedup cleanup: removed {len(empty_posts)} empty reddit-tagged posts")
        except Exception as e:
            logger.error(f"Failed to cleanup empty reddit posts after dedup: {e}")
def _compute_perceptual_hash(self, file_path: Path) -> Optional[str]:
"""Calculate perceptual hash for an image or video file."""
try:
import imagehash
from PIL import Image
except ImportError:
return None
ext = file_path.suffix.lower().lstrip('.')
image_exts = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff', 'heic', 'heif', 'avif'}
video_exts = {'mp4', 'mov', 'avi', 'mkv', 'webm', 'm4v', 'wmv', 'flv'}
pil_image = None
frame = None
frame_rgb = None
try:
if ext in video_exts:
try:
import cv2
except ImportError:
return None
cap = cv2.VideoCapture(str(file_path))
if not cap.isOpened():
return None
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
cap.set(cv2.CAP_PROP_POS_FRAMES, int(total_frames * 0.5))
ret, frame = cap.read()
cap.release()
if not ret or frame is None:
return None
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(frame_rgb)
elif ext in image_exts:
pil_image = Image.open(file_path)
else:
return None
phash = str(imagehash.dhash(pil_image, hash_size=16))
return phash
except Exception:
return None
finally:
if pil_image is not None:
pil_image.close()
del pil_image
if frame_rgb is not None:
del frame_rgb
if frame is not None:
del frame
def _generate_thumbnail(self, file_path: Path, output_path: Path, file_type: str) -> bool:
"""Generate a thumbnail for an image or video."""
try:
output_path.parent.mkdir(parents=True, exist_ok=True)
if file_type == 'image':
from PIL import Image, ImageOps
with Image.open(file_path) as img:
img = ImageOps.exif_transpose(img)
img.thumbnail((400, 400))
if img.mode in ('RGBA', 'P'):
img = img.convert('RGB')
img.save(output_path, 'JPEG', quality=85)
return True
elif file_type == 'video':
result = subprocess.run([
'ffmpeg', '-y', '-i', str(file_path),
'-ss', '00:00:01', '-vframes', '1',
'-vf', 'scale=400:-1:force_original_aspect_ratio=decrease',
str(output_path)
], capture_output=True, timeout=30)
return result.returncode == 0 and output_path.exists()
except Exception as e:
logger.error(f"Thumbnail generation failed: {e}")
return False