# media-downloader/modules/imginn_module.py

#!/usr/bin/env python3
"""
ImgInn downloader module with FastDL-compatible file naming
Format: {profile}_{YYYYMMDD_HHMMSS}_{media_id}{ext}
"""

import os
import json
import time
import random
import re
import subprocess
import platform
import requests
from pathlib import Path
from datetime import datetime, timedelta
from modules.base_module import LoggingMixin
from modules.cloudflare_handler import (
    CloudflareHandler, SiteStatus, get_flaresolverr_user_agent,
    get_playwright_context_options, get_playwright_stealth_scripts
)
from modules.instagram_utils import (
    extract_instagram_media_id,
    scan_existing_files_for_media_ids,
    record_instagram_download,
    is_instagram_downloaded
)

from typing import Dict, Optional
from playwright.sync_api import sync_playwright


class ImgInnDownloader(LoggingMixin):
    """ImgInn downloader with FastDL-compatible naming"""

    def __init__(self,
                 headless: bool = True,
                 cookie_file: str = "/opt/media-downloader/cookies/imginn_cookies.json",
                 show_progress: bool = True,
                 use_database: bool = True,
                 log_callback=None,
                 unified_db=None,
    ):
        """Create a downloader wired into the media-downloader system.

        Args:
            headless: Passed to the Chromium launch when the browser starts.
            cookie_file: Path of the JSON cookie file; only used when no
                unified_db is supplied.
            show_progress: Stored on the instance as ``show_progress``.
            use_database: Enables database-backed tracking; forced to False
                when no unified_db is supplied.
            log_callback: Forwarded to the logging mixin.
            unified_db: Unified database object used for scraper config,
                cookies and download records.
        """
        # Logging comes first so every later step can call self.log
        self._init_logger('Instagram', log_callback, default_module='Download')

        # Plain options and per-session bookkeeping
        self.headless = headless
        self.show_progress = show_progress
        self.download_count = 0
        self.downloaded_files = set()  # Media IDs already seen
        self.pending_downloads = []  # Downloads awaiting deferred database recording
        self.scraper_id = 'imginn'  # Scraper ID in database

        # Rate limiting - remember the last scrape so Cloudflare is not hammered
        self._last_scrape_time = None
        self._min_scrape_interval = 15  # Minimum seconds between scrape types

        # Transient page load failures seen in this session
        self._page_load_failures = 0
        self._page_load_failure_threshold = 5  # Escalate to error after this many

        # Browser objects are created lazily and reused across profiles
        self.playwright = None
        self.browser = None
        self.context = None
        self.page = None

        # The unified database is used directly, but only when both supplied and enabled
        database_active = bool(unified_db and use_database)
        self.unified_db = unified_db if database_active else None
        self.use_database = use_database if database_active else False

        # Activity status manager for real-time updates
        from modules.activity_status import get_activity_manager
        self.activity_manager = get_activity_manager(unified_db)

        # Scraper configuration: proxy from the database, or a cookie file without one
        self.proxy_url = None
        self.cookie_file = None  # None means "cookies live in the database"

        if unified_db:
            scraper_config = unified_db.get_scraper(self.scraper_id)
            if (scraper_config
                    and scraper_config.get('proxy_enabled')
                    and scraper_config.get('proxy_url')):
                self.proxy_url = scraper_config['proxy_url']
                self.log(f"Using proxy: {self.proxy_url}", "info")
        else:
            # No database at all: fall back to the on-disk cookie file
            self.cookie_file = Path(cookie_file)
            self.cookie_file.parent.mkdir(parents=True, exist_ok=True)

        # User-Agent fetched dynamically so it matches FlareSolverr
        self.user_agent = get_flaresolverr_user_agent()

        # Universal Cloudflare handler; cookie_file=None selects database storage
        handler_cookie_path = str(self.cookie_file) if self.cookie_file else None
        self.cf_handler = CloudflareHandler(
            module_name="ImgInn",
            cookie_file=handler_cookie_path,
            user_agent=self.user_agent,
            logger=self.logger,
            aggressive_expiry=True,  # Refresh cookies expiring within 7 days
            proxy_url=self.proxy_url  # Pass proxy to FlareSolverr
        )

        # Mirrors of handler settings, kept for backwards compatibility
        self.flaresolverr_url = self.cf_handler.flaresolverr_url
        self.flaresolverr_enabled = self.cf_handler.flaresolverr_enabled

        # Finally pull any stored cookies out of the database
        self._load_cookies_from_db()

    def _load_cookies_from_db(self):
        """Load cookies from database if available"""
        if not self.unified_db:
            return

        try:
            cookies = self.unified_db.get_scraper_cookies(self.scraper_id)
            if cookies:
                # Load into CloudflareHandler
                self.cf_handler._cookies = cookies
                self.log(f"Loaded {len(cookies)} cookies from database", "debug")
        except Exception as e:
            self.log(f"Error loading cookies from database: {e}", "warning")

    def _save_cookies_to_db(self, cookies: list, user_agent: str = None):
        """Save cookies to database

        Args:
            cookies: List of cookie dictionaries
            user_agent: User agent to associate with cookies (important for cf_clearance).
                       If not provided, uses self.user_agent as fallback.
        """
        if not self.unified_db:
            return

        try:
            # Use provided user_agent or fall back to self.user_agent
            ua = user_agent or self.user_agent
            self.unified_db.save_scraper_cookies(
                self.scraper_id,
                cookies,
                user_agent=ua,
                merge=True
            )
            self.log(f"Saved {len(cookies)} cookies to database (UA: {ua[:50]}...)", "debug")
        except Exception as e:
            self.log(f"Error saving cookies to database: {e}", "warning")

    def _cookies_expired(self):
        """Check if cookies are expired - delegates to CloudflareHandler"""
        return self.cf_handler.cookies_expired()

    def _get_cookies_for_requests(self):
        """Get cookies in format for requests library - delegates to CloudflareHandler"""
        return self.cf_handler.get_cookies_dict()

    def _get_cookies_via_flaresolverr(self, url="https://imginn.com/", max_retries=2):
        """Use FlareSolverr to bypass Cloudflare - delegates to CloudflareHandler

        Args:
            url: URL to fetch
            max_retries: Maximum number of retry attempts (default: 2)

        Returns:
            True if cookies obtained successfully, False otherwise
        """
        success = self.cf_handler.get_cookies_via_flaresolverr(url, max_retries)

        # Save cookies to database if successful
        if success and self.unified_db:
            cookies_list = self.cf_handler.get_cookies_list()
            if cookies_list:
                # CRITICAL: Get the user_agent from FlareSolverr solution, not self.user_agent
                # cf_clearance cookies are fingerprinted to the browser that solved the challenge
                flaresolverr_ua = self.cf_handler.get_user_agent()
                self._save_cookies_to_db(cookies_list, user_agent=flaresolverr_ua)

        return success

    def _enforce_rate_limit(self, scrape_type: str = "scrape"):
        """Enforce rate limiting between scrape operations to avoid Cloudflare blocks.

        Args:
            scrape_type: Type of scrape (posts, stories, tagged) for logging
        """
        import random

        if self._last_scrape_time is not None:
            elapsed = time.time() - self._last_scrape_time
            if elapsed < self._min_scrape_interval:
                # Add random jitter (5-15 seconds) to the delay
                jitter = random.uniform(5, 15)
                wait_time = self._min_scrape_interval - elapsed + jitter
                self.log(f"Rate limiting: waiting {wait_time:.1f}s before {scrape_type} (Cloudflare avoidance)", "info")
                time.sleep(wait_time)

        self._last_scrape_time = time.time()

    def _has_valid_cookies(self):
        """Check if we have valid cookies (either in file or database)"""
        if self.unified_db:
            cookies = self.unified_db.get_scraper_cookies(self.scraper_id)
            return cookies and len(cookies) > 0
        elif self.cookie_file:
            return self.cookie_file.exists()
        return False

    def _start_browser(self):
        """Start browser if not already running (reusable across profiles)"""
        # Try to get fresh cookies via FlareSolverr if we don't have them or they're old
        # Do this BEFORE the browser reuse check so cookies are always checked
        if not self._has_valid_cookies() or self._cookies_expired():
            self.log("Cookies missing or expired, attempting FlareSolverr bypass...", "info")
            if self._get_cookies_via_flaresolverr():
                self.log("Successfully got fresh cookies from FlareSolverr", "info")
            else:
                self.log("FlareSolverr unavailable, will try with Playwright", "warning")

        if self.browser is not None:
            self.log("Browser already running, reusing...", "debug")
            return

        import os
        # Use environment variable if set, otherwise use standard location
        if 'PLAYWRIGHT_BROWSERS_PATH' not in os.environ:
            os.environ['PLAYWRIGHT_BROWSERS_PATH'] = '/root/.cache/ms-playwright'
        os.environ['DISPLAY'] = ':100'  # Use Xvfb virtual display

        self.log("Starting browser (Chromium)...", "info")
        self.playwright = sync_playwright().start()

        self.browser = self.playwright.chromium.launch(
            headless=self.headless,
            args=[
                '--disable-blink-features=AutomationControlled',
                '--disable-dev-shm-usage',
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-infobars',
                '--disable-background-timer-throttling',
                '--disable-backgrounding-occluded-windows',
                '--disable-renderer-backgrounding'
            ]
        )

        # CRITICAL: Browser fingerprint must match FlareSolverr for cookies to work
        # Get dynamic fingerprint settings from FlareSolverr
        context_options = get_playwright_context_options()

        # IMPORTANT: If cookies have a stored user_agent, use THAT user_agent
        # Cloudflare cf_clearance cookies are fingerprinted to the browser that solved the challenge
        try:
            stored_user_agent = self.unified_db.get_scraper_cookies_user_agent(self.scraper_id)
            if stored_user_agent:
                self.log(f"Using stored cookie user_agent: {stored_user_agent[:50]}...", "debug")
                context_options['user_agent'] = stored_user_agent
            else:
                self.log(f"Using fingerprint: UA={context_options['user_agent'][:50]}...", "debug")
        except Exception as e:
            self.log(f"Error getting stored user_agent, using default: {e}", "debug")

        self.context = self.browser.new_context(**context_options)

        # Load cookies
        self.load_cookies(self.context)

        self.page = self.context.new_page()

        # Add comprehensive anti-detection scripts (dynamically from cloudflare_handler)
        self.page.add_init_script(get_playwright_stealth_scripts())

        self.log("Browser started and ready", "info")

    def _stop_browser(self):
        """Stop the browser safely with proper error handling"""
        # Close context first
        if self.context:
            try:
                self.context.close()
                self.log("Browser context closed", "debug")
            except Exception as e:
                self.log(f"Error closing browser context: {e}", "warning")
            finally:
                self.context = None

        # Close browser
        if self.browser:
            try:
                self.browser.close()
                self.log("Browser closed", "debug")
            except Exception as e:
                self.log(f"Error closing browser: {e}", "warning")
            finally:
                self.browser = None

        # Stop playwright
        if self.playwright:
            try:
                self.playwright.stop()
            except Exception as e:
                self.log(f"Error stopping playwright: {e}", "warning")
            finally:
                self.playwright = None

        self.page = None

    def __del__(self):
        """Cleanup browser when instance is destroyed"""
        self._stop_browser()

    def __enter__(self) -> "ImgInnDownloader":
        """Context manager entry - allows using 'with' statement.

        Returns:
            This instance itself; the browser is not started here.
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - ensures browser cleanup.

        Args:
            exc_type: Exception class raised inside the ``with`` body, if any (unused).
            exc_val: Exception instance raised inside the ``with`` body, if any (unused).
            exc_tb: Traceback of that exception, if any (unused).

        Returns:
            False, so an exception from the ``with`` body keeps propagating.
        """
        # Always tear the browser down, whether or not the body raised
        self._stop_browser()
        return False  # Don't suppress exceptions

    def get_profile_info(self, username: str) -> Optional[Dict]:
        """Extract profile info (avatar URL, bio, display name) from imginn profile page.

        Applies the rate limit, makes sure the browser is running, opens
        ``https://imginn.com/<username lowercased>/?ref=index`` and runs a
        JavaScript snippet in the page that tries several CSS selectors for
        each field. A debug screenshot is written to ``/tmp`` on every run; when
        nothing is extracted the first 50000 characters of the page HTML are
        written there as well.

        Args:
            username: Profile name; lowercased for the URL, used as-is in the
                debug file names and log messages.

        Returns dict with keys: avatar_url, bio, display_name, or None on failure.
        Only the keys that were found are present. Exceptions are logged and
        turned into a None result.
        """
        import time as _time
        import random as _random

        # Rate-limit under the "posts" label, then (re)use the shared browser page
        self._enforce_rate_limit("posts")
        self._start_browser()
        page = self.page
        if not page:
            return None

        try:
            url = f"https://imginn.com/{username.lower()}/?ref=index"
            self.log(f"Fetching profile info for @{username} from imginn", "info")
            page.goto(url, wait_until='domcontentloaded')

            # Fixed 5s pause plus up to 2s of random jitter before checking the page
            wait_time = 5 + _random.uniform(0, 2)
            _time.sleep(wait_time)

            # Give up when the Cloudflare wait helper reports the page did not load
            if not self.wait_for_cloudflare(page):
                self.log("Page didn't load for profile info extraction", "warning")
                return None

            # Persist the cookies of the now-loaded context, then pause briefly
            self.save_cookies(self.context)
            _time.sleep(2)

            # Use JavaScript to extract profile info with multiple selector strategies
            profile_info = page.evaluate("""() => {
                const result = {};

                // --- Avatar ---
                // Strategy 1: img inside a profile/user info section
                const avatarSelectors = [
                    '.profile-avatar img',
                    '.user-avatar img',
                    '.avatar img',
                    '.profile-info img',
                    '.info img:first-of-type',
                    'header img',
                    '.user img',
                ];
                for (const sel of avatarSelectors) {
                    const el = document.querySelector(sel);
                    if (el && el.src && !el.src.includes('lazy') && !el.src.includes('data:')) {
                        result.avatar_url = el.src;
                        break;
                    }
                }
                // Strategy 2: find small/round img with scontent or profile in src
                if (!result.avatar_url) {
                    const imgs = document.querySelectorAll('img');
                    for (const img of imgs) {
                        const src = img.src || '';
                        if ((src.includes('scontent') || src.includes('profile') || src.includes('avatar')
                            || src.includes('imginn.com'))
                            && !src.includes('lazy') && !src.includes('data:')) {
                            const rect = img.getBoundingClientRect();
                            if (rect.width > 20 && rect.width < 250) {
                                result.avatar_url = src;
                                break;
                            }
                        }
                    }
                }

                // Clean avatar URL: strip query params (imginn CDN works without them
                // and the full URL often has malformed double-? from Instagram CDN paths)
                if (result.avatar_url && result.avatar_url.includes('?')) {
                    result.avatar_url = result.avatar_url.split('?')[0];
                }

                // --- Bio ---
                const bioSelectors = [
                    '.biography',
                    '.bio',
                    '.user-bio',
                    '.profile-bio',
                    '.profile-info .description',
                    '.info .bio',
                ];
                for (const sel of bioSelectors) {
                    const el = document.querySelector(sel);
                    if (el && el.textContent.trim().length > 2) {
                        result.bio = el.textContent.trim();
                        break;
                    }
                }

                // --- Display Name ---
                const nameSelectors = [
                    '.fullname',
                    '.display-name',
                    '.profile-name',
                    '.name',
                    '.user-info h1',
                    'h1',
                ];
                for (const sel of nameSelectors) {
                    const el = document.querySelector(sel);
                    if (el && el.textContent.trim().length > 1 && el.textContent.trim().length < 100) {
                        result.display_name = el.textContent.trim();
                        break;
                    }
                }

                return result;
            }""")

            # Save debug screenshot for future selector tuning
            # (best effort: a failed screenshot must not fail the extraction)
            try:
                screenshot_path = Path(f"/tmp/imginn_profile_{username}.png")
                page.screenshot(path=str(screenshot_path))
                self.log(f"Profile screenshot saved to {screenshot_path}", "debug")
            except Exception:
                pass

            # Success means at least one extracted field is non-empty
            if profile_info and any(profile_info.values()):
                self.log(f"Extracted profile info: avatar={'yes' if profile_info.get('avatar_url') else 'no'}, "
                         f"bio={'yes' if profile_info.get('bio') else 'no'}, "
                         f"name={profile_info.get('display_name', 'no')}", "info")
                return profile_info
            else:
                # Save page HTML for debugging (truncated to the first 50000 characters)
                try:
                    html_path = Path(f"/tmp/imginn_profile_{username}.html")
                    html_path.write_text(page.content()[:50000])
                    self.log(f"No profile info found - HTML saved to {html_path}", "warning")
                except Exception:
                    pass
                return None

        except Exception as e:
            # Navigation/evaluation problems are reported, not raised
            self.log(f"Error getting profile info for @{username}: {e}", "error")
            return None

    def _extract_media_id_from_url(self, url: str) -> str:
        """Extract Instagram media ID from URL"""
        # URL format: https://imginn.com/p/MEDIA_ID/
        # or just /p/MEDIA_ID/
        match = re.search(r'/p/([^/]+)/?', url)
        if match:
            return match.group(1)
        return None

    def _update_file_timestamps(self, filepath: Path, post_date: datetime):
        """Update all timestamps for a file to match the post date"""
        try:
            # Convert datetime to timestamp
            timestamp = post_date.timestamp()

            # 1. Update file system timestamps (access time and modification time)
            os.utime(filepath, (timestamp, timestamp))
            self.log(f"Updated file timestamps to {post_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")

            # 2. Try to update creation time (platform-specific)
            if platform.system() == 'Darwin':  # macOS
                # Use SetFile command on macOS
                date_str = post_date.strftime('%m/%d/%Y %H:%M:%S')
                try:
                    subprocess.run(
                        ['SetFile', '-d', date_str, str(filepath)],
                        capture_output=True,
                        text=True
                    )
                except (subprocess.SubprocessError, FileNotFoundError, OSError):
                    pass  # SetFile not available on this system
            elif platform.system() == 'Windows':
                # On Windows, use PowerShell with proper escaping to prevent injection
                filepath_escaped = str(filepath).replace("'", "''")
                date_escaped = post_date.isoformat().replace("'", "''")
                ps_command = f"(Get-Item -LiteralPath '{filepath_escaped}').CreationTime = Get-Date '{date_escaped}'"
                try:
                    subprocess.run(
                        ['powershell', '-Command', ps_command],
                        capture_output=True,
                        text=True
                    )
                except (subprocess.SubprocessError, FileNotFoundError, OSError):
                    pass  # PowerShell command failed
            # Linux doesn't support changing creation time

            # 3. Update EXIF data for images
            if str(filepath).lower().endswith(('.jpg', '.jpeg', '.png', '.heic')):
                self._update_exif_timestamps(filepath, post_date)

        except Exception as e:
            self.log(f"Error updating timestamps: {e}", "warning")

    def _update_exif_timestamps(self, filepath: Path, post_date: datetime):
        """Update EXIF timestamps in image files"""
        try:
            # Check if exiftool is available
            result = subprocess.run(['which', 'exiftool'], capture_output=True, text=True)
            if result.returncode == 0:
                # Format date for EXIF
                exif_date = post_date.strftime('%Y:%m:%d %H:%M:%S')

                # Update all date fields in EXIF including MetadataDate for Immich
                cmd = [
                    'exiftool', '-overwrite_original', '-quiet',
                    f'-AllDates={exif_date}',
                    f'-MetadataDate={exif_date}',
                    '-HistoryWhen=',
                    f'-FileModifyDate={exif_date}',
                    str(filepath)
                ]

                subprocess.run(cmd, capture_output=True, text=True)
                self.log(f"Updated EXIF timestamps", "debug")
        except Exception:
            # Silently skip if exiftool not available
            pass

    def _extract_post_date(self, page) -> datetime:
        """Try to extract post date from page"""
        try:
            # Wait a moment for dynamic content to load
            page.wait_for_timeout(500)

            # FIRST: Look for data-created attribute (Unix timestamp)
            elements_with_data_created = page.locator('[data-created]').all()
            self.log(f"Found {len(elements_with_data_created)} elements with data-created attribute", "debug")

            for elem in elements_with_data_created:
                timestamp_str = elem.get_attribute('data-created')
                if timestamp_str:
                    try:
                        # Convert Unix timestamp to datetime
                        timestamp = int(timestamp_str)
                        post_date = datetime.fromtimestamp(timestamp)
                        self.log(f"Found data-created timestamp: {timestamp} -> {post_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
                        return post_date
                    except Exception as e:
                        self.log(f"Failed to parse timestamp {timestamp_str}: {e}", "debug")
                        pass

            # If no data-created found, wait a bit more and try again
            if len(elements_with_data_created) == 0:
                self.log("No data-created elements found, waiting for dynamic content...", "debug")

                # Try to wait for the element to appear
                try:
                    page.wait_for_selector('[data-created]', timeout=2000)
                    elements_with_data_created = page.locator('[data-created]').all()
                    self.log(f"After waiting for selector: found {len(elements_with_data_created)} elements with data-created", "debug")
                except Exception:
                    # Still try one more time with a longer wait
                    page.wait_for_timeout(1500)
                    elements_with_data_created = page.locator('[data-created]').all()
                    self.log(f"After timeout wait: found {len(elements_with_data_created)} elements with data-created", "debug")

                for elem in elements_with_data_created:
                    timestamp_str = elem.get_attribute('data-created')
                    if timestamp_str:
                        try:
                            timestamp = int(timestamp_str)
                            post_date = datetime.fromtimestamp(timestamp)
                            self.log(f"Found data-created timestamp after wait: {timestamp} -> {post_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
                            return post_date
                        except Exception as e:
                            self.log(f"Failed to parse timestamp {timestamp_str}: {e}", "debug")

            # Fallback: Look for other date elements
            date_selectors = [
                'time[datetime]',
                'time',
                '.date',
                '[datetime]',
                'span.date',
                'div.date'
            ]

            for selector in date_selectors:
                elem = page.locator(selector).first
                if elem.count() > 0:
                    # Try datetime attribute first
                    datetime_str = elem.get_attribute('datetime')
                    if datetime_str:
                        # Parse ISO format
                        for fmt in ['%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d']:
                            try:
                                return datetime.strptime(datetime_str.split('.')[0].replace('Z', ''), fmt)
                            except Exception:
                                continue

                    # Try text content
                    text = elem.text_content()
                    if text:
                        # Parse various date formats
                        # Could be "2 days ago", "September 6, 2025", etc.
                        if "ago" in text.lower():
                            # Handle relative dates
                            if "hour" in text:
                                hours = int(re.search(r'(\d+)', text).group(1))
                                return datetime.now() - timedelta(hours=hours)
                            elif "day" in text:
                                days = int(re.search(r'(\d+)', text).group(1))
                                return datetime.now() - timedelta(days=days)
                            elif "week" in text:
                                weeks = int(re.search(r'(\d+)', text).group(1))
                                return datetime.now() - timedelta(weeks=weeks)
                        else:
                            # Try parsing absolute date
                            for fmt in ['%B %d, %Y', '%b %d, %Y', '%Y-%m-%d']:
                                try:
                                    return datetime.strptime(text, fmt)
                                except Exception:
                                    continue
        except Exception as e:
            self.log(f"Error extracting date: {e}", "debug")

        return None

    def _scan_existing_files(self, output_dir: Path, profile_name: str):
        """Refresh ``downloaded_files`` with the media IDs found in ``output_dir``.

        Delegates to the shared ``scan_existing_files_for_media_ids`` helper,
        called non-recursively with a 20000 minimum file size.
        """
        found_ids = scan_existing_files_for_media_ids(
            output_dir,
            profile_name,
            min_file_size=20000,
            recursive=False,
        )
        self.downloaded_files = found_ids
        if found_ids:
            self.log(f"Found {len(found_ids)} existing media IDs for {profile_name}", "debug")

    def _is_already_downloaded(self, media_id: str) -> bool:
        """Check if media_id has already been downloaded (uses centralized function)"""
        if not self.use_database or not self.unified_db:
            return False

        # Use centralized function for consistent cross-module detection
        return is_instagram_downloaded(self.unified_db, media_id)

    def _record_download(self, media_id: str, username: str, filename: str,
                        url: str = None, post_date=None, file_path: str = None,
                        content_type: str = 'post', metadata: dict = None,
                        deferred: bool = False):
        """Record a successful download in the database (uses centralized function)

        Args:
            deferred: If True, don't record to database now - add to pending_downloads list
                     for later recording after file move is complete
        """
        # If deferred, store for later recording instead of recording now
        if deferred:
            self.pending_downloads.append({
                'media_id': media_id,
                'username': username,
                'filename': filename,
                'url': url,
                'post_date': post_date.isoformat() if post_date else None,
                'file_path': file_path,
                'content_type': content_type,
                'metadata': metadata
            })
            self.log(f"Deferred recording for {media_id}", "debug")
            return True

        if not self.use_database or not self.unified_db:
            return False

        try:
            # Use centralized function for consistent cross-module storage
            result = record_instagram_download(
                db=self.unified_db,
                media_id=media_id,
                username=username,
                content_type=content_type,
                filename=filename,
                url=url,
                post_date=post_date,
                file_path=file_path,
                method='imginn',
                extra_metadata=metadata
            )
            if result:
                self.log(f"Recorded download for {media_id}", "debug")
            return result
        except Exception as e:
            self.log(f"Failed to record download: {e}", "debug")
            return False

    def get_pending_downloads(self):
        """Get list of downloads that were deferred for later recording

        Returns:
            List of download metadata dicts ready for database recording
        """
        return self.pending_downloads.copy()

    def clear_pending_downloads(self):
        """Clear the pending downloads list after they've been recorded"""
        self.pending_downloads = []

    def _get_processed_posts(self, username: str) -> set:
        """Get set of post/story IDs that have been processed from database

        NOTE: Checks ALL Instagram posts globally, not just this user's, because
        the same post can appear on multiple profiles (shared posts, tags, reposts)
        """
        processed = set()
        if not self.unified_db:
            return processed

        try:
            with self.unified_db.get_connection() as conn:
                cursor = conn.cursor()
                # Get all Instagram posts globally (same post can appear on multiple profiles)
                cursor.execute('''
                    SELECT url, filename, metadata FROM downloads
                    WHERE platform = 'instagram'
                ''')

                for row in cursor.fetchall():
                    url, filename, metadata_str = row
                    # Add full URL to processed set
                    if url:
                        processed.add(url)
                    # Also extract and add post ID from URL for backward compatibility
                    if url and '/p/' in url:
                        match = re.search(r'/p/([^/]+)/', url)
                        if match:
                            processed.add(match.group(1))

                    # For stories, extract media_id from filename
                    if filename and '_story' in filename:
                        # Extract the long media ID before _story
                        # Format: username_date_MEDIAID_storyN.ext
                        parts = filename.split('_story')
                        if len(parts) >= 2:
                            # Get everything before _story, then get the media ID (last underscore-separated part)
                            pre_story = parts[0]
                            # Split by underscore and skip first 3 parts (username_YYYYMMDD_HHMMSS)
                            id_parts = pre_story.split('_')
                            if len(id_parts) > 3:
                                # Join everything after date as the media_id
                                media_id_full = '_'.join(id_parts[3:])
                                processed.add(media_id_full)
                                # Also add the extracted Instagram media ID (18-digit number)
                                normalized_id = extract_instagram_media_id(media_id_full)
                                if normalized_id and normalized_id != media_id_full:
                                    processed.add(normalized_id)

                    # Also check metadata for media_id
                    if metadata_str:
                        try:
                            metadata = json.loads(metadata_str)
                            if 'post_id' in metadata:
                                processed.add(metadata['post_id'])
                            if 'media_id' in metadata:
                                media_id = metadata['media_id']
                                processed.add(media_id)
                                # Also add the extracted Instagram media ID
                                normalized_id = extract_instagram_media_id(media_id)
                                if normalized_id and normalized_id != media_id:
                                    processed.add(normalized_id)
                            if 'media_id_full' in metadata:
                                processed.add(metadata['media_id_full'])
                        except Exception:
                            pass

            if processed:
                self.log(f"Found {len(processed)} processed posts in database for {username}", "debug")
        except Exception as e:
            self.log(f"Error loading processed posts from database: {e}", "debug")

        return processed

    def save_cookies(self, context):
        """Save cookies to database or file"""
        cookies = context.cookies()

        # Save to database if available
        if self.unified_db:
            try:
                # CRITICAL: Include user_agent for cf_clearance cookies to work
                self.unified_db.save_scraper_cookies(
                    self.scraper_id,
                    cookies,
                    user_agent=self.user_agent,
                    merge=True
                )
                self.log(f"Saved {len(cookies)} cookies to database", "debug")
                return
            except Exception as e:
                self.log(f"Error saving cookies to database: {e}", "warning")

        # Fallback to file-based storage
        if self.cookie_file:
            storage_data = {
                'cookies': cookies,
                'timestamp': datetime.now().isoformat()
            }
            with open(self.cookie_file, 'w') as f:
                json.dump(storage_data, f, indent=2)
            self.log(f"Saved {len(cookies)} cookies to file", "debug")

    def load_cookies(self, context):
        """Load saved cookies from database or file"""
        # Try loading from database first
        if self.unified_db:
            try:
                cookies = self.unified_db.get_scraper_cookies(self.scraper_id)
                if cookies:
                    # Clean cookies - remove unsupported properties and convert expiry->expires
                    cleaned_cookies = []
                    for cookie in cookies:
                        cleaned = {k: v for k, v in cookie.items()
                                  if k not in ['partitionKey', '_crHasCrossSiteAncestor']}
                        # FlareSolverr uses 'expiry' but Playwright uses 'expires'
                        if 'expiry' in cleaned and 'expires' not in cleaned:
                            cleaned['expires'] = cleaned.pop('expiry')
                        cleaned_cookies.append(cleaned)

                    # CRITICAL: Clear existing cookies first to ensure new cf_clearance takes effect
                    # Otherwise old cookies may override new ones from FlareSolverr
                    try:
                        context.clear_cookies()
                        self.log("Cleared existing browser cookies", "debug")
                    except Exception as e:
                        self.log(f"Could not clear cookies: {e}", "debug")

                    context.add_cookies(cleaned_cookies)
                    self.log(f"Loaded {len(cleaned_cookies)} cookies from database", "info")
                    return True
            except Exception as e:
                self.log(f"Error loading cookies from database: {e}", "warning")

        # Fallback to file-based cookies
        if not self.cookie_file or not self.cookie_file.exists():
            return False

        try:
            with open(self.cookie_file, 'r') as f:
                data = json.load(f)

            # Check age (24 hours)
            saved_time = datetime.fromisoformat(data['timestamp'])
            if datetime.now() - saved_time > timedelta(hours=24):
                self.log("Cookies expired", "debug")
                return False

            # Clean cookies - remove unsupported properties and convert expiry->expires
            cleaned_cookies = []
            for cookie in data['cookies']:
                # Remove Chrome-specific properties that Playwright doesn't support
                cleaned = {k: v for k, v in cookie.items()
                          if k not in ['partitionKey', '_crHasCrossSiteAncestor']}
                # FlareSolverr uses 'expiry' but Playwright uses 'expires'
                if 'expiry' in cleaned and 'expires' not in cleaned:
                    cleaned['expires'] = cleaned.pop('expiry')
                cleaned_cookies.append(cleaned)

            context.add_cookies(cleaned_cookies)
            self.log(f"Loaded {len(cleaned_cookies)} cookies from file", "info")
            return True
        except Exception as e:
            self.log(f"Failed to load cookies: {e}", "warning")
            return False

    def wait_for_cloudflare(self, page):
        """Poll the page until real ImgInn content appears, a server error shows, or time runs out.

        The page is sampled once per second for up to ``max_wait`` (120)
        iterations. Each sample lower-cases the page HTML and looks for, in order:

        1. generic profile markers on an imginn URL -> success;
        2. server-error text (500/502/503) -> failure, with a debug screenshot;
        3. Cloudflare interstitial text -> keep waiting, and on iterations
           0, 15 and 30 try to fetch fresh cookies via
           ``self._get_cookies_via_flaresolverr``;
        4. page-type specific markers (post / stories / tagged / profile) -> success.

        Args:
            page: Page object to inspect. If a FlareSolverr refresh changes the
                stored user agent, the browser is restarted and the local
                ``page`` is rebound to ``self.page``.
                NOTE(review): the caller's own reference is not updated by this
                method - confirm callers re-read ``self.page`` afterwards.

        Returns:
            True once content markers are found; False on a detected server
            error or after the timeout. On timeout ``self._page_load_failures``
            is incremented and a screenshot is saved under ``debug/``.

        Raises:
            Exception: Any error from reading ``page.url`` / ``page.content()``
                whose message does not contain "navigating" is re-raised.
        """
        self.log("Waiting for page to load...", "debug")

        max_wait = 120  # Extended wait - Cloudflare challenges can take up to 120 seconds
        flaresolverr_attempts = 0
        max_flaresolverr_attempts = 3

        for i in range(max_wait):
            # One sample per second; the sleep comes first, so iteration i is
            # evaluated roughly i+1 seconds after entry.
            time.sleep(1)

            # Check current URL and content
            try:
                current_url = page.url
                content = page.content().lower()
            except Exception as e:
                # Page is still navigating, wait and try again
                if "navigating" in str(e).lower():
                    self.log("Page still navigating, waiting...", "debug")
                    continue
                else:
                    # Some other error, re-raise it
                    raise

            # First check if the actual content is visible (not Cloudflare)
            # ImgInn pages will have profile content when loaded
            if 'imginn' in current_url.lower() and ('posts' in content or 'followers' in content or 'following' in content):
                # We have actual content, not a challenge
                self.log(f"Page loaded successfully after {i+1} seconds", "info")
                return True

            # Check for actual Cloudflare challenge or server error
            # NOTE: 'challenge-platform' is NOT a reliable indicator - it's embedded JS that stays on the page
            # even after successful bypass. Only check for visible interstitial text.
            challenge_indicators = ['checking your browser', 'just a moment', 'verify you are human', 'enable javascript']
            error_indicators = ['internal server error', 'error code 500', 'error code 502', 'error code 503']

            has_challenge = any(indicator in content for indicator in challenge_indicators)
            has_error = any(indicator in content for indicator in error_indicators)

            # A server error takes precedence over a challenge: give up immediately.
            if has_error:
                self.log("Server error detected (500/502/503) - site is likely down", "error")
                # Save screenshot for debugging
                try:
                    debug_dir = Path("debug")
                    debug_dir.mkdir(exist_ok=True)
                    screenshot_path = debug_dir / f"server_error_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
                    page.screenshot(path=str(screenshot_path))
                    self.log(f"Screenshot saved to {screenshot_path}", "debug")
                except Exception:
                    # Screenshot is purely diagnostic; failure to save it is ignored.
                    pass
                return False

            if has_challenge:
                # Try FlareSolverr at specific intervals (0s, 15s, 30s)
                # Note: Turnstile checkbox clicking doesn't work - it's designed to block automation
                if i == 0 or (i in [15, 30] and flaresolverr_attempts < max_flaresolverr_attempts):
                    flaresolverr_attempts += 1
                    self.log(f"Cloudflare challenge detected, attempting FlareSolverr bypass (attempt {flaresolverr_attempts})...", "info")

                    # Get current browser user_agent for comparison
                    current_browser_ua = None
                    try:
                        current_browser_ua = page.evaluate('() => navigator.userAgent')
                    except Exception:
                        # Leave it as None; the comparison below then skips the restart.
                        pass

                    # Try to get fresh cookies via FlareSolverr
                    if self._get_cookies_via_flaresolverr(page.url):
                        self.log("Got fresh cookies, reloading page...", "info")

                        # Check if user_agent changed - if so, restart browser
                        new_ua = None
                        try:
                            new_ua = self.unified_db.get_scraper_cookies_user_agent(self.scraper_id)
                            self.log(f"Stored cookie UA: {new_ua[:60] if new_ua else 'None'}...", "debug")
                            self.log(f"Browser UA: {current_browser_ua[:60] if current_browser_ua else 'None'}...", "debug")
                        except Exception as e:
                            self.log(f"Error getting stored UA: {e}", "debug")

                        if new_ua and current_browser_ua and new_ua != current_browser_ua:
                            self.log("User-agent changed, restarting browser with new fingerprint...", "info")
                            self._stop_browser()
                            self._start_browser()
                            # From here on the loop polls the page of the restarted browser.
                            page = self.page
                            try:
                                page.goto(current_url, wait_until='domcontentloaded', timeout=30000)
                            except Exception as e:
                                self.log(f"Error navigating after browser restart: {e}", "debug")
                        else:
                            # Reload cookies in browser context
                            try:
                                self.load_cookies(self.context)
                                # Reload the page with new cookies
                                page.reload(wait_until='domcontentloaded', timeout=10000)
                                # CRITICAL: Wait for Cloudflare background JS validation (5-7 seconds)
                                wait_time = 5 + random.uniform(0, 2)
                                self.log(f"Waiting {wait_time:.1f}s for Cloudflare background validation...", "debug")
                                time.sleep(wait_time)
                            except Exception as e:
                                self.log(f"Error reloading page with new cookies: {e}", "debug")
                    else:
                        self.log("FlareSolverr failed, waiting for challenge to resolve...", "warning")

                # Continue waiting for challenge to resolve
                # (this also skips the page-type checks and status logs below)
                continue

            # Check if we're on the correct page with content
            if '/p/' in current_url:  # Post page
                # Look for download button or image
                if 'download' in content or 'data-created' in content:
                    self.log(f"Post page loaded after {i+1} seconds", "info")
                    return True
            elif '/stories/' in current_url:  # Stories page
                # Stories pages have swiper, reels, or story content
                if 'swiper' in content or 'data-uid' in content or 'reel' in content:
                    self.log(f"Stories page loaded after {i+1} seconds", "info")
                    return True
                # Also check for counter/profile info which is on stories pages too
                if 'counter-item' in content or ('posts' in content and 'followers' in content):
                    self.log(f"Stories page loaded after {i+1} seconds", "info")
                    return True
            elif '/tagged/' in current_url:  # Tagged page
                # Tagged pages have items grid
                if 'class="item"' in content or 'data-uid' in content:
                    self.log(f"Tagged page loaded after {i+1} seconds", "info")
                    return True
                if 'posts' in content and 'followers' in content:
                    self.log(f"Tagged page loaded after {i+1} seconds", "info")
                    return True
            else:  # Profile page
                # Check if profile content is visible - ImgInn specific
                if 'imginn' in current_url.lower():
                    if ('posts' in content and 'followers' in content) or 'following' in content:
                        self.log(f"Profile page loaded after {i+1} seconds", "info")
                        return True
                    # Also check for actual post links
                    if 'href="/p/' in content or 'class="item"' in content:
                        self.log(f"Profile page loaded after {i+1} seconds", "info")
                        return True

            # Debug: Log what we're seeing if we've been waiting a while
            if i == 15:
                self.log(f"Debug: URL={current_url[:50]}, has posts={('posts' in content)}, has swiper={('swiper' in content)}", "debug")

            # Status updates (only if we haven't detected content yet)
            if i == 10:
                self.log("Still waiting (10s)... page loading", "debug")
            elif i == 20:
                self.log("Still waiting (20s)... page not ready yet", "info")
            elif i == 30:
                self.log("Still waiting (30s)... slow response from server", "info")
            elif i == 45:
                self.log("Still waiting (45s)... checking if blocked", "info")
            elif i == 60:
                self.log("Still waiting (60s)... page load is slow", "warning")
            elif i == 90:
                self.log("Still waiting (90s)... this is taking too long", "warning")

        # Timeout reached - page didn't load
        # Escalate the log level once failures reach the session threshold.
        self._page_load_failures += 1
        level = "error" if self._page_load_failures >= self._page_load_failure_threshold else "warning"
        self.log(f"Page load timeout ({self._page_load_failures}x this session). URL: {page.url}", level)

        # Save screenshot for debugging
        try:
            debug_dir = Path("debug")
            debug_dir.mkdir(exist_ok=True)
            screenshot_path = debug_dir / f"cloudflare_block_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
            page.screenshot(path=str(screenshot_path))
            self.log(f"Screenshot saved to {screenshot_path}", "debug")
        except Exception:
            # Screenshot is purely diagnostic; failure to save it is ignored.
            pass

        return False

    def _dismiss_consent_dialog(self, page):
        """Dismiss cookie consent / GDPR overlay if present (Google FundingChoices)."""
        try:
            # Google FundingChoices consent dialog
            consent_btn = page.locator(
                'button.fc-cta-consent, '          # "Consent" button
                'button.fc-cta-do-not-consent, '   # "Do not consent" button
                'button[aria-label="Consent"], '
                'button.fc-dismiss-button, '       # Dismiss/close button
                '.fc-dialog button.fc-primary-button'
            ).first
            if consent_btn.count() > 0 and consent_btn.is_visible():
                consent_btn.click(force=True)
                self.log("Dismissed consent dialog", "debug")
                time.sleep(0.5)
                return
            # Fallback: remove the overlay via JS if buttons aren't found
            overlay = page.locator('.fc-consent-root, .fc-dialog-overlay').first
            if overlay.count() > 0:
                page.evaluate("document.querySelectorAll('.fc-consent-root, .fc-dialog-overlay, .fc-dialog-container').forEach(el => el.remove())")
                self.log("Removed consent overlay via JS", "debug")
        except Exception:
            pass

    def _safe_go_back(self, page, username: str, tagged: bool = False):
        """Navigate back to profile page safely with timeout handling.

        Tries go_back() first with a short timeout, falls back to direct navigation.
        """
        try:
            page.go_back(timeout=10000)
        except Exception:
            self.log("go_back timed out, navigating directly to profile", "debug")
            try:
                suffix = f"/tagged/?ref=index" if tagged else "/?ref=index"
                page.goto(f"https://imginn.com/{username}{suffix}", timeout=15000)
            except Exception as nav_err:
                self.log(f"Direct navigation back also failed: {nav_err}", "warning")

    def _is_cloudflare_challenge(self, page) -> bool:
        """Check if current page is a Cloudflare challenge page.

        Returns:
            True if Cloudflare challenge detected, False otherwise
        """
        try:
            title = page.title().lower()
            content = page.content().lower()[:2000]  # Check first 2000 chars

            challenge_indicators = ['just a moment', 'checking your browser', 'verify you are human',
                                   'enable javascript', 'cloudflare']

            # Check title first (most reliable)
            if any(indicator in title for indicator in challenge_indicators):
                return True

            # Check content
            if any(indicator in content for indicator in challenge_indicators):
                return True

            return False
        except Exception:
            return False

    def _handle_cloudflare_on_post(self, page, post_url: str, max_retries: int = 2) -> bool:
        """Handle Cloudflare challenge on a post page by getting fresh cookies and retrying.

        Args:
            page: Playwright page object
            post_url: URL of the post to retry
            max_retries: Maximum number of retry attempts

        Returns:
            True if page loaded successfully (no Cloudflare), False if still blocked
        """
        if not self._is_cloudflare_challenge(page):
            return True  # No challenge, page is good

        self.log(f"Cloudflare challenge detected on post page, attempting bypass...", "warning")

        for attempt in range(max_retries):
            # Wait before FlareSolverr attempt - give Cloudflare time to cool down
            if attempt == 0:
                wait_time = random.uniform(15, 25)
            else:
                wait_time = random.uniform(30, 60)
            self.log(f"Waiting {wait_time:.1f}s before FlareSolverr attempt {attempt + 1}...", "info")
            time.sleep(wait_time)

            # Get fresh cookies via FlareSolverr using the post URL
            if self._get_cookies_via_flaresolverr(post_url):
                self.log(f"Got fresh cookies (attempt {attempt + 1}), reloading post...", "info")

                # Check if user_agent changed - if so, restart browser
                try:
                    current_browser_ua = page.evaluate('() => navigator.userAgent')
                    new_ua = self.unified_db.get_scraper_cookies_user_agent(self.scraper_id)

                    if new_ua and current_browser_ua and new_ua != current_browser_ua:
                        self.log("User-agent changed, restarting browser...", "info")
                        self._stop_browser()
                        self._start_browser()
                        page = self.page
                except Exception as e:
                    self.log(f"Error checking user_agent: {e}", "debug")

                # Reload cookies into browser context
                try:
                    self.load_cookies(self.context)
                except Exception as e:
                    self.log(f"Error loading cookies: {e}", "debug")

                # Navigate directly to the post URL
                try:
                    page.goto(post_url, wait_until='domcontentloaded', timeout=30000)

                    # Wait for Cloudflare background JS validation (5-7 seconds)
                    wait_time = 5 + random.uniform(0, 2)
                    self.log(f"Waiting {wait_time:.1f}s for Cloudflare background validation...", "debug")
                    time.sleep(wait_time)

                    # Check if still blocked
                    if not self._is_cloudflare_challenge(page):
                        self.log("Cloudflare bypass successful on post page", "info")
                        # IMPORTANT: Save browser cookies after successful bypass
                        # This captures any cookies set by Cloudflare's JS validation
                        try:
                            self.save_cookies(self.context)
                            self.log("Saved browser cookies after successful bypass", "debug")
                        except Exception as e:
                            self.log(f"Error saving cookies after bypass: {e}", "debug")
                        return True
                    else:
                        self.log(f"Still blocked after retry {attempt + 1}", "warning")
                except Exception as e:
                    self.log(f"Navigation failed after cookie refresh: {e}", "warning")
            else:
                self.log(f"FlareSolverr failed (attempt {attempt + 1})", "warning")

        self.log("Failed to bypass Cloudflare on post page after all retries", "error")
        return False

    def _check_post_phrases(self, page, phrase_config: dict) -> bool:
        """Check if post contains required phrases

        Args:
            page: Playwright page object
            phrase_config: Phrase search configuration
                {
                    'phrases': list of phrases to search for,
                    'case_sensitive': bool,
                    'match_all': bool (True = all phrases must match, False = any phrase)
                }

        Returns:
            True if post matches phrase criteria, False otherwise
        """
        try:
            # Get post caption/text
            caption_selectors = [
                '.caption',
                '.post-caption',
                'meta[property="og:description"]',
                'meta[name="description"]',
                '.content',
                'div[class*="caption"]',
                'span[class*="caption"]'
            ]

            post_text = ""
            for selector in caption_selectors:
                try:
                    element = page.locator(selector).first
                    if element.count() > 0:
                        text = element.text_content() or element.get_attribute('content') or ""
                        if text:
                            post_text += " " + text
                except Exception:
                    continue

            # Also check visible text in the main content area
            try:
                main_content = page.locator('main, article, .post-content, div[role="main"]').first
                if main_content.count() > 0:
                    post_text += " " + (main_content.text_content() or "")
            except Exception:
                pass

            if not post_text:
                self.log("Could not extract post text for phrase matching", "warning")
                return False

            # Clean up text
            post_text = ' '.join(post_text.split())  # Normalize whitespace

            phrases = phrase_config.get('phrases', [])
            if not phrases:
                return True  # No phrases to match = match all

            case_sensitive = phrase_config.get('case_sensitive', False)
            match_all = phrase_config.get('match_all', False)

            if not case_sensitive:
                post_text = post_text.lower()
                phrases = [p.lower() for p in phrases]

            self.log(f"Checking post text ({len(post_text)} chars) for phrases: {phrases}", "debug")

            # Check phrase matching
            matches = []
            for phrase in phrases:
                if phrase in post_text:
                    matches.append(phrase)
                    self.log(f"Found phrase: '{phrase}'", "debug")

            if match_all:
                # All phrases must be found
                result = len(matches) == len(phrases)
                if not result:
                    missing = [p for p in phrases if p not in matches]
                    self.log(f"Missing required phrases: {missing}", "debug")
            else:
                # At least one phrase must be found
                result = len(matches) > 0
                if not result:
                    self.log(f"No matching phrases found", "debug")

            return result

        except Exception as e:
            self.log(f"Error checking post phrases: {e}", "error")
            return False

    def download(self, username: str, content_type: str = "posts", days_back: int = 14, max_downloads: int = 50, output_dir: str = None, phrase_config: dict = None, defer_database: bool = False):
        """Download content from a user - compatible with media-downloader interface

        Args:
            username: Instagram username
            content_type: Type of content ("posts", "stories", or "tagged")
            days_back: How many days back to search
            max_downloads: Maximum posts to download
            output_dir: Output directory
            phrase_config: Optional phrase search configuration
                {
                    'enabled': bool,
                    'phrases': list of phrases to search for,
                    'case_sensitive': bool,
                    'match_all': bool (True = all phrases must match, False = any phrase)
                }
            defer_database: If True, defer database recording to pending_downloads list
                           for later recording after file move is complete
        """
        # Clear downloaded_files cache between accounts to prevent memory growth
        self.downloaded_files.clear()
        # Clear pending downloads for fresh batch
        self.pending_downloads = []

        # Set output directory
        if output_dir:
            output_path = Path(output_dir) / username
        else:
            output_path = Path(f"/opt/media-downloader/downloads/{username}")

        # Route to appropriate download method
        if content_type == "posts":
            files = self.download_posts(
                username=username,
                days_back=days_back,
                max_posts=max_downloads,
                output_dir=output_path,
                phrase_config=phrase_config,
                defer_database=defer_database
            )
        elif content_type == "stories":
            files = self.download_stories(
                username=username,
                days_back=days_back,
                max_stories=max_downloads,
                output_dir=output_path,
                defer_database=defer_database
            )
        elif content_type == "tagged":
            files = self.download_tagged(
                username=username,
                days_back=days_back,
                max_posts=max_downloads,
                output_dir=output_path,
                phrase_config=phrase_config,
                defer_database=defer_database
            )
        else:
            self.log(f"ImgInn does not support content type: {content_type}", "warning")
            return 0

        return len(files)

    def download_posts(self, username: str, days_back: int = 14, max_posts: int = 50, specific_post_url: str = None, output_dir: Path = None, phrase_config: dict = None, skip_database: bool = False, max_age_hours: int = None, defer_database: bool = False):
        """Download posts from a user with FastDL naming

        Args:
            username: Instagram username
            days_back: How many days back to search
            max_posts: Maximum posts to check
            specific_post_url: Download a specific post
            output_dir: Output directory
            phrase_config: Optional phrase search configuration
            skip_database: If True, don't record downloads in database (for temporary processing)
            max_age_hours: If specified, only download posts newer than N hours (overrides days_back)
            defer_database: If True, defer database recording to pending_downloads list
                           for later recording after file move is complete
        """
        # Rate limiting to avoid Cloudflare blocks
        self._enforce_rate_limit("posts")

        profile_name = username.lower()
        if output_dir is None:
            output_dir = Path(f"/opt/media-downloader/downloads/{profile_name}")
        output_dir.mkdir(parents=True, exist_ok=True)

        # Check site status before doing anything else
        self.log("Checking ImgInn site status...", "debug")
        site_status, error_msg = self.cf_handler.check_site_status("https://imginn.com/", timeout=10)

        if self.cf_handler.should_skip_download(site_status):
            self.log(f"Skipping download for @{profile_name} - ImgInn is unavailable: {error_msg}", "warning")
            self.activity_manager.update_status(f"Skipped - ImgInn unavailable ({error_msg})")
            return []
        elif site_status == SiteStatus.CLOUDFLARE_CHALLENGE:
            self.log("Cloudflare challenge detected, will attempt bypass during download", "info")

        # Scan existing files
        self._scan_existing_files(output_dir, profile_name)

        # Get processed posts from database
        processed_posts = self._get_processed_posts(profile_name)
        self.log(f"Loaded {len(processed_posts)} processed posts for {profile_name} from database", "info")
        if len(processed_posts) > 0 and len(processed_posts) < 20:
            self.log(f"Processed post IDs: {processed_posts}", "debug")

        downloaded_files = []
        # Use max_age_hours if specified, otherwise use days_back
        if max_age_hours is not None:
            cutoff_date = datetime.now() - timedelta(hours=max_age_hours)
        else:
            cutoff_date = datetime.now() - timedelta(days=days_back)

        # Update activity status
        if specific_post_url and profile_name == 'unknown':
            self.activity_manager.update_status(f"Fetching post...")
        else:
            self.activity_manager.update_status("Checking posts")

        # Start or reuse browser
        self._start_browser()
        page = self.page

        try:
            # If specific post URL provided, go directly to it
            if specific_post_url:
                self.log(f"Navigating to specific post", "info")
                page.goto(specific_post_url, wait_until='domcontentloaded')
            else:
                # Navigate to profile
                self.log(f"Navigating to @{username} profile", "info")
                page.goto(f"https://imginn.com/{username}/?ref=index", wait_until='domcontentloaded')

            # CRITICAL: Wait 5-7 seconds for Cloudflare background JS challenges to complete
            # Per browserless.io: "Allow 5+ seconds post-page load for background JavaScript challenges"
            import random
            wait_time = 5 + random.uniform(0, 2)  # 5-7 seconds
            self.log(f"Waiting {wait_time:.1f}s for Cloudflare background validation...", "debug")
            time.sleep(wait_time)

            # Wait for page to load
            if not self.wait_for_cloudflare(page):
                self._page_load_failures += 1
                level = "error" if self._page_load_failures >= self._page_load_failure_threshold else "warning"
                self.log(f"Page didn't load properly ({self._page_load_failures}x this session)", level)
                return []

            # Save cookies
            self.save_cookies(self.context)

            # Wait for JavaScript to load posts (ImgInn loads posts dynamically)
            self.log("Waiting for posts to load via JavaScript...", "info")
            try:
                # Wait for post links to appear (up to 10 seconds)
                page.wait_for_selector('a[href*="/p/"]', timeout=10000)
                self.log("Posts loaded successfully", "info")
            except Exception:
                # Timeout - posts might not exist, or page structure changed
                self.log("Timeout waiting for posts to appear", "warning")
                time.sleep(2)  # Give it a bit more time anyway

            # If specific post, process it directly
            if specific_post_url:
                self.log("Processing specific post", "info")

                # Extract media ID from URL
                media_id = self._extract_media_id_from_url(specific_post_url)
                if not media_id:
                    self.log("Could not extract media ID", "warning")
                    return []

                self.log(f"URL Media ID: {media_id}", "debug")

                # Process this single post (bypass date filter for specific posts)
                post_links = [None]  # Dummy list for iteration
                bypass_date_filter = True
            else:
                # Find posts on profile page
                self.log("Finding posts...", "info")

                # Debug: Check what's actually on the page
                page_content = page.content()
                if 'no posts' in page_content.lower() or 'page not found' in page_content.lower():
                    self.log("Page shows 'no posts' or 'not found'", "warning")

                post_links = page.locator('a[href*="/p/"]').all()

                self.log(f"Found {len(post_links)} posts", "info")

                if not post_links:
                    # Debug: Save screenshot to see what's wrong
                    try:
                        screenshot_path = Path(f"/tmp/imginn_no_posts_{username}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png")
                        page.screenshot(path=str(screenshot_path))
                        self.log(f"No posts found - screenshot saved to {screenshot_path}", "warning")
                    except Exception:
                        pass
                    self.log("No posts found", "warning")
                    return []

                bypass_date_filter = False

                self.log(f"Processing posts (max {max_posts})", "info")

                # Collect all post URLs upfront to avoid stale element issues
                post_urls_to_process = []
                if not specific_post_url:
                    for idx, pl in enumerate(post_links[:max_posts]):
                        try:
                            href = pl.get_attribute('href', timeout=5000)
                            if href:
                                if not href.startswith('http'):
                                    href = f"https://imginn.com{href}"
                                post_urls_to_process.append(href)
                        except Exception as e:
                            self.log(f"Post {idx+1}: Failed to get URL: {str(e)[:50]}", "debug")
                            continue
                    self.log(f"Collected {len(post_urls_to_process)} post URLs", "debug")

            # Track consecutive old posts to handle pinned posts
            consecutive_old_posts = 0
            max_consecutive_old_posts = 5  # Allow up to 5 old posts (pinned) before stopping

            # Set initial progress so dashboard shows 0/N immediately
            total_posts = len(post_urls_to_process) if not specific_post_url else 1
            self.activity_manager.update_status(
                "Downloading posts",
                progress_current=0,
                progress_total=total_posts
            )

            for i, post_url in enumerate(post_urls_to_process if not specific_post_url else [specific_post_url]):
                # Update progress at start of each iteration (fires even on skips)
                self.activity_manager.update_status(
                    "Downloading posts",
                    progress_current=i + 1,
                    progress_total=total_posts
                )

                try:
                    # Handle specific post vs regular posts
                    if specific_post_url:
                        # Already on the specific post page
                        post_url = specific_post_url
                        media_id = self._extract_media_id_from_url(post_url)
                    else:
                        # URL already collected and formatted
                        media_id = self._extract_media_id_from_url(post_url)

                        if not media_id:
                            self.log(f"Post {i+1}: Could not extract media ID", "warning")
                            continue

                        # Check if post was already processed (from database)
                        if media_id in processed_posts:
                            # Skip if in database - trust the database tracking
                            self.log(f"Post {i+1}: {media_id} already processed (database), skipping", "debug")
                            continue

                        # Rate limiting between post downloads to avoid Cloudflare blocks
                        if i > 0:
                            post_delay = random.uniform(3, 8)
                            self.log(f"Rate limit: waiting {post_delay:.1f}s before post {i+1}", "debug")
                            time.sleep(post_delay)

                        self.log(f"Post {i+1}: Processing {media_id}", "info")

                        # Navigate directly to post URL (more reliable than clicking which can timeout)
                        try:
                            page.goto(post_url, wait_until='domcontentloaded', timeout=30000)
                        except Exception as nav_err:
                            self.log(f"Post {i+1}: Navigation failed: {nav_err}", "warning")
                            continue

                        # Wait for page to load
                        time.sleep(2)

                        # Wait for navigation to complete
                        try:
                            page.wait_for_load_state('networkidle', timeout=5000)
                        except Exception:
                            # Continue even if network isn't idle - page might still be usable
                            self.log("Network didn't idle, but continuing", "debug")

                    # Check if on post page
                    if "/p/" not in page.url:
                        self.log(f"Not a downloadable post (URL: {page.url})", "warning")
                        self._safe_go_back(page, username)
                        continue

                    # IMPORTANT: Wait for post page content to fully render
                    # This ensures download buttons are from the POST PAGE, not profile page preview
                    try:
                        # Wait for the post container to be visible (imginn uses main-content now)
                        page.wait_for_selector('div.main-content, div.post, div.content, div.single-post', timeout=3000)
                        time.sleep(1)  # Additional wait for download buttons to render
                    except Exception:
                        self.log("Post container not found, checking for Cloudflare...", "debug")

                    # Check for Cloudflare challenge and handle it
                    cloudflare_bypassed = False
                    if self._is_cloudflare_challenge(page):
                        self.log(f"Cloudflare challenge detected on post {media_id}", "warning")
                        if not self._handle_cloudflare_on_post(page, post_url):
                            # Cloudflare bypass failed - skip this post WITHOUT marking as processed
                            # so it can be retried on next run
                            self.log(f"Skipping post {media_id} due to Cloudflare block (will retry later)", "warning")
                            try:
                                page.goto(f"https://imginn.com/{username}/?ref=index")
                                time.sleep(3)
                            except Exception:
                                pass
                            continue
                        cloudflare_bypassed = True

                    self.log(f"Navigated to post page: {page.url}", "debug")
                    self._dismiss_consent_dialog(page)

                    # Extract actual username from post page if we don't have it (e.g., specific_post_url with unknown user)
                    if profile_name == 'unknown' or specific_post_url:
                        try:
                            username_elem = page.locator('div.username a').first
                            if username_elem.count() > 0:
                                username_href = username_elem.get_attribute('href')
                                if username_href:
                                    # Extract username from href like "/evalongoria/" -> "evalongoria"
                                    extracted_username = username_href.strip('/').lower()
                                    if extracted_username and extracted_username != 'unknown':
                                        profile_name = extracted_username
                                        self.log(f"Extracted username from post page: @{profile_name}", "info")
                                        # Update activity status with real username
                                        self.activity_manager.update_status("Downloading posts")
                        except Exception as e:
                            self.log(f"Could not extract username from post page: {e}", "debug")

                    # Extract post date - ALWAYS extract for proper file naming
                    post_date = self._extract_post_date(page)

                    # Use post date for filename, or current date
                    if post_date:
                        date_str = post_date.strftime('%Y%m%d_%H%M%S')
                        self.log(f"Original post date: {post_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
                    else:
                        date_str = datetime.now().strftime('%Y%m%d_%H%M%S')
                        self.log(f"No original date found, using current time", "debug")

                    # Check date filter AFTER extracting date (bypass for specific posts)
                    if not bypass_date_filter and post_date and post_date < cutoff_date:
                        consecutive_old_posts += 1
                        self.log(f"Post too old ({post_date.strftime('%Y-%m-%d')}), skipping (consecutive old: {consecutive_old_posts}/{max_consecutive_old_posts})", "info")

                        # Mark this old post as checked in database to avoid re-checking
                        # Only mark if doing phrase search (has phrase_config)
                        if phrase_config and media_id:
                            self._record_download(
                                media_id=media_id,
                                username=profile_name,
                                filename=f"_old_post_{media_id}",
                                url=post_url,
                                post_date=post_date,
                                content_type='post',
                                metadata={'marker': True, 'reason': 'old_post'}
                            )

                        self._safe_go_back(page, username)

                        # Stop only after 5 consecutive old posts (handles pinned posts at top)
                        if consecutive_old_posts >= max_consecutive_old_posts:
                            self.log(f"Found {consecutive_old_posts} consecutive old posts - stopping", "info")
                            break
                        else:
                            continue  # Skip this old post but keep checking (might be pinned)

                    # Reset consecutive old posts counter - we found a post within date range
                    consecutive_old_posts = 0

                    # Check for phrase matching if configured
                    if phrase_config and phrase_config.get('enabled'):
                        if not self._check_post_phrases(page, phrase_config):
                            self.log(f"Post does not match phrase criteria, skipping download", "info")

                            # Mark this post as checked (but not downloaded) in database
                            # This prevents re-checking the same post every run
                            if media_id:
                                self._record_download(
                                    media_id=media_id,
                                    username=profile_name,
                                    filename=f"_phrase_checked_{media_id}",
                                    url=post_url,
                                    post_date=post_date,
                                    content_type='post',
                                    metadata={'marker': True, 'reason': 'phrase_checked'}
                                )

                            self._safe_go_back(page, username)
                            continue
                        else:
                            self.log(f"Post matches phrase criteria, using high-res download", "info")

                    # Check for carousel
                    carousel_next = page.locator('div[role="button"][aria-label*="Next"], .swiper-button-next').first
                    has_carousel = carousel_next.count() > 0

                    if has_carousel:
                        self.log(f"Carousel detected - will download all carousel images", "info")
                        self._dismiss_consent_dialog(page)

                        # CRITICAL: Wait for POST PAGE carousel download buttons to be ready
                        # This prevents downloading from the profile page preview
                        try:
                            # Wait for download buttons with POST PAGE URLs (have "scontent" or "post" in them)
                            page.wait_for_selector('a.btn[href*="scontent"], a[download], a.download', timeout=3000)
                            time.sleep(1.5)  # Additional wait for all carousel images to load
                            self.log("Carousel download buttons ready on post page", "debug")
                        except Exception:
                            self.log("Download buttons not found, but continuing", "debug")
                    else:
                        self.log("Single image post", "debug")

                    # Handle downloads - always use download buttons from post page
                    image_count = 0
                    max_images = 10

                    # Download images (carousel or single)
                    if has_carousel:
                        # First, let's find all carousel slides
                        all_slides = page.locator('.swiper-slide').all()
                        self.log(f"Found {len(all_slides)} carousel slides", "debug")

                        # Download each slide's image
                        for slide_index in range(min(len(all_slides), max_images)):
                            self.log(f"Processing carousel slide {slide_index + 1}/{len(all_slides)}", "debug")

                            # Get the current slide element to scope our searches
                            current_slide = all_slides[slide_index]

                            # Click next to navigate to this slide (except for first one)
                            if slide_index > 0:
                                next_btn = page.locator('div[role="button"][aria-label*="Next"], .swiper-button-next').first
                                if next_btn.count() > 0 and next_btn.is_visible():
                                    try:
                                        next_btn.click(force=True)
                                    except Exception:
                                        self.log(f"Carousel next button click timed out at slide {slide_index + 1}, stopping carousel", "warning")
                                        break
                                    time.sleep(2)  # Wait for slide transition and image to load

                            # First, try to find a download button for this carousel item
                            # IMPORTANT: Search within CURRENT SLIDE only, not entire page
                            download_btn = None
                            download_url = None
                            webp_fallback_url = None

                            # Look for download button on the current slide - prefer high-res, fallback to .webp
                            download_selectors = [
                                'a.btn[href*="scontent"][href*=".jpg"]',  # High-res jpg
                                'a.btn[href*="scontent"][href*=".mp4"]',  # Video
                                'a.btn[href*="scontent"]',  # Any scontent
                                'a[download][href*=".jpg"]',
                                'a[download][href*=".mp4"]',
                                'a.download',
                                'a[download]',
                                'a[href*="/post"]'
                            ]

                            # Search for download buttons - first try within slide, then try page-level
                            # Imginn often has download buttons outside the .swiper-slide elements
                            search_contexts = [current_slide, page]

                            for search_context in search_contexts:
                                if download_url:  # Already found, skip other contexts
                                    break

                                for selector in download_selectors:
                                    btn = search_context.locator(selector).first
                                    if btn.count() > 0:
                                        temp_url = btn.get_attribute('href')
                                        if temp_url and temp_url != '#' and temp_url != 'javascript:void(0)':
                                            if not temp_url.startswith('http'):
                                                temp_url = f"https://imginn.com{temp_url}"

                                            # Store .webp as fallback, but keep looking for better
                                            if '.webp' in temp_url.lower():
                                                if not webp_fallback_url:
                                                    webp_fallback_url = temp_url
                                                    self.log(f"Found .webp link (fallback): {temp_url[:80]}...", "debug")
                                                continue

                                            # Found non-.webp link, use it
                                            download_btn = btn
                                            download_url = temp_url
                                            self.log(f"Found high-res download for carousel slide {slide_index + 1}: {download_url[:80]}...", "debug")
                                            break

                            # Use .webp fallback if no high-res found
                            used_webp_fallback = False
                            if not download_url and webp_fallback_url:
                                download_url = webp_fallback_url
                                used_webp_fallback = True
                                self.log(f"Using .webp fallback for carousel slide {slide_index + 1}", "info")

                            # If we found a download button, use it for high-res
                            if download_url:
                                try:
                                    import requests
                                    from urllib.parse import urlparse, unquote

                                    response = requests.get(download_url, timeout=30, headers={
                                        'User-Agent': self.user_agent,
                                        'Referer': 'https://imginn.com/'
                                    }, cookies=self._get_cookies_for_requests())
                                    response.raise_for_status()

                                    # Extract filename and media ID from the actual file
                                    url_path = urlparse(download_url).path
                                    original_name = unquote(url_path.split('/')[-1].split('?')[0])
                                    if original_name.startswith('post'):
                                        original_name = original_name[4:]

                                    # The media ID is the filename without extension
                                    actual_media_id = Path(original_name).stem
                                    ext = Path(original_name).suffix or '.jpg'

                                    # Build filename for carousel image using actual media ID
                                    filename = f"{profile_name}_{date_str}_{actual_media_id}_{slide_index + 1}{ext}"
                                    filepath = output_dir / filename

                                    # Save file
                                    with open(filepath, 'wb') as f:
                                        f.write(response.content)

                                    # Check for duplicate hash before recording
                                    if self.unified_db:
                                        from pathlib import Path as PathLib
                                        # Check for duplicate hash (hash blacklist persists even if original deleted)
                                        file_hash = self.unified_db.get_file_hash(str(filepath))
                                        if file_hash:
                                            existing = self.unified_db.get_download_by_file_hash(file_hash)
                                            if existing and existing.get('file_path') and str(filepath) != existing.get('file_path'):
                                                # Duplicate hash found - content was already downloaded (prevents redownload of deleted content)
                                                self.log(f"⚠ Duplicate content detected (hash match): {filename} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
                                                # Delete the duplicate regardless of whether original file still exists
                                                try:
                                                    filepath.unlink()
                                                    self.log(f"Deleted duplicate (hash blacklist): {filename}", "debug")
                                                    continue
                                                except Exception as e:
                                                    self.log(f"Failed to delete duplicate {filename}: {e}", "warning")

                                    # Update timestamps
                                    if post_date:
                                        self._update_file_timestamps(filepath, post_date)

                                    # Log with appropriate quality label
                                    quality_label = "fallback" if used_webp_fallback else "high-res"
                                    self.log(f"Downloaded ({quality_label}): {filename} ({len(response.content)} bytes)", "info")
                                    downloaded_files.append(str(filepath))
                                    image_count += 1

                                    # Add to tracking
                                    self.downloaded_files.add(actual_media_id)

                                    # Mark in database (or defer for later)
                                    if not skip_database or defer_database:
                                        unique_url = f"{post_url}#{filename}"
                                        self._record_download(
                                            media_id=actual_media_id,
                                            username=profile_name,
                                            filename=filename,
                                            url=unique_url,
                                            post_date=post_date,
                                            file_path=str(filepath),
                                            content_type='post',
                                            deferred=defer_database
                                        )

                                    continue  # Skip to next slide

                                except Exception as e:
                                    self.log(f"Failed to download high-res carousel image {slide_index + 1}: {e}, falling back to standard res", "warning")

                            # Fallback: Find the current slide's media (img or video) if no download button
                            # current_slide already defined at top of loop

                            # Try img first, then video
                            media_src = None
                            slide_img = current_slide.locator('img').first
                            if slide_img.count() > 0:
                                media_src = slide_img.get_attribute('src')

                                # If it's a lazy placeholder, wait for it to load
                                if media_src and 'lazy.jpg' in media_src:
                                    self.log(f"Slide {slide_index + 1} is lazy, waiting for load...", "debug")
                                    # Trigger load by making it visible
                                    current_slide.scroll_into_view_if_needed()
                                    time.sleep(1)
                                    # Get src again
                                    media_src = slide_img.get_attribute('src')
                            else:
                                # Check for video tag
                                slide_video = current_slide.locator('video source, video').first
                                if slide_video.count() > 0:
                                    media_src = slide_video.get_attribute('src')
                                    self.log(f"Found video for slide {slide_index + 1}", "debug")

                            if media_src and 'lazy.jpg' not in media_src and '483011604' not in media_src:
                                    self.log(f"Downloading carousel media {slide_index + 1} (standard res): {media_src[:80]}...", "debug")

                                    # Download this media
                                    try:
                                        import requests
                                        from urllib.parse import urlparse, unquote

                                        if not media_src.startswith('http'):
                                            media_src = f"https:{media_src}" if media_src.startswith('//') else f"https://imginn.com{media_src}"

                                        response = requests.get(media_src, timeout=30, headers={
                                            'User-Agent': self.user_agent,
                                            'Referer': 'https://imginn.com/'
                                        }, cookies=self._get_cookies_for_requests())
                                        response.raise_for_status()

                                        # Extract filename and media ID from the actual file
                                        url_path = urlparse(media_src).path
                                        original_name = unquote(url_path.split('/')[-1].split('?')[0])
                                        if original_name.startswith('post'):
                                            original_name = original_name[4:]

                                        # The media ID is the filename without extension
                                        actual_media_id = Path(original_name).stem
                                        ext = Path(original_name).suffix or '.jpg'

                                        # Build filename for carousel image using actual media ID
                                        filename = f"{profile_name}_{date_str}_{actual_media_id}_{slide_index + 1}{ext}"
                                        filepath = output_dir / filename

                                        # Save file
                                        with open(filepath, 'wb') as f:
                                            f.write(response.content)

                                        # Check for duplicate hash before recording
                                        if self.unified_db:
                                            from pathlib import Path as PathLib
                                            file_hash = self.unified_db.get_file_hash(str(filepath))
                                            if file_hash:
                                                existing = self.unified_db.get_download_by_file_hash(file_hash)
                                                if existing and existing.get('file_path') and str(filepath) != existing.get('file_path'):
                                                    existing_path = PathLib(existing['file_path'])
                                                    if existing_path.exists():
                                                        self.log(f"⚠ Duplicate file detected: {filename} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
                                                        try:
                                                            filepath.unlink()
                                                            self.log(f"Deleted duplicate: {filename}", "debug")
                                                            continue
                                                        except Exception as e:
                                                            self.log(f"Failed to delete duplicate {filename}: {e}", "warning")

                                        # Update timestamps
                                        if post_date:
                                            self._update_file_timestamps(filepath, post_date)

                                        self.log(f"Downloaded: {filename} ({len(response.content)} bytes)", "info")
                                        downloaded_files.append(str(filepath))
                                        image_count += 1

                                        # Add to tracking
                                        self.downloaded_files.add(actual_media_id)

                                        # Mark in database (or defer for later)
                                        if not skip_database or defer_database:
                                            unique_url = f"{post_url}#{filename}"
                                            self._record_download(
                                                media_id=actual_media_id,
                                                username=profile_name,
                                                filename=filename,
                                                url=unique_url,
                                                post_date=post_date,
                                                file_path=str(filepath),
                                                content_type='post',
                                                deferred=defer_database
                                            )

                                    except Exception as e:
                                        self.log(f"Failed to download carousel media {slide_index + 1}: {e}", "error")
                            else:
                                    self.log(f"Slide {slide_index + 1} has no valid media (img/video)", "warning")

                        # Skip the old carousel download logic
                        pass

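                    # Legacy carousel logic: not removed, only disabled via the `if False:` below
                    # (carousel slides are downloaded by the per-slide code above).
                    # NOTE(review): the `else:` paired with this `if False:` is the single-image
                    # download path, so that branch always runs when control reaches this point.
                    # An earlier note here said single-image logic is already handled above -
                    # confirm the `else:` branch does not duplicate that work.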
                    if False:
                        # Wait for carousel content to load
                        time.sleep(1)

                        # Find download button AND image elements
                        # ImgInn sometimes has the full image in an img tag, not just download button
                        download_selectors = [
                            'a[download]',
                            'a.download-btn',
                            'a[href*="scontent"]',
                            'a[href*="s3.imginn.com"]',
                            'a.download',
                            'a[href*="/post"][href*=".jpg"]',
                            'a[href*="/post"][href*=".mp4"]',
                            'button.download',
                            'a.btn-download'
                        ]

                        # For carousel images, we need to find the actual post image, not the profile thumbnail
                        # Look for images that are NOT the profile pic and NOT lazy placeholders
                        img_src = None

                        # Try to find the carousel image (exclude profile pic and lazy images)
                        possible_images = page.locator('img[src*="post"], img[src*="scontent"]:not([src*="profile"])').all()
                        for img_elem in possible_images:
                            src = img_elem.get_attribute('src')
                            if src and 'lazy.jpg' not in src and '483011604' not in src:  # Exclude profile pic
                                img_src = src
                                self.log(f"Found carousel image src: {img_src[:100]}...", "debug")
                                break

                        # If no good image found, wait and try again
                        if not img_src or 'lazy.jpg' in img_src:
                            time.sleep(1)
                            # Try once more after waiting
                            main_image = page.locator('img[src*="post"]:not([src*="lazy"])').first
                            if main_image.count() > 0:
                                img_src = main_image.get_attribute('src')
                                if img_src:
                                    self.log(f"Found carousel image after wait: {img_src[:100]}...", "debug")

                        download_btn = None
                        for selector in download_selectors:
                            btn = page.locator(selector).first
                            if btn.count() > 0:
                                download_btn = btn
                                break

                        if download_btn and download_btn.count() > 0:
                            try:
                                # For ImgInn, we should click the download button to get the full-size image
                                # The href often points to a thumbnail, not the full image
                                download_url = download_btn.get_attribute('href')
                                self.log(f"Download button href: {download_url[:100] if download_url else 'None'}...", "debug")

                                # Try clicking the button for browser download first
                                try:
                                    self.log(f"Attempting browser download (clicking button)", "debug")
                                    with page.expect_download(timeout=5000) as download_info:
                                        download_btn.click()
                                        download = download_info.value
                                        original_name = download.suggested_filename
                                        media_id_from_file = Path(original_name).stem
                                        ext = Path(original_name).suffix or '.jpg'
                                        download_method = 'browser'
                                        response = None
                                        self.log(f"Browser download completed: {original_name}", "debug")
                                except Exception:
                                    # Fallback to direct download if clicking doesn't work
                                    self.log(f"Browser download failed, trying direct download", "debug")

                                    # For carousels, if no download URL or it's invalid, use image src
                                    if has_carousel and (not download_url or download_url == "None" or download_url == "null"):
                                        if img_src:
                                            self.log(f"No download button for carousel, using image src", "debug")
                                            download_url = img_src

                                    # Be more lenient with download URLs - accept any https URL that looks like it could be an image/video
                                    if download_url and download_url.startswith('http'):
                                        # Make sure it's not just the post page URL
                                        if '/p/' not in download_url or download_url.endswith(('.jpg', '.jpeg', '.png', '.heic', '.mp4', '.webm')):
                                            import requests
                                            response = requests.get(download_url, timeout=30, headers={
                                                'User-Agent': self.user_agent,
                                                'Referer': 'https://imginn.com/'
                                            }, cookies=self._get_cookies_for_requests())
                                            response.raise_for_status()
                                            self.log(f"Downloaded {len(response.content)} bytes", "debug")
                                            download_method = 'direct'

                                            # Extract filename from URL
                                            from urllib.parse import urlparse, unquote
                                            url_path = urlparse(download_url).path
                                            original_name = unquote(url_path.split('/')[-1].split('?')[0])

                                            # Remove 'post' prefix if present
                                            if original_name.startswith('post'):
                                                original_name = original_name[4:]

                                            media_id_from_file = Path(original_name).stem  # This is the actual media ID
                                            ext = Path(original_name).suffix or '.jpg'
                                        else:
                                            # Try to use image src instead
                                            if img_src:
                                                self.log(f"Download URL is post page, using image src instead", "debug")
                                                download_url = img_src
                                                if not download_url.startswith('http'):
                                                    download_url = f"https://imginn.com{download_url}"

                                                import requests
                                                response = requests.get(download_url, timeout=30, headers={
                                                    'User-Agent': self.user_agent,
                                                    'Referer': 'https://imginn.com/'
                                                }, cookies=self._get_cookies_for_requests())
                                                response.raise_for_status()
                                                download_method = 'direct'

                                                from urllib.parse import urlparse, unquote
                                                url_path = urlparse(download_url).path
                                                original_name = unquote(url_path.split('/')[-1].split('?')[0])
                                                if original_name.startswith('post'):
                                                    original_name = original_name[4:]
                                                media_id_from_file = Path(original_name).stem
                                                ext = Path(original_name).suffix or '.jpg'
                                            else:
                                                raise Exception("No valid download URL found")
                                    else:
                                        raise Exception("No valid download URL found")

                                # Update our tracked media ID with the correct one from the file
                                if media_id_from_file:
                                    media_id = media_id_from_file
                                    self.log(f"Media ID from file: {media_id}", "debug")

                                # For carousels, if we don't get a unique media ID, generate one
                                normalized_media_id = extract_instagram_media_id(media_id) if media_id else None
                                if has_carousel and (not media_id or media_id in self.downloaded_files or (normalized_media_id and normalized_media_id in self.downloaded_files)):
                                    # Generate unique ID for this carousel image
                                    media_id = f"{media_id_base}_carousel_{carousel_index}"
                                    normalized_media_id = extract_instagram_media_id(media_id)
                                    self.log(f"Generated carousel media ID: {media_id}", "debug")

                                # Check if this media ID is already downloaded (both original and normalized)
                                if media_id in self.downloaded_files or (normalized_media_id and normalized_media_id in self.downloaded_files):
                                    self.log(f"Already have {media_id}, skipping download but continuing carousel", "debug")
                                    # Still count this as an image even if skipped
                                    image_count += 1
                                    if has_carousel:
                                        carousel_index += 1
                                else:
                                    self.log(f"Downloading new file for {media_id}", "debug")
                                    # Build filename with FastDL format
                                    if has_carousel:
                                        # For carousel items, append index (simpler format)
                                        filename = f"{profile_name}_{date_str}_{media_id_base}_{carousel_index}{ext}"
                                    else:
                                        filename = f"{profile_name}_{date_str}_{media_id}{ext}"

                                    filepath = output_dir / filename

                                    # Save the downloaded content
                                    if download_method == 'direct':
                                        with open(filepath, 'wb') as f:
                                            f.write(response.content)
                                    else:
                                        download.save_as(filepath)

                                    # Check for duplicate hash before recording
                                    if self.unified_db:
                                        from pathlib import Path as PathLib
                                        # Check for duplicate hash (hash blacklist persists even if original deleted)
                                        file_hash = self.unified_db.get_file_hash(str(filepath))
                                        if file_hash:
                                            existing = self.unified_db.get_download_by_file_hash(file_hash)
                                            if existing and existing.get('file_path') and str(filepath) != existing.get('file_path'):
                                                # Duplicate hash found - content was already downloaded (prevents redownload of deleted content)
                                                self.log(f"⚠ Duplicate content detected (hash match): {filename} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
                                                # Delete the duplicate regardless of whether original file still exists
                                                try:
                                                    filepath.unlink()
                                                    self.log(f"Deleted duplicate (hash blacklist): {filename}", "debug")
                                                    continue
                                                except Exception as e:
                                                    self.log(f"Failed to delete duplicate {filename}: {e}", "warning")

                                    # Update file timestamps to match post date
                                    if post_date:
                                        self._update_file_timestamps(filepath, post_date)

                                    self.log(f"Downloaded: {filename}", "info")
                                    downloaded_files.append(str(filepath))
                                    image_count += 1

                                    # Add to tracking
                                    self.downloaded_files.add(media_id)

                                    # Increment carousel index for next image
                                    if has_carousel:
                                        carousel_index += 1

                                    # Mark as downloaded in database (or defer for later)
                                    # Use per-slide URL for carousels so each slide gets a unique url_hash
                                    record_url = f"{post_url}?img_index={carousel_index + 1}" if has_carousel else post_url
                                    if not skip_database or defer_database:
                                        self._record_download(
                                            media_id=media_id,
                                            username=profile_name,
                                            filename=filename,
                                            url=record_url,
                                            post_date=post_date,
                                            file_path=str(filepath),
                                            content_type='post',
                                            deferred=defer_database
                                        )

                            except Exception as e:
                                self.log(f"Download failed: {e}", "error")
                                import traceback
                                self.log(f"Traceback: {traceback.format_exc()}", "debug")
                                break
                        else:
                            # No download button found, try using the image src as fallback
                            page_url = page.url
                            self.log(f"No download button found on {page_url}, trying image src", "warning")

                            # Use the image src we found earlier
                            if img_src:
                                try:
                                    self.log(f"Using image src as fallback: {img_src[:100]}...", "debug")
                                    import requests
                                    from urllib.parse import urlparse, unquote

                                    # Ensure full URL
                                    if not img_src.startswith('http'):
                                        img_src = f"https://imginn.com{img_src}"

                                    response = requests.get(img_src, timeout=30, headers={
                                        'User-Agent': self.user_agent,
                                        'Referer': 'https://imginn.com/'
                                    }, cookies=self._get_cookies_for_requests())
                                    response.raise_for_status()

                                    # Extract filename from URL
                                    url_path = urlparse(img_src).path
                                    original_name = unquote(url_path.split('/')[-1].split('?')[0])
                                    if original_name.startswith('post'):
                                        original_name = original_name[4:]

                                    media_id = Path(original_name).stem
                                    ext = Path(original_name).suffix or '.jpg'

                                    # Build filename with carousel index if needed
                                    if has_carousel and carousel_index > 1:
                                        filename = f"{profile_name}_{date_str}_{media_id}_{carousel_index}{ext}"
                                    else:
                                        filename = f"{profile_name}_{date_str}_{media_id}{ext}"
                                    filepath = output_dir / filename

                                    # Save file
                                    with open(filepath, 'wb') as f:
                                        f.write(response.content)

                                    self.log(f"Downloaded via image src: {filename} ({len(response.content)} bytes)", "info")
                                    downloaded_files.append(str(filepath))

                                    # Check for duplicate hash before recording
                                    if self.unified_db:
                                        from pathlib import Path as PathLib
                                        # Check for duplicate hash (hash blacklist persists even if original deleted)
                                        file_hash = self.unified_db.get_file_hash(str(filepath))
                                        if file_hash:
                                            existing = self.unified_db.get_download_by_file_hash(file_hash)
                                            if existing and existing.get('file_path') and str(filepath) != existing.get('file_path'):
                                                # Duplicate hash found - content was already downloaded (prevents redownload of deleted content)
                                                self.log(f"⚠ Duplicate content detected (hash match): {filename} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
                                                # Delete the duplicate regardless of whether original file still exists
                                                try:
                                                    filepath.unlink()
                                                    self.log(f"Deleted duplicate (hash blacklist): {filename}", "debug")
                                                    continue
                                                except Exception as e:
                                                    self.log(f"Failed to delete duplicate {filename}: {e}", "warning")

                                    # Update timestamps
                                    if post_date:
                                        self._update_file_timestamps(filepath, post_date)

                                    image_count += 1
                                    self.downloaded_files.add(media_id)

                                    # Mark in database (or defer for later)
                                    # Use per-slide URL for carousels so each slide gets a unique url_hash
                                    record_url = f"{post_url}?img_index={carousel_index + 1}" if has_carousel else post_url
                                    if not skip_database or defer_database:
                                        self._record_download(
                                            media_id=media_id,
                                            username=profile_name,
                                            filename=filename,
                                            url=record_url,
                                            post_date=post_date,
                                            file_path=str(filepath),
                                            content_type='post',
                                            deferred=defer_database
                                        )
                                except Exception as e:
                                    self.log(f"Failed to download via image src: {e}", "error")
                                    # Don't break here - might be a temporary issue with one image
                                    if not has_carousel:
                                        break
                            else:
                                self.log(f"No image src available as fallback", "debug")
                                # For carousels, we might still have more images after clicking next
                                if not has_carousel:
                                    break

                        # Check for next image in carousel
                        if has_carousel and image_count < max_images:
                            next_btn = page.locator('div[role="button"][aria-label*="Next"], .swiper-button-next').first
                            if next_btn.count() > 0 and next_btn.is_visible():
                                # Store current image src to detect when it changes
                                current_img_src = img_src if img_src else ""

                                self.log(f"Clicking next for carousel image {carousel_index}", "debug")
                                try:
                                    next_btn.click(force=True)
                                except Exception:
                                    self.log(f"Carousel next button click timed out at image {carousel_index}, stopping carousel", "warning")
                                    break

                                # Wait for the image to change
                                time.sleep(2)  # Give more time for slide transition and new image to load
                            else:
                                self.log("No more carousel images", "debug")
                                break
                        else:
                            break
                    else:
                        # Single image - download from post page using download button
                        download_url = None
                        webp_fallback_url = None
                        download_selectors = [
                            'a.btn[href*="scontent"][href*=".jpg"]',  # High-res jpg
                            'a.btn[href*="scontent"][href*=".mp4"]',  # Video
                            'a.btn[href*="scontent"]',  # Any scontent
                            'a[download][href*=".jpg"]',
                            'a[download][href*=".mp4"]',
                            'a.download',
                            'a[href*="/post"]'
                        ]

                        for selector in download_selectors:
                            btn = page.locator(selector).first
                            if btn.count() > 0:
                                temp_url = btn.get_attribute('href')
                                if temp_url and temp_url != '#' and temp_url != 'javascript:void(0)':
                                    if not temp_url.startswith('http'):
                                        temp_url = f"https://imginn.com{temp_url}"

                                    # Store .webp as fallback, but keep looking for better
                                    if '.webp' in temp_url.lower():
                                        if not webp_fallback_url:
                                            webp_fallback_url = temp_url
                                            self.log(f"Found .webp link (fallback): {temp_url[:80]}...", "debug")
                                        continue

                                    # Found non-.webp link, use it
                                    download_url = temp_url
                                    self.log(f"Found high-res download for single image: {download_url[:80]}...", "debug")
                                    break

                        # Use .webp fallback if no high-res found
                        if not download_url and webp_fallback_url:
                            download_url = webp_fallback_url
                            self.log(f"Using .webp fallback for single image", "info")

                        if download_url:
                            try:
                                import requests
                                from urllib.parse import urlparse, unquote

                                response = requests.get(download_url, timeout=30, headers={
                                    'User-Agent': self.user_agent,
                                    'Referer': 'https://imginn.com/'
                                }, cookies=self._get_cookies_for_requests())
                                response.raise_for_status()

                                # Extract filename and media ID from the actual file
                                url_path = urlparse(download_url).path
                                original_name = unquote(url_path.split('/')[-1].split('?')[0])
                                if original_name.startswith('post'):
                                    original_name = original_name[4:]

                                # The media ID is the filename without extension
                                actual_media_id = Path(original_name).stem
                                ext = Path(original_name).suffix or '.jpg'

                                # Build filename
                                filename = f"{profile_name}_{date_str}_{actual_media_id}{ext}"
                                filepath = output_dir / filename

                                # Save file
                                with open(filepath, 'wb') as f:
                                    f.write(response.content)

                                self.log(f"Downloaded (high-res): {filename} ({len(response.content)} bytes)", "info")
                                downloaded_files.append(str(filepath))

                                # Check for duplicate hash before recording
                                if self.unified_db:
                                    from pathlib import Path as PathLib
                                    file_hash = self.unified_db.get_file_hash(str(filepath))
                                    if file_hash:
                                        existing = self.unified_db.get_download_by_file_hash(file_hash)
                                        if existing and existing.get('file_path') and str(filepath) != existing.get('file_path'):
                                            existing_path = PathLib(existing['file_path'])
                                            if existing_path.exists():
                                                self.log(f"⚠ Duplicate file detected: {filename} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
                                                try:
                                                    filepath.unlink()
                                                    self.log(f"Deleted duplicate: {filename}", "debug")
                                                    continue
                                                except Exception as e:
                                                    self.log(f"Failed to delete duplicate {filename}: {e}", "warning")

                                # Update timestamps
                                if post_date:
                                    self._update_file_timestamps(filepath, post_date)

                                image_count = 1

                                # Add to tracking
                                self.downloaded_files.add(actual_media_id)

                                # Mark in database (or defer for later)
                                if not skip_database or defer_database:
                                    self._record_download(
                                        media_id=actual_media_id,
                                        username=profile_name,
                                        filename=filename,
                                        url=post_url,
                                        post_date=post_date,
                                        file_path=str(filepath),
                                        content_type='post',
                                        deferred=defer_database
                                    )

                            except Exception as e:
                                self.log(f"Failed to download single image: {e}", "warning")
                        else:
                            # No download button found - try video/image src as fallback
                            self.log("No download button found, trying video/image src fallback", "debug")
                            media_src = None

                            # Try video first - multiple selectors for different page structures
                            video_selectors = [
                                'video source[src]',
                                'video[src]',
                                'video source[type*="mp4"]',
                                '.video-container video',
                                '.post-video video',
                                'div[class*="video"] video',
                                'video'
                            ]
                            for v_selector in video_selectors:
                                video_elem = page.locator(v_selector).first
                                if video_elem.count() > 0:
                                    # Try src attribute first, then check source child
                                    media_src = video_elem.get_attribute('src')
                                    if not media_src:
                                        source_elem = video_elem.locator('source').first
                                        if source_elem.count() > 0:
                                            media_src = source_elem.get_attribute('src')
                                    if media_src and media_src != '#':
                                        self.log(f"Found video src via '{v_selector}': {media_src[:80]}...", "debug")
                                        break

                            # If no video found, wait a bit and try again (videos may lazy-load)
                            if not media_src:
                                time.sleep(2)
                                for v_selector in video_selectors:
                                    video_elem = page.locator(v_selector).first
                                    if video_elem.count() > 0:
                                        media_src = video_elem.get_attribute('src')
                                        if not media_src:
                                            source_elem = video_elem.locator('source').first
                                            if source_elem.count() > 0:
                                                media_src = source_elem.get_attribute('src')
                                        if media_src and media_src != '#':
                                            self.log(f"Found video src after wait via '{v_selector}': {media_src[:80]}...", "debug")
                                            break

                            # Try image if no video
                            if not media_src:
                                img_elem = page.locator('img[src*="scontent"]:not([src*="profile"]), img[src*="post"]').first
                                if img_elem.count() > 0:
                                    media_src = img_elem.get_attribute('src')
                                    if media_src and 'lazy.jpg' not in media_src:
                                        self.log(f"Found image src: {media_src[:80]}...", "debug")
                                    else:
                                        media_src = None

                            if media_src:
                                try:
                                    import requests
                                    from urllib.parse import urlparse, unquote

                                    if not media_src.startswith('http'):
                                        media_src = f"https://imginn.com{media_src}"

                                    response = requests.get(media_src, timeout=30, headers={
                                        'User-Agent': self.user_agent,
                                        'Referer': 'https://imginn.com/'
                                    }, cookies=self._get_cookies_for_requests())
                                    response.raise_for_status()

                                    # Extract filename from URL
                                    url_path = urlparse(media_src).path
                                    original_name = unquote(url_path.split('/')[-1].split('?')[0])
                                    if original_name.startswith('post'):
                                        original_name = original_name[4:]

                                    actual_media_id = Path(original_name).stem
                                    ext = Path(original_name).suffix or '.mp4'

                                    filename = f"{profile_name}_{date_str}_{actual_media_id}{ext}"
                                    filepath = output_dir / filename

                                    with open(filepath, 'wb') as f:
                                        f.write(response.content)

                                    self.log(f"Downloaded (fallback): {filename} ({len(response.content)} bytes)", "info")
                                    downloaded_files.append(str(filepath))

                                    if post_date:
                                        self._update_file_timestamps(filepath, post_date)

                                    image_count = 1
                                    self.downloaded_files.add(actual_media_id)

                                    if not skip_database or defer_database:
                                        self._record_download(
                                            media_id=actual_media_id,
                                            username=profile_name,
                                            filename=filename,
                                            url=post_url,
                                            post_date=post_date,
                                            file_path=str(filepath),
                                            content_type='post',
                                            deferred=defer_database
                                        )
                                except Exception as e:
                                    self.log(f"Failed to download via fallback: {e}", "error")
                            else:
                                self.log("No download button or media src found for single post", "warning")
                                # Debug: capture screenshot and page content when download fails
                                try:
                                    debug_dir = Path("debug")
                                    debug_dir.mkdir(exist_ok=True)
                                    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                                    screenshot_path = debug_dir / f"no_media_{media_id}_{timestamp}.png"
                                    page.screenshot(path=str(screenshot_path))
                                    self.log(f"Debug screenshot saved: {screenshot_path}", "debug")
                                    # Also log page title and some content
                                    title = page.title()
                                    self.log(f"Page title: {title}", "debug")

                                    # Check if this is a Cloudflare block - don't mark as processed if so
                                    if self._is_cloudflare_challenge(page):
                                        self.log(f"Cloudflare block detected - NOT marking {media_id} as processed (will retry later)", "warning")
                                        # Skip to next post without marking as processed
                                        try:
                                            page.goto(f"https://imginn.com/{username}/?ref=index")
                                            time.sleep(3)
                                        except Exception:
                                            pass
                                        continue
                                except Exception as e:
                                    self.log(f"Failed to capture debug screenshot: {e}", "debug")

                    # Mark post as processed in database even if no downloads
                    # (might be already downloaded or failed - but NOT if Cloudflare blocked)
                    if image_count == 0:
                        # Still mark the post URL as processed to avoid re-checking
                        self._record_download(
                            media_id=media_id,
                            username=profile_name,
                            filename=f"{media_id}_skipped",
                            url=post_url,
                            post_date=post_date,
                            content_type='post',
                            metadata={'marker': True, 'reason': 'skipped'}
                        )

                    # Go back to profile
                    self._safe_go_back(page, username)

                    # If we just bypassed Cloudflare, wait longer to let session stabilize
                    if cloudflare_bypassed:
                        cooldown = random.uniform(15, 25)
                        self.log(f"Post-bypass cooldown: waiting {cooldown:.1f}s to stabilize session", "info")
                        time.sleep(cooldown)
                    else:
                        time.sleep(random.uniform(1, 3))

                    # Check if back on profile
                    if username not in page.url:
                        page.goto(f"https://imginn.com/{username}/?ref=index")
                        time.sleep(3)

                except Exception as e:
                    self.log(f"Error processing post: {e}", "error")
                    try:
                        page.goto(f"https://imginn.com/{username}/?ref=index")
                        time.sleep(3)
                    except Exception:
                        pass

            self.log(f"Downloaded {len(downloaded_files)} files", "info")
        except Exception as e:
            self.log(f"Error: {e}", "error")

        # Don't close browser here - reuse it for next profile
        # Call _stop_browser() explicitly when done with all profiles
        return downloaded_files

    def download_tagged(self, username: str, days_back: int = 14, max_posts: int = 50, output_dir: Path = None, phrase_config: dict = None, defer_database: bool = False):
        """Download tagged posts from a user

        Args:
            username: Instagram username of the tagged user (the posts themselves come from accounts that tagged them)
            days_back: How many days back to search
            max_posts: Maximum posts to check
            output_dir: Output directory
            phrase_config: Optional phrase search configuration
            defer_database: If True, defer database recording to pending_downloads list
                           for later recording after file move is complete
        """
        # Rate limiting to avoid Cloudflare blocks
        self._enforce_rate_limit("tagged")

        profile_name = username.lower()
        if output_dir is None:
            output_dir = Path(f"/opt/media-downloader/downloads/{profile_name}")
        output_dir.mkdir(parents=True, exist_ok=True)

        # Check site status before doing anything else
        self.log("Checking ImgInn site status...", "debug")
        site_status, error_msg = self.cf_handler.check_site_status("https://imginn.com/", timeout=10)

        if self.cf_handler.should_skip_download(site_status):
            self.log(f"Skipping tagged download for @{profile_name} - ImgInn is unavailable: {error_msg}", "warning")
            return []
        elif site_status == SiteStatus.CLOUDFLARE_CHALLENGE:
            self.log("Cloudflare challenge detected, will attempt bypass during download", "info")

        # Scan existing files
        self._scan_existing_files(output_dir, profile_name)

        # Get processed posts from database
        processed_posts = self._get_processed_posts(profile_name)
        self.log(f"Loaded {len(processed_posts)} processed tagged posts for {profile_name} from database", "info")

        downloaded_files = []
        cutoff_date = datetime.now() - timedelta(days=days_back)

        # Start or reuse browser
        self._start_browser()
        page = self.page

        try:
            # Navigate to tagged page directly
            self.log(f"Navigating to @{username} tagged posts page", "info")
            page.goto(f"https://imginn.com/tagged/{username}/?ref=index", wait_until='domcontentloaded')

            # CRITICAL: Wait for Cloudflare background JS challenges
            import random
            wait_time = 5 + random.uniform(0, 2)
            self.log(f"Waiting {wait_time:.1f}s for Cloudflare background validation...", "debug")
            time.sleep(wait_time)

            # Wait for page to load
            if not self.wait_for_cloudflare(page):
                self._page_load_failures += 1
                level = "error" if self._page_load_failures >= self._page_load_failure_threshold else "warning"
                self.log(f"Page didn't load properly ({self._page_load_failures}x this session)", level)
                return []

            # Save cookies
            self.save_cookies(self.context)

            # Wait for JavaScript to load posts (ImgInn loads posts dynamically on tagged page)
            self.log("Waiting for tagged posts to load via JavaScript...", "info")
            try:
                # Wait for post links to appear (up to 10 seconds)
                page.wait_for_selector('a[href*="/p/"]', timeout=10000)
                self.log("Tagged posts loaded successfully", "info")
            except Exception:
                # Timeout - posts might not exist, or page structure changed
                self.log("Timeout waiting for tagged posts to appear", "warning")
                time.sleep(2)  # Give it a bit more time anyway

            # Scroll to load more posts (ImgInn uses infinite scroll on tagged page)
            self.log("Scrolling to load more tagged posts...", "info")
            previous_count = 0
            scroll_attempts = 0
            max_scroll_attempts = 10  # Scroll up to 10 times to load posts

            while scroll_attempts < max_scroll_attempts:
                # Get current count of post links
                current_count = page.locator('a[href*="/p/"]').count()

                if current_count == previous_count and scroll_attempts > 0:
                    # No new posts loaded after scroll, we've reached the end
                    self.log(f"No more tagged posts to load (total: {current_count})", "debug")
                    break

                if current_count >= max_posts:
                    # We have enough posts
                    self.log(f"Loaded {current_count} tagged posts (reached max_posts limit)", "debug")
                    break

                previous_count = current_count

                # Scroll to bottom of page
                page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                time.sleep(1.5)  # Wait for new posts to load

                scroll_attempts += 1
                self.log(f"Scroll {scroll_attempts}: Found {current_count} tagged posts", "debug")

            # Find posts on tagged page
            self.log("Finding tagged posts...", "info")

            # Debug: Check what's actually on the page
            page_content = page.content()
            if 'no posts' in page_content.lower() or 'page not found' in page_content.lower():
                self.log("Page shows 'no posts' or 'not found'", "warning")

            post_links = page.locator('a[href*="/p/"]').all()

            self.log(f"Found {len(post_links)} tagged posts", "info")

            if not post_links:
                # Debug: Save screenshot to see what's wrong
                try:
                    screenshot_path = Path(f"/tmp/imginn_no_tagged_{username}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png")
                    page.screenshot(path=str(screenshot_path))
                    self.log(f"No tagged posts found - screenshot saved to {screenshot_path}", "warning")
                except Exception:
                    pass
                self.log("No tagged posts found", "warning")
                return []

            # Extract all post URLs upfront to avoid stale element issues
            # (elements become stale after page.go_back())
            post_urls = []
            for idx, post_link in enumerate(post_links[:max_posts]):
                try:
                    href = post_link.get_attribute('href', timeout=5000)
                    if href:
                        # Ensure full URL
                        if not href.startswith('http'):
                            href = f"https://imginn.com{href}"
                        post_urls.append(href)
                except Exception as e:
                    self.log(f"Tagged {idx+1}: Failed to get URL: {str(e)[:50]}", "debug")
                    continue

            self.log(f"Processing {len(post_urls)} tagged posts (max {max_posts})", "info")

            # Track consecutive old posts to handle pinned posts
            consecutive_old_posts = 0
            max_consecutive_old_posts = 5  # Allow up to 5 old posts (pinned) before stopping

            # Set initial progress so dashboard shows 0/N immediately
            self.activity_manager.update_status(
                "Downloading tagged",
                progress_current=0,
                progress_total=len(post_urls)
            )

            for i, post_url in enumerate(post_urls):
                # Update progress at start of each iteration (fires even on skips)
                self.activity_manager.update_status(
                    "Downloading tagged",
                    progress_current=i + 1,
                    progress_total=len(post_urls)
                )

                try:
                    # Extract media ID from URL
                    media_id = self._extract_media_id_from_url(post_url)

                    if not media_id:
                        self.log(f"Could not extract media ID from {post_url}", "warning")
                        continue

                    self.log(f"[{i+1}/{len(post_urls)}] Checking tagged post {media_id}", "debug")

                    # Check if already processed (either downloaded or checked for phrases/age)
                    if media_id in processed_posts or post_url in processed_posts:
                        self.log(f"Post {media_id} already processed, skipping", "debug")
                        continue

                    # Rate limiting between post downloads to avoid Cloudflare blocks
                    if i > 0:
                        post_delay = random.uniform(3, 8)
                        self.log(f"Rate limit: waiting {post_delay:.1f}s before tagged post {i+1}", "debug")
                        time.sleep(post_delay)

                    # For tagged posts, ALWAYS navigate to post page for high-res download
                    # (Never use profile download which gives low-res .webp)
                    page.goto(post_url, wait_until='domcontentloaded')

                    # Wait for page to load
                    time.sleep(2)

                    # Wait for navigation to complete
                    try:
                        page.wait_for_load_state('networkidle', timeout=5000)
                    except Exception:
                        # Continue even if network isn't idle - page might still be usable
                        self.log("Network didn't idle, but continuing", "debug")

                    # Check if on post page
                    if "/p/" not in page.url:
                        self.log(f"Not a downloadable post (URL: {page.url})", "warning")
                        self._safe_go_back(page, username, tagged=True)
                        continue

                    # IMPORTANT: Wait for post page content to fully render
                    # This ensures download buttons are from the POST PAGE, not tagged page preview
                    try:
                        # Wait for the post container to be visible
                        page.wait_for_selector('div.main-content, div.post, div.content, div.single-post', timeout=3000)
                        time.sleep(1)  # Additional wait for download buttons to render
                    except Exception:
                        self.log("Post container not found, checking for Cloudflare...", "debug")

                    # Check for Cloudflare challenge and handle it
                    cloudflare_bypassed = False
                    if self._is_cloudflare_challenge(page):
                        self.log(f"Cloudflare challenge detected on tagged post {media_id}", "warning")
                        if not self._handle_cloudflare_on_post(page, post_url):
                            # Cloudflare bypass failed - skip this post WITHOUT marking as processed
                            # so it can be retried on next run
                            self.log(f"Skipping tagged post {media_id} due to Cloudflare block (will retry later)", "warning")
                            try:
                                page.goto(f"https://imginn.com/tagged/{username}/?ref=index")
                                time.sleep(3)
                            except Exception:
                                pass
                            continue
                        cloudflare_bypassed = True

                    self.log(f"Navigated to tagged post page: {page.url}", "debug")
                    self._dismiss_consent_dialog(page)

                    # Extract the actual poster's username (not the tagged user)
                    # On tagged pages, posts are FROM other users who tagged this user
                    poster_username = profile_name  # Default to tagged user
                    try:
                        username_elem = page.locator('div.username a').first
                        if username_elem.count() > 0:
                            username_href = username_elem.get_attribute('href')
                            if username_href:
                                # Extract username from href like "/evalongoria.of/" -> "evalongoria.of"
                                poster_username = username_href.strip('/').lower()
                                self.log(f"Poster username: @{poster_username}", "debug")
                    except Exception as e:
                        self.log(f"Could not extract poster username, using default: {e}", "debug")

                    # Extract post date - ALWAYS extract for proper file naming
                    post_date = self._extract_post_date(page)

                    # Use post date for filename, or current date
                    if post_date:
                        date_str = post_date.strftime('%Y%m%d_%H%M%S')
                        self.log(f"Original post date: {post_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
                    else:
                        date_str = datetime.now().strftime('%Y%m%d_%H%M%S')
                        self.log(f"No original date found, using current time", "debug")

                    # Check date filter
                    if post_date and post_date < cutoff_date:
                        consecutive_old_posts += 1
                        self.log(f"Tagged post too old ({post_date.strftime('%Y-%m-%d')}), skipping (consecutive old: {consecutive_old_posts}/{max_consecutive_old_posts})", "info")

                        # Clean up temp file if exists
                        if 'temp_download_path' in locals() and temp_download_path and temp_download_path.exists():
                            temp_download_path.unlink()
                            self.log(f"Deleted temp file for old post", "debug")

                        # Mark this old post as checked in database (only when phrase_config is set) - use poster_username for tagged content
                        if phrase_config and media_id:
                            self._record_download(
                                media_id=media_id,
                                username=poster_username,
                                filename=f"_old_post_{media_id}",
                                url=post_url,
                                post_date=post_date,
                                content_type='tagged',
                                metadata={'marker': True, 'reason': 'old_post'}
                            )

                        self._safe_go_back(page, username, tagged=True)

                        # Stop only after max_consecutive_old_posts consecutive old posts (handles pinned posts at top)
                        if consecutive_old_posts >= max_consecutive_old_posts:
                            self.log(f"Found {consecutive_old_posts} consecutive old tagged posts - stopping", "info")
                            break
                        else:
                            continue  # Skip this old post but keep checking (might be pinned)

                    # Reset consecutive old posts counter - we found a post within date range
                    consecutive_old_posts = 0

                    # Check for phrase matching if configured
                    if phrase_config and phrase_config.get('enabled'):
                        if not self._check_post_phrases(page, phrase_config):
                            self.log(f"Tagged post does not match phrase criteria, skipping download", "info")
                            # Clean up temp file if exists
                            if 'temp_download_path' in locals() and temp_download_path and temp_download_path.exists():
                                temp_download_path.unlink()
                                self.log(f"Deleted temp file for non-matching post", "debug")

                            # Mark this post as checked (but not downloaded) in database - use poster_username
                            if media_id:
                                self._record_download(
                                    media_id=media_id,
                                    username=poster_username,
                                    filename=f"_phrase_checked_{media_id}",
                                    url=post_url,
                                    post_date=post_date,
                                    content_type='tagged',
                                    metadata={'marker': True, 'reason': 'phrase_checked'}
                                )

                            self._safe_go_back(page, username, tagged=True)
                            continue
                        else:
                            self.log(f"Tagged post matches phrase criteria, using high-res download", "info")

                    # Check for carousel
                    carousel_next = page.locator('div[role="button"][aria-label*="Next"], .swiper-button-next').first
                    has_carousel = carousel_next.count() > 0

                    if has_carousel:
                        self.log(f"Carousel detected in tagged post - will download all carousel images", "info")
                        self._dismiss_consent_dialog(page)

                        # CRITICAL: Wait for POST PAGE carousel download buttons to be ready
                        # This prevents downloading from the tagged page preview
                        try:
                            # Wait for a post-page download button (an a.btn link with "scontent" in its href, a[download], or a.download)
                            page.wait_for_selector('a.btn[href*="scontent"], a[download], a.download', timeout=3000)
                            time.sleep(1.5)  # Additional wait for all carousel images to load
                            self.log("Carousel download buttons ready on post page", "debug")
                        except Exception:
                            self.log("Download buttons not found, but continuing", "debug")
                    else:
                        self.log("Single image tagged post", "debug")

                    # Handle downloads - always use download buttons from post page
                    image_count = 0
                    max_images = 10

                    # Download images (carousel or single)
                    if has_carousel:
                        all_slides = page.locator('.swiper-slide').all()
                        self.log(f"Found {len(all_slides)} carousel slides in tagged post", "debug")

                        # Download each slide's image
                        for slide_index in range(min(len(all_slides), max_images)):
                            self.log(f"Processing carousel slide {slide_index + 1}/{len(all_slides)}", "debug")

                            # Get the current slide element to scope our searches
                            current_slide = all_slides[slide_index]

                            # Click next to navigate to this slide (except for first one)
                            if slide_index > 0:
                                next_btn = page.locator('div[role="button"][aria-label*="Next"], .swiper-button-next').first
                                if next_btn.count() > 0 and next_btn.is_visible():
                                    try:
                                        next_btn.click(force=True)
                                    except Exception:
                                        self.log(f"Carousel next button click timed out at slide {slide_index + 1}, stopping carousel", "warning")
                                        break
                                    time.sleep(2)  # Wait for slide transition and image to load

                            # Look for download button - prefer high-res, fallback to .webp
                            # IMPORTANT: Buttons inside the CURRENT SLIDE are tried first; the whole page is searched only as a fallback
                            download_url = None
                            webp_fallback_url = None
                            slide_downloaded = False  # Track if this specific slide was downloaded
                            download_selectors = [
                                'a.btn[href*="scontent"][href*=".jpg"]',  # High-res jpg
                                'a.btn[href*="scontent"][href*=".mp4"]',  # Video
                                'a.btn[href*="scontent"]',  # Any scontent
                                'a[download][href*=".jpg"]',
                                'a[download][href*=".mp4"]',
                                'a.download',
                                'a[href*="/post"]'
                            ]

                            # Search for download buttons - first try within slide, then try page-level
                            # Imginn often has download buttons outside the .swiper-slide elements
                            search_contexts = [current_slide, page]

                            for search_context in search_contexts:
                                if download_url:  # Already found, skip other contexts
                                    break

                                for selector in download_selectors:
                                    btn = search_context.locator(selector).first
                                    if btn.count() > 0:
                                        temp_url = btn.get_attribute('href')
                                        if temp_url and temp_url != '#' and temp_url != 'javascript:void(0)':
                                            if not temp_url.startswith('http'):
                                                temp_url = f"https://imginn.com{temp_url}"

                                            # Store .webp as fallback, but keep looking for better
                                            if '.webp' in temp_url.lower():
                                                if not webp_fallback_url:
                                                    webp_fallback_url = temp_url
                                                    self.log(f"Found .webp link (fallback): {temp_url[:80]}...", "debug")
                                                continue

                                            # Found non-.webp link, use it
                                            download_url = temp_url
                                            self.log(f"Found high-res download for carousel slide {slide_index + 1}: {download_url[:80]}...", "debug")
                                            break

                            # Use .webp fallback if no high-res found
                            used_webp_fallback = False
                            if not download_url and webp_fallback_url:
                                download_url = webp_fallback_url
                                used_webp_fallback = True
                                self.log(f"Using .webp fallback for carousel slide {slide_index + 1}", "info")

                            # If we found a download button, use it for high-res
                            if download_url:
                                try:
                                    import requests
                                    from urllib.parse import urlparse, unquote

                                    response = requests.get(download_url, timeout=30, headers={
                                        'User-Agent': self.user_agent,
                                        'Referer': 'https://imginn.com/'
                                    }, cookies=self._get_cookies_for_requests())
                                    response.raise_for_status()

                                    # Extract filename and media ID from the actual file
                                    url_path = urlparse(download_url).path
                                    original_name = unquote(url_path.split('/')[-1].split('?')[0])
                                    if original_name.startswith('post'):
                                        original_name = original_name[4:]

                                    # The media ID is the filename without extension
                                    actual_media_id = Path(original_name).stem
                                    ext = Path(original_name).suffix or '.jpg'

                                    # Build filename for carousel image using actual media ID (use poster's username)
                                    filename = f"{poster_username}_{date_str}_{actual_media_id}_{slide_index + 1}{ext}"
                                    filepath = output_dir / filename

                                    # Save file
                                    with open(filepath, 'wb') as f:
                                        f.write(response.content)

                                    # Log with appropriate quality label
                                    quality_label = "fallback" if used_webp_fallback else "high-res"
                                    self.log(f"Downloaded tagged ({quality_label}): {filename} from @{poster_username} ({len(response.content)} bytes)", "info")
                                    downloaded_files.append(str(filepath))

                                    # Check for duplicate hash before recording
                                    if self.unified_db:
                                        from pathlib import Path as PathLib
                                        # Check for duplicate hash (hash blacklist persists even if original deleted)
                                        file_hash = self.unified_db.get_file_hash(str(filepath))
                                        if file_hash:
                                            existing = self.unified_db.get_download_by_file_hash(file_hash)
                                            if existing and existing.get('file_path') and str(filepath) != existing.get('file_path'):
                                                # Duplicate hash found - content was already downloaded (prevents redownload of deleted content)
                                                self.log(f"⚠ Duplicate content detected (hash match): {filename} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
                                                # Delete the duplicate regardless of whether original file still exists
                                                try:
                                                    filepath.unlink()
                                                    self.log(f"Deleted duplicate (hash blacklist): {filename}", "debug")
                                                    continue
                                                except Exception as e:
                                                    self.log(f"Failed to delete duplicate {filename}: {e}", "warning")

                                    # Update timestamps
                                    if post_date:
                                        self._update_file_timestamps(filepath, post_date)

                                    image_count += 1
                                    slide_downloaded = True  # Mark this slide as successfully downloaded

                                    # Add to tracking
                                    self.downloaded_files.add(actual_media_id)

                                    # Mark in database (or defer for later) - use poster_username for tagged content
                                    unique_url = f"{post_url}#{filename}"
                                    self._record_download(
                                        media_id=actual_media_id,
                                        username=poster_username,
                                        filename=filename,
                                        url=unique_url,
                                        post_date=post_date,
                                        file_path=str(filepath),
                                        content_type='tagged',
                                        deferred=defer_database
                                    )

                                except Exception as e:
                                    self.log(f"Failed to download carousel image {slide_index + 1}: {e}", "error")
                                    # Don't continue - try fallback method below

                            # Fallback: Download from current slide's img/video src if no download button worked
                            if not slide_downloaded:
                                self.log(f"Trying fallback: downloading from slide {slide_index + 1} media src", "debug")
                                # current_slide already defined at top of loop

                                # Try img first, then video
                                media_src = None
                                slide_img = current_slide.locator('img').first
                                if slide_img.count() > 0:
                                    media_src = slide_img.get_attribute('src')
                                else:
                                    # Check for video tag
                                    slide_video = current_slide.locator('video source, video').first
                                    if slide_video.count() > 0:
                                        media_src = slide_video.get_attribute('src')
                                        self.log(f"Found video for slide {slide_index + 1}", "debug")

                                if media_src:
                                    # Skip lazy placeholders
                                    if 'lazy.jpg' not in media_src and '483011604' not in media_src:
                                        try:
                                            import requests
                                            from urllib.parse import urlparse, unquote

                                            if not media_src.startswith('http'):
                                                media_src = f"https:{media_src}" if media_src.startswith('//') else f"https://imginn.com{media_src}"

                                            response = requests.get(media_src, timeout=30, headers={
                                                'User-Agent': self.user_agent,
                                                'Referer': 'https://imginn.com/'
                                            }, cookies=self._get_cookies_for_requests())
                                            response.raise_for_status()

                                            # Extract filename
                                            url_path = urlparse(media_src).path
                                            original_name = unquote(url_path.split('/')[-1].split('?')[0])
                                            actual_media_id = Path(original_name).stem
                                            ext = Path(original_name).suffix or '.jpg'

                                            # Build filename
                                            filename = f"{poster_username}_{date_str}_{actual_media_id}_{slide_index + 1}{ext}"
                                            filepath = output_dir / filename

                                            # Save file
                                            with open(filepath, 'wb') as f:
                                                f.write(response.content)

                                            self.log(f"Downloaded tagged (fallback): {filename} from @{poster_username} ({len(response.content)} bytes)", "info")
                                            downloaded_files.append(str(filepath))

                                            # Check for duplicate hash before recording
                                            if self.unified_db:
                                                from pathlib import Path as PathLib
                                                file_hash = self.unified_db.get_file_hash(str(filepath))
                                                if file_hash:
                                                    existing = self.unified_db.get_download_by_file_hash(file_hash)
                                                    if existing and existing.get('file_path') and str(filepath) != existing.get('file_path'):
                                                        existing_path = PathLib(existing['file_path'])
                                                        if existing_path.exists():
                                                            self.log(f"⚠ Duplicate file detected: {filename} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
                                                            try:
                                                                filepath.unlink()
                                                                self.log(f"Deleted duplicate: {filename}", "debug")
                                                                continue
                                                            except Exception as e:
                                                                self.log(f"Failed to delete duplicate {filename}: {e}", "warning")

                                            # Update timestamps
                                            if post_date:
                                                self._update_file_timestamps(filepath, post_date)

                                            image_count += 1

                                            # Add to tracking
                                            self.downloaded_files.add(actual_media_id)

                                            # Mark in database (or defer for later) - use poster_username for tagged content
                                            unique_url = f"{post_url}#{filename}"
                                            self._record_download(
                                                media_id=actual_media_id,
                                                username=poster_username,
                                                filename=filename,
                                                url=unique_url,
                                                post_date=post_date,
                                                file_path=str(filepath),
                                                content_type='tagged',
                                                deferred=defer_database
                                            )

                                        except Exception as e:
                                            self.log(f"Failed to download from media src for slide {slide_index + 1}: {e}", "error")
                                else:
                                    self.log(f"No media (img/video) found for carousel slide {slide_index + 1}", "warning")

                    else:
                        # Single image - download from post page using download button
                        download_url = None
                        webp_fallback_url = None
                        download_selectors = [
                            'a.btn[href*="scontent"][href*=".jpg"]',  # High-res jpg
                            'a.btn[href*="scontent"][href*=".mp4"]',  # Video
                            'a.btn[href*="scontent"]',  # Any scontent
                            'a[download][href*=".jpg"]',
                            'a[download][href*=".mp4"]',
                            'a.download',
                            'a[href*="/post"]'
                        ]

                        for selector in download_selectors:
                            btn = page.locator(selector).first
                            if btn.count() > 0:
                                temp_url = btn.get_attribute('href')
                                if temp_url and temp_url != '#' and temp_url != 'javascript:void(0)':
                                    if not temp_url.startswith('http'):
                                        temp_url = f"https://imginn.com{temp_url}"

                                    # Store .webp as fallback, but keep looking for better
                                    if '.webp' in temp_url.lower():
                                        if not webp_fallback_url:
                                            webp_fallback_url = temp_url
                                            self.log(f"Found .webp link (fallback): {temp_url[:80]}...", "debug")
                                        continue

                                    # Found non-.webp link, use it
                                    download_url = temp_url
                                    self.log(f"Found high-res download for single image: {download_url[:80]}...", "debug")
                                    break

                        # Use .webp fallback if no high-res found
                        if not download_url and webp_fallback_url:
                            download_url = webp_fallback_url
                            self.log(f"Using .webp fallback for single image", "info")

                        if download_url:
                            try:
                                import requests
                                from urllib.parse import urlparse, unquote

                                response = requests.get(download_url, timeout=30, headers={
                                    'User-Agent': self.user_agent,
                                    'Referer': 'https://imginn.com/'
                                }, cookies=self._get_cookies_for_requests())
                                response.raise_for_status()

                                # Extract filename and media ID from the actual file
                                url_path = urlparse(download_url).path
                                original_name = unquote(url_path.split('/')[-1].split('?')[0])
                                if original_name.startswith('post'):
                                    original_name = original_name[4:]

                                # The media ID is the filename without extension
                                actual_media_id = Path(original_name).stem
                                ext = Path(original_name).suffix or '.jpg'

                                # Build filename using poster's username
                                filename = f"{poster_username}_{date_str}_{actual_media_id}{ext}"
                                filepath = output_dir / filename

                                # Save file
                                with open(filepath, 'wb') as f:
                                    f.write(response.content)

                                self.log(f"Downloaded tagged (high-res): {filename} from @{poster_username} ({len(response.content)} bytes)", "info")
                                downloaded_files.append(str(filepath))

                                # Check for duplicate hash before recording
                                if self.unified_db:
                                    from pathlib import Path as PathLib
                                    file_hash = self.unified_db.get_file_hash(str(filepath))
                                    if file_hash:
                                        existing = self.unified_db.get_download_by_file_hash(file_hash)
                                        if existing and existing.get('file_path') and str(filepath) != existing.get('file_path'):
                                            existing_path = PathLib(existing['file_path'])
                                            if existing_path.exists():
                                                self.log(f"⚠ Duplicate file detected: {filename} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
                                                try:
                                                    filepath.unlink()
                                                    self.log(f"Deleted duplicate: {filename}", "debug")
                                                    continue
                                                except Exception as e:
                                                    self.log(f"Failed to delete duplicate {filename}: {e}", "warning")

                                # Update timestamps
                                if post_date:
                                    self._update_file_timestamps(filepath, post_date)

                                image_count = 1

                                # Add to tracking
                                self.downloaded_files.add(actual_media_id)

                                # Mark in database (or defer for later) - use poster_username for tagged content
                                self._record_download(
                                    media_id=actual_media_id,
                                    username=poster_username,
                                    filename=filename,
                                    url=post_url,
                                    post_date=post_date,
                                    file_path=str(filepath),
                                    content_type='tagged',
                                    deferred=defer_database
                                )

                            except Exception as e:
                                self.log(f"Failed to download single image: {e}", "warning")
                        else:
                            # No download button found - try video/image src as fallback
                            self.log("No download button found, trying video/image src fallback", "debug")
                            media_src = None

                            # Try video first - multiple selectors for different page structures
                            video_selectors = [
                                'video source[src]',
                                'video[src]',
                                'video source[type*="mp4"]',
                                '.video-container video',
                                '.post-video video',
                                'div[class*="video"] video',
                                'video'
                            ]
                            for v_selector in video_selectors:
                                video_elem = page.locator(v_selector).first
                                if video_elem.count() > 0:
                                    # Try src attribute first, then check source child
                                    media_src = video_elem.get_attribute('src')
                                    if not media_src:
                                        source_elem = video_elem.locator('source').first
                                        if source_elem.count() > 0:
                                            media_src = source_elem.get_attribute('src')
                                    if media_src and media_src != '#':
                                        self.log(f"Found video src via '{v_selector}': {media_src[:80]}...", "debug")
                                        break

                            # If no video found, wait a bit and try again (videos may lazy-load)
                            if not media_src:
                                time.sleep(2)
                                for v_selector in video_selectors:
                                    video_elem = page.locator(v_selector).first
                                    if video_elem.count() > 0:
                                        media_src = video_elem.get_attribute('src')
                                        if not media_src:
                                            source_elem = video_elem.locator('source').first
                                            if source_elem.count() > 0:
                                                media_src = source_elem.get_attribute('src')
                                        if media_src and media_src != '#':
                                            self.log(f"Found video src after wait via '{v_selector}': {media_src[:80]}...", "debug")
                                            break

                            # Try image if no video
                            if not media_src:
                                img_elem = page.locator('img[src*="scontent"]:not([src*="profile"]), img[src*="post"]').first
                                if img_elem.count() > 0:
                                    media_src = img_elem.get_attribute('src')
                                    if media_src and 'lazy.jpg' not in media_src:
                                        self.log(f"Found image src: {media_src[:80]}...", "debug")
                                    else:
                                        media_src = None

                            if media_src:
                                try:
                                    import requests
                                    from urllib.parse import urlparse, unquote

                                    if not media_src.startswith('http'):
                                        media_src = f"https://imginn.com{media_src}"

                                    response = requests.get(media_src, timeout=30, headers={
                                        'User-Agent': self.user_agent,
                                        'Referer': 'https://imginn.com/'
                                    }, cookies=self._get_cookies_for_requests())
                                    response.raise_for_status()

                                    url_path = urlparse(media_src).path
                                    original_name = unquote(url_path.split('/')[-1].split('?')[0])
                                    if original_name.startswith('post'):
                                        original_name = original_name[4:]

                                    actual_media_id = Path(original_name).stem
                                    ext = Path(original_name).suffix or '.mp4'

                                    filename = f"{poster_username}_{date_str}_{actual_media_id}{ext}"
                                    filepath = output_dir / filename

                                    with open(filepath, 'wb') as f:
                                        f.write(response.content)

                                    self.log(f"Downloaded (fallback): {filename} ({len(response.content)} bytes)", "info")
                                    downloaded_files.append(str(filepath))

                                    if post_date:
                                        self._update_file_timestamps(filepath, post_date)

                                    image_count = 1
                                    self.downloaded_files.add(actual_media_id)

                                    self._record_download(
                                        media_id=actual_media_id,
                                        username=poster_username,
                                        filename=filename,
                                        url=post_url,
                                        post_date=post_date,
                                        file_path=str(filepath),
                                        content_type='tagged',
                                        deferred=defer_database
                                    )
                                except Exception as e:
                                    self.log(f"Failed to download via fallback: {e}", "error")
                            else:
                                self.log("No download button or media src found for single post", "warning")
                                # Debug: capture screenshot and page content when download fails
                                try:
                                    debug_dir = Path("debug")
                                    debug_dir.mkdir(exist_ok=True)
                                    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                                    screenshot_path = debug_dir / f"no_media_tagged_{media_id}_{timestamp}.png"
                                    page.screenshot(path=str(screenshot_path))
                                    self.log(f"Debug screenshot saved: {screenshot_path}", "debug")
                                    # Also log page title
                                    title = page.title()
                                    self.log(f"Page title: {title}", "debug")

                                    # Check if this is a Cloudflare block - don't mark as processed if so
                                    if self._is_cloudflare_challenge(page):
                                        self.log(f"Cloudflare block detected - NOT marking tagged post {media_id} as processed (will retry later)", "warning")
                                        # Skip to next post without marking as processed
                                        try:
                                            page.goto(f"https://imginn.com/tagged/{username}/?ref=index")
                                            time.sleep(3)
                                        except Exception:
                                            pass
                                        continue
                                except Exception as e:
                                    self.log(f"Failed to capture debug screenshot: {e}", "debug")

                    # Navigate back to tagged page
                    if image_count > 0:
                        self.log(f"Successfully downloaded {image_count} image(s) from tagged post {media_id}", "info")

                    self._safe_go_back(page, username, tagged=True)

                    # If we just bypassed Cloudflare, wait longer to let session stabilize
                    if cloudflare_bypassed:
                        cooldown = random.uniform(15, 25)
                        self.log(f"Post-bypass cooldown: waiting {cooldown:.1f}s to stabilize session", "info")
                        time.sleep(cooldown)
                    else:
                        time.sleep(1)

                except KeyboardInterrupt:
                    self.log("Download interrupted by user", "warning")
                    break
                except Exception as e:
                    self.log(f"Error processing tagged post: {e}", "error")
                    self._safe_go_back(page, username, tagged=True)

            self.log(f"Downloaded {len(downloaded_files)} tagged files", "info")
        except Exception as e:
            self.log(f"Error: {e}", "error")

        # Don't close browser here - reuse it for next profile
        return downloaded_files

    def download_stories(self, username: str, days_back: int = 1, max_stories: int = 50, output_dir: Path = None, skip_database: bool = False, defer_database: bool = False):
        """Download stories from a user with FastDL naming

        Args:
            username: Instagram username
            days_back: How many days back to search (stories expire after 24h)
            max_stories: Maximum stories to download
            output_dir: Output directory
            skip_database: If True, don't record downloads in database (for temporary processing)
            defer_database: If True, defer database recording to pending_downloads list
                           for later recording after file move is complete
        """

        profile_name = username.lower()
        if output_dir is None:
            output_dir = Path(f"/opt/media-downloader/downloads/{profile_name}")
        output_dir.mkdir(parents=True, exist_ok=True)

        # Check site status before doing anything else
        self.log("Checking ImgInn site status...", "debug")
        site_status, error_msg = self.cf_handler.check_site_status("https://imginn.com/", timeout=10)

        if self.cf_handler.should_skip_download(site_status):
            self.log(f"Skipping stories download for @{profile_name} - ImgInn is unavailable: {error_msg}", "warning")
            return []
        elif site_status == SiteStatus.CLOUDFLARE_CHALLENGE:
            self.log("Cloudflare challenge detected, will attempt bypass during download", "info")

        # Scan existing files
        self._scan_existing_files(output_dir, profile_name)

        # Get processed stories from database
        processed_stories = self._get_processed_posts(profile_name)
        self.log(f"Loaded {len(processed_stories)} processed stories for {profile_name} from database", "info")

        downloaded_files = []
        cutoff_date = datetime.now() - timedelta(days=days_back)

        # Start or reuse browser
        self._start_browser()
        page = self.page

        try:
            # Navigate to stories page
            self.log(f"Navigating to @{username} stories page", "info")
            page.goto(f"https://imginn.com/stories/{username}/?ref=index", wait_until='domcontentloaded')

            # CRITICAL: Wait for Cloudflare background JS challenges
            import random
            wait_time = 5 + random.uniform(0, 2)
            self.log(f"Waiting {wait_time:.1f}s for Cloudflare background validation...", "debug")
            time.sleep(wait_time)

            # Wait for page to load
            if not self.wait_for_cloudflare(page):
                self.log("Stories page didn't load properly", "error")
                return []

            # Save cookies
            self.save_cookies(self.context)

            # Wait for stories container to load
            self.log("Waiting for stories to load...", "info")
            try:
                page.wait_for_selector('.swiper-container.reels', timeout=10000)
                self.log("Stories container loaded", "info")
            except Exception:
                self.log("No stories found - may have expired or page structure changed", "warning")
                return []

            # Find the Stories reel (first li.reel with data-uid and title "Stories")
            self.log("Looking for Stories reel...", "info")
            stories_reel = None
            reels = page.locator('li.reel[data-uid]').all()

            for reel in reels:
                try:
                    # Check if this is the "Stories" reel
                    title = reel.locator('.title').first.text_content()
                    if title and title.strip().lower() == "stories":
                        stories_reel = reel
                        self.log(f"Found Stories reel", "info")
                        break
                except Exception:
                    continue

            if not stories_reel:
                self.log("No active Stories found for this user", "warning")
                return []

            # Click the Stories reel to open viewer
            self.log("Opening Stories viewer...", "info")
            stories_reel.click()
            time.sleep(2)  # Wait for viewer to open

            # Find all download buttons in the story viewer
            self.log("Finding story download links...", "info")
            download_links = page.locator('div.action a.download').all()

            if not download_links:
                self.log("No story download links found", "warning")
                return []

            self.log(f"Found {len(download_links)} stories", "info")

            # Set initial progress so dashboard shows 0/N immediately
            stories_to_download = min(len(download_links), max_stories)
            self.activity_manager.update_status(
                "Downloading stories",
                progress_current=0,
                progress_total=stories_to_download
            )

            # Download each story
            story_index = 1
            for i, download_link in enumerate(download_links[:max_stories]):
                # Update progress at start of each iteration (fires even on skips)
                self.activity_manager.update_status(
                    "Downloading stories",
                    progress_current=i + 1,
                    progress_total=stories_to_download
                )

                try:
                    # Get download URL
                    download_url = download_link.get_attribute('href')
                    if not download_url or download_url == '#':
                        self.log(f"Story {story_index}: Invalid download URL", "warning")
                        continue

                    self.log(f"Story {story_index}: {download_url[:80]}...", "debug")

                    # Extract media ID from URL or generate unique ID
                    from urllib.parse import urlparse, unquote
                    url_path = urlparse(download_url).path
                    original_name = unquote(url_path.split('/')[-1].split('?')[0])
                    media_id_full = Path(original_name).stem  # Full filename stem for unique naming
                    ext = Path(original_name).suffix or '.jpg'

                    # Extract real Instagram media ID (18-digit number) for duplicate checking
                    media_id_for_tracking = extract_instagram_media_id(media_id_full)
                    self.log(f"Story {story_index}: Full ID: {media_id_full[:40]}..., Tracking ID: {media_id_for_tracking}", "debug")

                    # Check if already downloaded using the normalized media ID
                    if media_id_for_tracking in self.downloaded_files or media_id_for_tracking in processed_stories:
                        self.log(f"Story {story_index}: Already downloaded (tracking ID: {media_id_for_tracking}), skipping", "debug")
                        story_index += 1
                        continue

                    # Also check with full ID for backwards compatibility
                    if media_id_full in self.downloaded_files or media_id_full in processed_stories:
                        self.log(f"Story {story_index}: Already downloaded (full ID: {media_id_full[:30]}...), skipping", "debug")
                        story_index += 1
                        continue

                    # Use current date for stories (they expire after 24h)
                    story_date = datetime.now()
                    date_str = story_date.strftime('%Y%m%d_%H%M%S')

                    # Build filename: {profile}_{date}_{media_id}_story{index}{ext}
                    # Use full media ID in filename for uniqueness
                    filename = f"{profile_name}_{date_str}_{media_id_full}_story{story_index}{ext}"
                    filepath = output_dir / filename

                    # Download the story
                    try:
                        import requests

                        response = requests.get(download_url, timeout=30, headers={
                            'User-Agent': self.user_agent,
                            'Referer': 'https://imginn.com/'
                        }, cookies=self._get_cookies_for_requests())
                        response.raise_for_status()

                        # Save file
                        with open(filepath, 'wb') as f:
                            f.write(response.content)

                        self.log(f"Downloaded story: {filename} ({len(response.content)} bytes)", "info")
                        downloaded_files.append(str(filepath))

                        # Check for duplicate hash before recording
                        if self.unified_db:
                            from pathlib import Path as PathLib
                            file_hash = self.unified_db.get_file_hash(str(filepath))
                            if file_hash:
                                existing = self.unified_db.get_download_by_file_hash(file_hash)
                                if existing and existing.get('file_path') and str(filepath) != existing.get('file_path'):
                                    existing_path = PathLib(existing['file_path'])
                                    if existing_path.exists():
                                        self.log(f"⚠ Duplicate file detected: {filename} matches {existing['filename']} from {existing['platform']}/{existing['source']}", "warning")
                                        try:
                                            filepath.unlink()
                                            self.log(f"Deleted duplicate: {filename}", "debug")
                                            continue
                                        except Exception as e:
                                            self.log(f"Failed to delete duplicate {filename}: {e}", "warning")

                        # Update timestamps
                        self._update_file_timestamps(filepath, story_date)

                        # Add both tracking ID and full ID to tracking set for comprehensive duplicate prevention
                        self.downloaded_files.add(media_id_for_tracking)
                        self.downloaded_files.add(media_id_full)

                        # Mark in database with media_id in metadata (or defer for later)
                        # Use the normalized media ID for database tracking to prevent future duplicates
                        if not skip_database or defer_database:
                            self._record_download(
                                media_id=media_id_for_tracking,
                                username=profile_name,
                                filename=filename,
                                url=download_url,
                                post_date=story_date,
                                file_path=str(filepath),
                                content_type='stories',
                                metadata={'media_id_full': media_id_full},
                                deferred=defer_database
                            )

                        story_index += 1

                    except Exception as e:
                        self.log(f"Failed to download story {story_index}: {e}", "error")
                        story_index += 1
                        continue

                except Exception as e:
                    self.log(f"Error processing story {story_index}: {e}", "error")
                    story_index += 1
                    continue

            self.log(f"Downloaded {len(downloaded_files)} story files", "info")

        except Exception as e:
            self.log(f"Error downloading stories: {e}", "error")

        # Don't close browser here - reuse it for next profile
        return downloaded_files


def main():
    """Manually exercise the downloader and report results in FastDL naming.

    Downloads posts for a hard-coded test profile. When the last command-line
    argument contains ``imginn.com/p/`` it is forwarded to ``download_posts``
    as ``specific_post_url`` together with a wide ``days_back`` window;
    otherwise posts from the last 14 days are requested.

    After the download, every returned file is listed (with a breakdown of
    the ``{profile}_{YYYYMMDD_HHMMSS}_{media_id}{ext}`` name when it matches
    that format), followed by the file count and total size of the profile's
    download folder.
    """
    import sys

    test_username = "evalongoria"
    banner = "=" * 60

    print(banner)
    print("ImgInn Downloader - FastDL Compatible Naming")
    print(banner)
    print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(banner)

    # ImgInnDownloader.__init__ has no ``api_key`` parameter; passing one
    # raised TypeError before the harness could do anything.
    downloader = ImgInnDownloader(
        headless=False  # Use with xvfb
    )

    # Check for specific post URL in arguments
    if len(sys.argv) > 1 and 'imginn.com/p/' in sys.argv[-1]:
        # Download specific post without date filter
        files = downloader.download_posts(
            username=test_username,
            days_back=365,  # Use large value to bypass date filter
            max_posts=5,
            specific_post_url=sys.argv[-1]
        )
    else:
        # Download posts from the last 2 weeks
        files = downloader.download_posts(
            username=test_username,
            days_back=14,
            max_posts=50
        )

    print("\n" + banner)
    print("RESULTS")
    print(banner)

    if files:
        print(f"Successfully downloaded {len(files)} files!")
        print("\n📁 Downloaded files (FastDL naming format):")

        # Anchor on the fixed-width date/time fields instead of splitting on
        # the first underscores, so profile names that themselves contain
        # underscores are still parsed correctly. The media ID stops at the
        # first dot, as before.
        fastdl_name = re.compile(
            r"^(?P<profile>.+)_(?P<date>\d{8}_\d{6})_(?P<media_id>[^.]+)"
        )

        for f in files:
            file_path = Path(f)
            name = file_path.name
            print(f"  - {name}")

            # Show the naming format when the name follows it; files with
            # other names are still listed so the output matches the count.
            parsed = fastdl_name.match(name)
            if parsed:
                print(f"    Profile: {parsed.group('profile')}")
                print(f"    Date: {parsed.group('date')}")
                print(f"    Media ID: {parsed.group('media_id')}")

            # A returned path may no longer exist on disk (e.g. removed after
            # download); report that instead of aborting the whole summary.
            try:
                size = file_path.stat().st_size / 1024
            except OSError as e:
                print(f"    Size: unavailable ({e})")
            else:
                print(f"    Size: {size:.1f} KB")
    else:
        print("No files downloaded")

    # Check total in folder
    download_dir = Path(f"/opt/media-downloader/downloads/{test_username}")
    if download_dir.exists():
        # Only regular files count toward the "files" total; glob("*") also
        # yields subdirectories.
        all_files = [entry for entry in download_dir.glob("*") if entry.is_file()]
        total_size = sum(entry.stat().st_size for entry in all_files) / 1024
        print(f"\n📊 Total in folder: {len(all_files)} files ({total_size:.1f} KB)")


# Run the manual test harness only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()