Files
media-downloader/modules/move_module.py
Todd 0d7b2b1aab Initial commit
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 22:42:55 -04:00

1714 lines
79 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Move Module - Handles file moving and timestamp management
"""
import os
import shutil
import time
import pwd
import grp
import gc
import sqlite3
from pathlib import Path
from datetime import datetime
from typing import Optional, List, Dict, Union, Any
from modules.base_module import LoggingMixin
from modules.universal_logger import get_logger
logger = get_logger('MoveManager') # For module-level functions
# Import UnifiedDatabase for file hash deduplication
try:
from .unified_database import UnifiedDatabase
except ImportError:
try:
from unified_database import UnifiedDatabase
except ImportError:
UnifiedDatabase = None
logger.warning("UnifiedDatabase not available - file hash deduplication disabled")
# Import date utilities for EXIF timestamp updates
try:
from modules.date_utils import DateHandler
DATE_UTILS_AVAILABLE = True
except ImportError:
try:
from .date_utils import DateHandler
DATE_UTILS_AVAILABLE = True
except ImportError:
DATE_UTILS_AVAILABLE = False
logger.debug("DateHandler not available - EXIF updates disabled")
def _extract_exif_date(filepath: Path) -> Optional[datetime]:
"""Extract date from EXIF metadata using exiftool
Checks DateTimeOriginal, CreateDate, then DateCreated in order of preference.
Returns None if no valid date found.
"""
import subprocess
try:
result = subprocess.run([
'exiftool', '-s', '-s', '-s',
'-DateTimeOriginal', '-CreateDate', '-DateCreated',
str(filepath)
], capture_output=True, text=True, timeout=10)
if result.returncode == 0 and result.stdout.strip():
# exiftool returns dates in format "YYYY:MM:DD HH:MM:SS"
for line in result.stdout.strip().split('\n'):
date_str = line.strip()
if date_str and date_str != '-':
try:
# Try parsing EXIF date format
return datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S")
except ValueError:
try:
# Try alternate format without time
return datetime.strptime(date_str, "%Y:%m:%d")
except ValueError:
continue
except (subprocess.TimeoutExpired, FileNotFoundError):
pass
except Exception as e:
logger.debug(f"EXIF date extraction failed for {filepath}: {e}")
return None
# Pushover notifier handled by scheduler/notification system
class MoveManager(LoggingMixin):
"""Manages file moves with proper timestamp handling and notifications"""
def __init__(self, log_callback=None, notifier=None, unified_db=None, face_recognition_enabled=True, on_download_complete=None, event_emitter=None):
    """
    Initialize the MoveManager

    Args:
        log_callback: Optional callback function for logging (tag, level, message)
        notifier: Optional PushoverNotifier instance for batch notifications
        unified_db: Optional UnifiedDatabase instance for file hash deduplication
        face_recognition_enabled: Enable face recognition filtering (default: True)
        on_download_complete: Optional callback when downloads complete (platform, source, count)
        event_emitter: Optional ScraperEventEmitter instance for real-time scraping monitor
    """
    # Initialize logging via mixin
    self._init_logger('MoveManager', log_callback, default_module='Move')
    self.notifier = notifier
    self.unified_db = unified_db
    self.face_recognition_enabled = face_recognition_enabled
    self.on_download_complete = on_download_complete
    self.event_emitter = event_emitter
    self.current_session = {}  # Store session context for event emission
    self.face_module = None
    # Per-run counters, reported after batch moves complete.
    self.stats = {
        'moved': 0,
        'skipped': 0,
        'failed': 0,
        'duplicates': 0,  # Track files skipped due to duplicate content
        'review_queue': 0  # Track files moved to review queue (no face match)
    }
    # Track moved files for batch notifications
    self.batch_context = None
    self.moved_files = []
    self.review_queue_files = []  # Separate tracking for review queue items
    self.repost_queue = []  # Queue for repost detection processing (done after moves complete)
    self._last_move_had_face_recognition = False  # Tracks if last move_file ran face recognition
    # Background thread pool for non-blocking post-processing (thumbnails, dimensions)
    from concurrent.futures import ThreadPoolExecutor
    self._bg_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix='move_bg')
    # Initialize face recognition if enabled
    if self.face_recognition_enabled and self.unified_db:
        try:
            from modules.face_recognition_module import FaceRecognitionModule
            self.face_module = FaceRecognitionModule(
                unified_db=self.unified_db,
                log_callback=lambda msg, lvl: self.log(msg, lvl)
            )
            self.log("Face recognition module initialized", "info")
        except Exception as e:
            # Face filtering is optional: degrade gracefully to moving everything.
            self.log(f"Failed to initialize face recognition: {e}", "warning")
            self.face_module = None
    # Cache face recognition settings (read once from DB, reused per file)
    self._face_recognition_tolerance = None
    self._review_path = None
    self._face_recognition_settings_cache = None  # Raw JSON settings blob
    self._video_face_settings_cache = None  # Parsed video settings dict
    # Initialize activity status manager for real-time updates
    from modules.activity_status import get_activity_manager
    self.activity_manager = get_activity_manager(unified_db)
    # Log database status for debugging hash deduplication
    if self.unified_db:
        self.log("MoveManager initialized with UnifiedDatabase - hash deduplication ENABLED", "info")
    else:
        self.log("MoveManager initialized WITHOUT UnifiedDatabase - hash deduplication DISABLED", "warning")
        # NOTE(review): nesting of this check reconstructed from context
        # (source indentation was stripped) - it only fires when no DB was passed.
        if not UnifiedDatabase:
            self.log("UnifiedDatabase class not imported - hash deduplication DISABLED", "warning")
def set_session_context(self, platform: str, account: str, session_id: str):
"""Set context for current scraping session for event emission
Args:
platform: Platform name (e.g., 'instagram', 'snapchat')
account: Account/username being scraped
session_id: Unique session identifier for this scraping run
"""
self.current_session = {
'platform': platform,
'account': account,
'session_id': session_id
}
self.log(f"Session context set: {platform}/{account} ({session_id})", "debug")
def release_models(self):
"""
Release ML models to free memory.
Call this after batch processing to prevent OOM in long-running services.
Models will be lazy-loaded again when needed.
"""
if self.face_module is not None:
self.face_module.release_model()
self.log("Face recognition model released to free memory", "info")
gc.collect()
def _is_image_file(self, file_path: Path) -> bool:
"""Check if file is an image (not video)"""
image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.heic'}
return file_path.suffix.lower() in image_extensions
def _is_video_file(self, file_path: Path) -> bool:
"""Check if file is a video"""
video_extensions = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv', '.m4v'}
return file_path.suffix.lower() in video_extensions
def _generate_thumbnail_cache(self, file_path: Path, content_hash: str = None) -> None:
    """
    Pre-generate and cache thumbnail for a file.

    This speeds up page loading by having thumbnails ready in the database.
    Uses content hash (SHA256 of file content) as cache key so thumbnails
    survive file moves (e.g., to recycle bin).

    Best-effort: every failure path is swallowed and logged at debug level,
    so callers never see an exception from this method.

    Args:
        file_path: Path to the media file
        content_hash: Optional pre-computed SHA256 content hash
    """
    try:
        import hashlib
        import io
        import subprocess
        from PIL import Image
        # NOTE(review): assumes the thumbnails table already exists in this
        # DB file - if not, the query below raises and is logged at debug.
        thumb_db_path = Path('/opt/media-downloader/database/thumbnails.db')
        # Calculate content hash for cache key (survives file moves)
        if content_hash:
            file_hash = content_hash
        else:
            # Calculate SHA256 of file content, streamed in 64 KiB chunks
            # so large videos are not read into memory at once.
            sha256 = hashlib.sha256()
            with open(file_path, 'rb') as f:
                for chunk in iter(lambda: f.read(65536), b''):
                    sha256.update(chunk)
            file_hash = sha256.hexdigest()
        file_mtime = file_path.stat().st_mtime
        # Check if already cached
        conn = sqlite3.connect(str(thumb_db_path), timeout=10.0)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT 1 FROM thumbnails WHERE file_hash = ?", (file_hash,))
            if cursor.fetchone():
                return  # Already cached
            # Generate thumbnail (None means "could not generate - skip insert")
            thumbnail_data = None
            max_size = (300, 300)
            if self._is_video_file(file_path):
                # Video thumbnail using ffmpeg: grab a single frame at t=1s
                # and pipe it out as MJPEG rather than writing a temp file.
                try:
                    result = subprocess.run([
                        'ffmpeg', '-i', str(file_path),
                        '-ss', '00:00:01.000', '-vframes', '1',
                        '-f', 'image2pipe', '-vcodec', 'mjpeg', '-'
                    ], capture_output=True, timeout=10)
                    if result.returncode == 0 and result.stdout:
                        img = Image.open(io.BytesIO(result.stdout))
                        img.thumbnail(max_size, Image.Resampling.LANCZOS)
                        if img.mode != 'RGB':
                            img = img.convert('RGB')
                        buffer = io.BytesIO()
                        img.save(buffer, format='JPEG', quality=85)
                        thumbnail_data = buffer.getvalue()
                except Exception as e:
                    self.log(f"Video thumbnail generation failed: {e}", "debug")
            elif self._is_image_file(file_path):
                # Image thumbnail
                try:
                    img = Image.open(file_path)
                    img.thumbnail(max_size, Image.Resampling.LANCZOS)
                    # JPEG has no alpha: composite transparent images onto
                    # a white background before saving.
                    if img.mode in ('RGBA', 'LA', 'P'):
                        background = Image.new('RGB', img.size, (255, 255, 255))
                        if img.mode == 'P':
                            img = img.convert('RGBA')
                        background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
                        img = background
                    buffer = io.BytesIO()
                    img.save(buffer, format='JPEG', quality=85)
                    thumbnail_data = buffer.getvalue()
                except Exception as e:
                    self.log(f"Image thumbnail generation failed: {e}", "debug")
            # Cache the thumbnail
            if thumbnail_data:
                from datetime import datetime
                cursor.execute("""
                    INSERT OR REPLACE INTO thumbnails
                    (file_hash, file_path, thumbnail_data, created_at, file_mtime)
                    VALUES (?, ?, ?, ?, ?)
                """, (file_hash, str(file_path), thumbnail_data, datetime.now().isoformat(), file_mtime))
                conn.commit()
                self.log(f"Cached thumbnail: {file_path.name}", "debug")
        finally:
            conn.close()
    except Exception as e:
        self.log(f"Thumbnail cache generation failed for {file_path.name}: {e}", "debug")
def _record_file_inventory_bg(self, destination: Path, source_name_file: str,
                              platform: str, source_name: str,
                              moved_to_review: bool, file_hash: str = None,
                              timestamp: 'datetime' = None) -> None:
    """Record file in inventory table with dimensions (runs in background thread).

    Best-effort: all failures are caught and logged at debug level so the
    background executor never propagates exceptions.

    Args:
        destination: Final destination path
        source_name_file: Original source filename
        platform: Platform name
        source_name: Source/username
        moved_to_review: Whether file was sent to review queue
        file_hash: Pre-computed file hash (or None)
        timestamp: Post date extracted from filename (or None to use current time)
    """
    try:
        # 'review' rows live in the review queue; 'final' rows are published media.
        location = 'review' if moved_to_review else 'final'
        # Anything without a known image extension is recorded as video here.
        content_type_val = 'image' if self._is_image_file(destination) else 'video'
        from datetime import datetime, timezone
        file_stat = destination.stat()
        if timestamp:
            # Preserve the post date from the filename.
            # NOTE(review): assumed naive and formatted as if UTC - confirm.
            inventory_created = timestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
        else:
            inventory_created = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        # Look up download method from downloads table
        method = None
        if platform == 'instagram':
            try:
                with self.unified_db.get_connection() as conn:
                    cursor = conn.cursor()
                    cursor.execute('''
                        SELECT method FROM downloads
                        WHERE filename = ? AND platform = 'instagram'
                        LIMIT 1
                    ''', (source_name_file,))
                    row = cursor.fetchone()
                    if row and row[0]:
                        method = row[0]
            except Exception:
                # Best-effort lookup; a missing method value is acceptable.
                pass
        # Extract media dimensions (PIL for images, ffprobe for videos)
        width, height = None, None
        try:
            if content_type_val == 'image':
                from PIL import Image
                with Image.open(destination) as img:
                    width, height = img.size
            elif content_type_val == 'video':
                import subprocess
                result = subprocess.run(
                    ['ffprobe', '-v', 'error', '-select_streams', 'v:0',
                     '-show_entries', 'stream=width,height', '-of', 'csv=p=0',
                     str(destination)],
                    capture_output=True, text=True, timeout=10
                )
                if result.returncode == 0 and result.stdout.strip():
                    parts = result.stdout.strip().split(',')
                    if len(parts) >= 2:
                        width, height = int(parts[0]), int(parts[1])
        except Exception as e:
            self.log(f"Could not extract dimensions for {destination.name}: {e}", "debug")
        self.unified_db.upsert_file_inventory(
            file_path=str(destination),
            filename=destination.name,
            platform=platform,
            source=source_name,
            content_type=content_type_val,
            file_size=file_stat.st_size,
            file_hash=file_hash if file_hash else None,
            width=width,
            height=height,
            location=location,
            created_date=inventory_created,
            method=method
        )
        self.log(f"Recorded in file_inventory: {destination.name} (location={location})", "debug")
        # Queue for discovery scans (embedding) for files going to final
        if location == 'final':
            try:
                self.unified_db.queue_file_for_discovery(
                    str(destination),
                    scan_types=['embedding'],
                    priority=5
                )
                self.log(f"Queued {destination.name} for discovery scans", "debug")
            except Exception as e:
                self.log(f"Failed to queue for discovery: {e}", "debug")
    except Exception as e:
        self.log(f"Failed to record in file_inventory: {e}", "debug")
def _get_face_recognition_settings(self) -> dict:
"""Get face recognition settings from database (cached per session).
Returns:
dict: Face recognition settings, or empty dict if unavailable.
"""
if self._face_recognition_settings_cache is not None:
return self._face_recognition_settings_cache
if self.unified_db:
try:
import json
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT value FROM settings WHERE key = 'face_recognition'")
result = cursor.fetchone()
if result:
self._face_recognition_settings_cache = json.loads(result[0])
return self._face_recognition_settings_cache
except Exception as e:
self.log(f"Failed to read face recognition settings: {e}", "debug")
self._face_recognition_settings_cache = {}
return self._face_recognition_settings_cache
def _get_face_recognition_tolerance(self, is_video: bool = False, source: str = None) -> float:
"""Get face recognition tolerance from database settings
Args:
is_video: If True, get video_tolerance instead of image tolerance
source: Optional source username (e.g., 'evalongoria') for source-based tolerance
Returns:
float: Tolerance value (0.0-1.0), defaults to 0.15 for images, 0.30 for videos
"""
settings = self._get_face_recognition_settings()
if settings:
# Get base tolerance (video or image)
if is_video:
base_tolerance = float(settings.get('video_tolerance', settings.get('tolerance', 0.30)))
else:
base_tolerance = float(settings.get('tolerance', 0.15))
# Check for source-specific tolerance
if source:
source_tolerances = settings.get('source_tolerances', {})
if source in source_tolerances:
source_tolerance = float(source_tolerances[source])
if is_video:
tolerance = max(source_tolerance, base_tolerance)
self.log(f"Using max of source ({source_tolerance}) and video ({base_tolerance}) tolerance for '{source}': {tolerance}", "debug")
else:
tolerance = source_tolerance
self.log(f"Using source-based tolerance for '{source}': {tolerance}", "debug")
return tolerance
return base_tolerance
# Default: 0.15 for images, 0.30 for videos
return 0.30 if is_video else 0.15
def _get_review_path(self) -> str:
"""Get review path from database settings
Returns:
str: Review path, defaults to '/opt/immich/review' if not set
"""
# Return cached value if available
if self._review_path is not None:
return self._review_path
# Try to read from database
if self.unified_db:
try:
import json
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT value FROM settings WHERE key = 'face_recognition'")
result = cursor.fetchone()
if result:
settings = json.loads(result[0])
review_path = settings.get('review_path', '/opt/immich/review')
self._review_path = review_path
self.log(f"Using review path: {review_path}", "debug")
return review_path
except Exception as e:
self.log(f"Failed to read review path from database: {e}", "debug")
# Default to /opt/immich/review if not found
self._review_path = '/opt/immich/review'
return '/opt/immich/review'
def _get_video_face_recognition_settings(self) -> dict:
"""Get video face recognition settings from database (cached per session).
Returns:
dict: Settings with enable_video_recognition, video_face_frames, frame_positions
"""
if self._video_face_settings_cache is not None:
return self._video_face_settings_cache
default_settings = {
'enable_video_recognition': False,
'video_face_frames': 3,
'frame_positions': [0.1, 0.5, 0.9]
}
settings = self._get_face_recognition_settings()
if settings:
self._video_face_settings_cache = {
'enable_video_recognition': bool(settings.get('enable_video_recognition', False)),
'video_face_frames': int(settings.get('video_face_frames', 3)),
'frame_positions': settings.get('frame_positions', [0.1, 0.5, 0.9])
}
else:
self._video_face_settings_cache = default_settings
return self._video_face_settings_cache
def _apply_ownership(self, file_path: Path) -> None:
"""Apply configured file ownership to moved files
Args:
file_path: Path to file to change ownership
"""
if not self.unified_db:
return
try:
# Get ownership settings from database
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT value FROM settings WHERE key = 'file_ownership'")
result = cursor.fetchone()
if not result:
return # No ownership configured
import json
settings = json.loads(result[0])
# Check if ownership is enabled
if not settings.get('enabled', False):
return
owner = settings.get('owner', '').strip()
group = settings.get('group', '').strip()
if not owner and not group:
return # Nothing to change
# Get current file ownership
stat_info = file_path.stat()
uid = stat_info.st_uid
gid = stat_info.st_gid
# Resolve owner username to UID
if owner:
try:
uid = pwd.getpwnam(owner).pw_uid
except KeyError:
self.log(f"Warning: User '{owner}' not found, skipping ownership change", "warning")
return
# Resolve group name to GID
if group:
try:
gid = grp.getgrnam(group).gr_gid
except KeyError:
self.log(f"Warning: Group '{group}' not found, skipping ownership change", "warning")
return
# Change ownership
os.chown(file_path, uid, gid)
self.log(f"Changed ownership: {file_path.name}{owner}:{group}", "debug")
except PermissionError:
self.log(f"Permission denied changing ownership of {file_path.name} (run as root/sudo)", "warning")
except Exception as e:
self.log(f"Failed to apply ownership to {file_path.name}: {e}", "debug")
def _apply_ownership_to_path(self, dir_path: Path) -> None:
"""Apply configured file ownership to a directory and all parent directories
Args:
dir_path: Path to directory to change ownership (and parents)
"""
if not self.unified_db or not dir_path.exists():
return
try:
# Get ownership settings from database
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT value FROM settings WHERE key = 'file_ownership'")
result = cursor.fetchone()
if not result:
return # No ownership configured
import json
settings = json.loads(result[0])
# Check if ownership is enabled
if not settings.get('enabled', False):
return
owner = settings.get('owner', '').strip()
group = settings.get('group', '').strip()
if not owner and not group:
return # Nothing to change
# Resolve owner username to UID
uid = -1 # -1 means don't change
gid = -1 # -1 means don't change
if owner:
try:
uid = pwd.getpwnam(owner).pw_uid
except KeyError:
self.log(f"Warning: User '{owner}' not found", "warning")
return
if group:
try:
gid = grp.getgrnam(group).gr_gid
except KeyError:
self.log(f"Warning: Group '{group}' not found", "warning")
return
# Apply ownership to this directory and all parents up to /opt/immich/md
base_path = Path("/opt/immich/md")
current_path = dir_path
while current_path != current_path.parent: # Stop at filesystem root
# Stop if we've gone above the base path
if not current_path.is_relative_to(base_path):
break
# Change ownership of this directory
try:
os.chown(current_path, uid, gid)
self.log(f"Changed directory ownership: {current_path}{owner}:{group}", "debug")
except PermissionError:
self.log(f"Permission denied changing ownership of {current_path}", "debug")
break
except Exception as e:
self.log(f"Failed to change ownership of {current_path}: {e}", "debug")
break
# Move to parent directory
current_path = current_path.parent
except Exception as e:
self.log(f"Failed to apply ownership to directories: {e}", "debug")
def move_file(self,
source: Union[str, Path],
destination: Union[str, Path],
timestamp: Optional[datetime] = None,
preserve_if_no_timestamp: bool = True,
content_type: str = None) -> bool:
"""
Move a single file with optional timestamp setting
Args:
source: Source file path
destination: Destination file path
timestamp: Optional datetime to set on the file
preserve_if_no_timestamp: If True and no timestamp provided, preserve original
content_type: Optional content type for tracking
Returns:
True if successful, False otherwise
"""
source = Path(source)
destination = Path(destination)
if not source.exists():
self.log(f"Source file not found: {source}", "error")
self.stats['failed'] += 1
return False
if destination.exists():
self.log(f"Skipping existing file: {destination.name}", "info")
self.stats['skipped'] += 1
# Update database even when skipping to prevent re-downloads
if self.unified_db and self.batch_context:
platform = self.batch_context.get('platform')
source_name = self.batch_context.get('source')
if platform and source_name:
try:
# Update database with final path (using existing destination)
updated = self.unified_db.update_file_location_by_filename(
filename=source.name,
platform=platform,
source=source_name,
final_path=str(destination)
)
if updated:
self.log(f"Updated database for skipped file: {destination.name}", "debug")
except Exception as e:
self.log(f"Failed to update database for skipped file: {e}", "debug")
return False
# Initialize file_hash before conditional block (used later in upsert_file_inventory)
file_hash = None
# File hash deduplication check (checks downloads, recycle_bin, and file_inventory)
if self.unified_db and UnifiedDatabase:
try:
self.activity_manager.update_status(f"Checking file hash: {source.name}")
file_hash = UnifiedDatabase.get_file_hash(str(source))
self.log(f"[HASH_CHECK] Calculated hash for {source.name}: {file_hash[:16] if file_hash else 'None'}...", "debug")
if file_hash:
# Check if this file hash already exists anywhere (downloads, recycle_bin, file_inventory)
is_duplicate = self.unified_db.is_file_hash_downloaded(file_hash)
self.log(f"[HASH_CHECK] Hash exists in system: {is_duplicate}", "debug")
if is_duplicate:
# Get details from downloads table first (most common case)
existing = self.unified_db.get_download_by_file_hash(file_hash)
if existing:
existing_path = existing.get('file_path', 'unknown')
existing_filename = existing.get('filename', 'unknown')
existing_platform = existing.get('platform', 'unknown')
existing_source = existing.get('source', 'unknown')
self.log(f"[HASH_CHECK] Existing file: {existing_filename} at {existing_path}", "debug")
self.log(f"[HASH_CHECK] Source file: {source.name} at {source}", "debug")
self.log(f"[HASH_CHECK] Paths match: {str(source) == existing_path}", "debug")
if existing_path and str(source) != existing_path:
# Check if existing file actually exists at that path
existing_file_exists = Path(existing_path).exists() if existing_path != 'unknown' else False
self.log(f"[HASH_CHECK] Existing file exists on disk: {existing_file_exists}", "debug")
if existing_file_exists:
# Duplicate found - keep first file, delete new
self.log(
f"Skipping duplicate file (same content): {source.name} "
f"[Already exists: {existing_filename} from {existing_platform}/{existing_source} at {existing_path}]",
"warning"
)
self.stats['duplicates'] += 1
# Delete the duplicate source file to save space
try:
source.unlink()
self.log(f"Deleted duplicate: {source.name}", "debug")
except Exception as e:
self.log(f"Failed to delete duplicate {source.name}: {e}", "warning")
return False
else:
# Existing file path in DB but file doesn't exist - allow move and update DB
self.log(f"[HASH_CHECK] Existing record found but file missing at {existing_path}, allowing move to proceed", "debug")
else:
self.log(f"[HASH_CHECK] Source and existing paths match or no existing path, allowing move", "debug")
else:
# Hash exists in recycle_bin or file_inventory but not downloads
self.log(f"Skipping duplicate file: {source.name} [Hash exists in recycle bin or review queue]", "warning")
self.stats['duplicates'] += 1
# Delete the duplicate source file
try:
source.unlink()
self.log(f"Deleted duplicate: {source.name}", "debug")
except Exception as e:
self.log(f"Failed to delete duplicate {source.name}: {e}", "warning")
return False
except Exception as e:
# Don't fail the move if hash check fails, just log and continue
self.log(f"File hash check failed for {source.name}: {e}", "debug")
# Instagram perceptual duplicate detection (visually similar content with overlays)
if self.batch_context:
platform = self.batch_context.get('platform', '')
source_name = self.batch_context.get('source', '')
content_type = self.batch_context.get('content_type', '')
# Check if this is an Instagram downloader (instagram, fastdl, imginn, instaloader, toolzu)
platform_lower = platform.lower()
is_instagram = any(ig_platform in platform_lower for ig_platform in ['instagram', 'fastdl', 'imginn', 'instaloader', 'toolzu'])
if is_instagram:
self.activity_manager.update_status(f"Checking perceptual hash: {source.name}")
perceptual_result = self._check_perceptual_duplicate(str(source), platform, source_name, content_type)
if perceptual_result == "skip":
# This file is a lower quality perceptual duplicate - skip it
self.log(f"Skipping perceptual duplicate (has overlays or lower quality): {source.name}", "info")
return False
# If perceptual_result is None or file_path, continue processing
# Track if file is being moved to review queue
moved_to_review = False
original_intended_path = str(destination)
queued_for_repost_check = False # Track if we added this file to repost queue
# Instagram story repost detection (only if enabled in settings)
if self._is_instagram_story(source) and self.batch_context:
source_username = self.batch_context.get('source', '')
platform = self.batch_context.get('platform', '')
# Check if this is an Instagram downloader (instagram, fastdl, imginn, instaloader, toolzu)
platform_lower = platform.lower()
is_instagram = any(ig_platform in platform_lower for ig_platform in ['instagram', 'fastdl', 'imginn', 'instaloader', 'toolzu'])
if is_instagram:
self.activity_manager.update_status(f"Checking repost detection: {source.name}")
result = self._check_repost_and_replace(str(source), source_username, str(destination))
if result == "queued":
queued_for_repost_check = True
elif result:
source = Path(result)
self.log(f"Replaced repost with original: {source.name}", "info")
# Update batch_context to reflect the ORIGINAL source
# This ensures database records it correctly as from the original user
replacement_filename = source.name.lower()
# Extract original username from filename (e.g., globalgiftfoundation_20251109_...)
import re
match = re.match(r'^([a-z0-9._]+)_\d{8}', replacement_filename)
if match:
original_source = match.group(1)
self.batch_context['source'] = original_source
# Update content_type based on filename
if 'story' in replacement_filename:
self.batch_context['content_type'] = 'story'
elif 'post' in replacement_filename:
self.batch_context['content_type'] = 'post'
elif 'reel' in replacement_filename:
self.batch_context['content_type'] = 'reel'
self.log(f"Updated batch_context: source={original_source}", "debug")
# Face recognition check
if self.face_module:
# Check if video face recognition is enabled
video_settings = self._get_video_face_recognition_settings()
# Process videos with multi-frame face recognition (if enabled)
if self._is_video_file(source):
if video_settings['enable_video_recognition']:
try:
# Get source username from batch context for source-based tolerance
source_username = self.batch_context.get('source') if self.batch_context else None
tolerance = self._get_face_recognition_tolerance(is_video=True, source=source_username)
frame_positions = video_settings['frame_positions']
self.log(f"Checking video with {len(frame_positions)} frames (tolerance: {tolerance}): {source.name}", "info")
self.activity_manager.update_status(f"Checking facial recognition: {source.name}")
# Use multi-frame checking for videos
result = self.face_module.check_video_multiframe(
str(source),
tolerance=tolerance,
positions=frame_positions
)
# Store result for event emission
self._last_face_result = result
# Log face recognition result to database
# Use best_candidate for matched_person when no match found (to show who was closest)
if self.unified_db:
try:
self.unified_db.log_face_recognition_scan(
file_path=str(destination),
has_match=result['has_match'],
matched_person=result.get('person_name') or result.get('best_candidate'),
confidence=result.get('confidence'),
face_count=result.get('face_count', 0),
scan_type='auto'
)
except Exception as log_err:
self.log(f"Failed to log face recognition result: {log_err}", "debug")
if not result['has_match']:
# No face match - move to review queue
review_path = self._get_review_path()
base_path = Path("/opt/immich/md")
if destination.is_relative_to(base_path):
relative_path = destination.relative_to(base_path)
review_dest = Path(review_path) / relative_path
else:
review_dest = Path(review_path) / source.name
best_info = f" (best: {result.get('best_candidate')} at {result.get('confidence', 0):.1%})" if result.get('best_candidate') else ""
self.log(f"No face match in video {source.name} (checked {result['frames_checked']} frames){best_info} - moving to review queue", "info")
destination = review_dest
moved_to_review = True
self.stats['review_queue'] += 1
else:
# Face matched - continue to original destination
self.log(f"Face match in video: {result['person_name']} ({result['confidence']:.1%}, frame {result['best_frame_index']+1}/{result['frames_checked']}) - proceeding to final destination", "info")
# Track that face recognition ran (batch loop handles pacing)
self._last_move_had_face_recognition = True
except Exception as e:
self.log(f"Video face recognition failed for {source.name}: {e} - moving to review queue", "warning")
# Re-check file existence - if file was deleted, skip the move
if not source.exists():
self.log(f"Source file no longer exists after video face check failure: {source.name}", "warning")
self.stats['failed'] += 1
return False
# On error, move to review queue as safety measure
review_path = self._get_review_path()
base_path = Path("/opt/immich/md")
if destination.is_relative_to(base_path):
relative_path = destination.relative_to(base_path)
review_dest = Path(review_path) / relative_path
else:
review_dest = Path(review_path) / source.name
destination = review_dest
moved_to_review = True
self.stats['review_queue'] += 1
else:
# Video face recognition disabled - skip and move to review queue
review_path = self._get_review_path()
base_path = Path("/opt/immich/md")
if destination.is_relative_to(base_path):
relative_path = destination.relative_to(base_path)
review_dest = Path(review_path) / relative_path
else:
review_dest = Path(review_path) / source.name
self.log(f"Video face recognition disabled - moving {source.name} to review queue", "debug")
destination = review_dest
moved_to_review = True
self.stats['review_queue'] += 1
# Process images with face recognition
elif self._is_image_file(source):
try:
is_video = False # Only processing images now
# Get source username from batch context for source-based tolerance
source_username = self.batch_context.get('source') if self.batch_context else None
tolerance = self._get_face_recognition_tolerance(is_video=False, source=source_username)
self.log(f"Checking image (tolerance: {tolerance}): {source.name}", "debug")
result = self.face_module.check_image(str(source), tolerance=tolerance, is_video=is_video)
# Store result for event emission
self._last_face_result = result
# Log face recognition result to database
# Use best_candidate for matched_person when no match found (to show who was closest)
if self.unified_db:
try:
self.unified_db.log_face_recognition_scan(
file_path=str(destination), # Use final destination path
has_match=result['has_match'],
matched_person=result.get('person_name') or result.get('best_candidate'),
confidence=result.get('confidence'),
face_count=result.get('face_count', 0),
scan_type='auto'
)
except Exception as log_err:
self.log(f"Failed to log face recognition result: {log_err}", "debug")
if not result['has_match']:
# No face match - move to review queue instead
# Maintain folder structure in review queue
review_path = self._get_review_path()
base_path = Path("/opt/immich/md")
if destination.is_relative_to(base_path):
# Get relative path from base
relative_path = destination.relative_to(base_path)
# Recreate under review directory
review_dest = Path(review_path) / relative_path
else:
# Fallback to flat structure if not under base path
review_dest = Path(review_path) / source.name
file_type = "video" if is_video else "image"
best_info = f" (best: {result.get('best_candidate')} at {result.get('confidence', 0):.1%})" if result.get('best_candidate') else ""
self.log(f"No face match for {file_type} {source.name}{best_info} - moving to review queue at {review_dest}", "info")
# Update destination to review path
destination = review_dest
moved_to_review = True
self.stats['review_queue'] += 1
else:
# Face matched - continue to original destination
file_type = "video" if is_video else "image"
self.log(f"Face match in {file_type}: {result['person_name']} ({result['confidence']:.1%}) - proceeding to final destination", "debug")
except Exception as e:
# Don't fail the move if face check fails, just log and continue
self.log(f"Face recognition check failed for {source.name}: {e}", "debug")
# Re-check file existence - if file was deleted, skip the move
if not source.exists():
self.log(f"Source file no longer exists after face check failure: {source.name}", "warning")
self.stats['failed'] += 1
return False
finally:
# Track that face recognition ran
self._last_move_had_face_recognition = True
try:
# Ensure destination directory exists
destination.parent.mkdir(parents=True, exist_ok=True)
# Apply ownership to created directories
self._apply_ownership_to_path(destination.parent)
if preserve_if_no_timestamp and not timestamp:
# Try to extract EXIF date from source file first
# This fixes issues where filesystem mtime is wrong but EXIF has correct date
exif_date = None
if self._is_image_file(source):
exif_date = _extract_exif_date(source)
if exif_date:
self.log(f"Extracted EXIF date for {source.name}: {exif_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
# Copy file with metadata preserved (single pass)
shutil.copy2(str(source), str(destination))
if exif_date:
# Override timestamps with EXIF date (more accurate than filesystem)
if DATE_UTILS_AVAILABLE:
DateHandler.update_file_timestamps(destination, exif_date)
self.log(f"Set timestamps from EXIF: {exif_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
else:
timestamp_unix = exif_date.timestamp()
os.utime(destination, (timestamp_unix, timestamp_unix))
self.log(f"Set filesystem timestamp from EXIF: {exif_date.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
else:
# Copy file content
shutil.copy(str(source), str(destination))
# Set timestamp if provided - use DateHandler for comprehensive update
# This sets EXIF metadata (including MetadataDate for Immich) AND filesystem times
if timestamp:
if DATE_UTILS_AVAILABLE:
# Use centralized date handler for EXIF + filesystem timestamps
DateHandler.update_file_timestamps(destination, timestamp)
self.log(f"Set all timestamps to {timestamp.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
else:
# Fallback to filesystem-only timestamps
timestamp_unix = timestamp.timestamp()
os.utime(destination, (timestamp_unix, timestamp_unix))
self.log(f"Set filesystem timestamp to {timestamp.strftime('%Y-%m-%d %H:%M:%S')}", "debug")
self.log(f"Moved: {source.name}{destination.name}", "info")
self.stats['moved'] += 1
self.activity_manager.update_status(f"Moving images: {source.name}")
# Pre-generate thumbnail in background (non-blocking)
self._bg_executor.submit(self._generate_thumbnail_cache, destination, file_hash)
# Apply file ownership if configured
self._apply_ownership(destination)
# Update database with final file location and hash
if self.unified_db and self.batch_context:
platform = self.batch_context.get('platform')
source_name = self.batch_context.get('source')
content_type_ctx = self.batch_context.get('content_type')
# For tagged content, extract actual poster from filename (e.g., "rtlliving_20251124_..." -> "rtlliving")
if content_type_ctx == 'tagged' and source.name:
import re
# Use date pattern to correctly extract usernames that may contain underscores
# Pattern: username_YYYYMMDD_...
date_pattern = re.match(r'^(.+?)_(\d{8})_', source.name)
if date_pattern:
extracted_source = date_pattern.group(1).lower()
# Validate: Instagram usernames are 1-30 chars, alphanumeric + underscore + period
if extracted_source and re.match(r'^[a-z0-9_.]{1,30}$', extracted_source):
if extracted_source != source_name:
self.log(f"Tagged content: using poster @{extracted_source} instead of @{source_name}", "debug")
source_name = extracted_source
else:
self.log(f"Tagged content: extracted '{extracted_source}' doesn't look like valid username, keeping @{source_name}", "debug")
if platform and source_name:
try:
# Update database with final path and hash
updated = self.unified_db.update_file_location_by_filename(
filename=source.name,
platform=platform,
source=source_name,
final_path=str(destination)
)
# Batch all path updates in a single transaction
with self.unified_db.get_connection(for_write=True) as conn:
cursor = conn.cursor()
# If moved to review queue, add intended_path to metadata
if updated and moved_to_review:
import json
cursor.execute('''
SELECT metadata FROM downloads
WHERE filename = ? AND platform = ? AND source = ?
''', (source.name, platform, source_name))
row = cursor.fetchone()
if row:
metadata = json.loads(row['metadata']) if row['metadata'] else {}
metadata['intended_path'] = original_intended_path
cursor.execute('''
UPDATE downloads
SET metadata = ?
WHERE filename = ? AND platform = ? AND source = ?
''', (json.dumps(metadata), source.name, platform, source_name))
self.log(f"Saved intended destination to metadata: {original_intended_path}", "debug")
# Update perceptual hash path
try:
cursor.execute('''
UPDATE instagram_perceptual_hashes
SET file_path = ?
WHERE filename = ? AND platform = ? AND source = ?
''', (str(destination), source.name, platform, source_name))
if cursor.rowcount > 0:
self.log(f"Updated perceptual hash path: {destination}", "debug")
except Exception as e:
self.log(f"Failed to update perceptual hash path: {e}", "debug")
# Update face recognition scans path
try:
cursor.execute('''
UPDATE face_recognition_scans
SET file_path = ?
WHERE file_path = ?
''', (str(destination), original_intended_path))
if cursor.rowcount > 0:
self.log(f"Updated face recognition scan path: {original_intended_path} -> {destination}", "debug")
except Exception as e:
self.log(f"Failed to update face recognition scan path: {e}", "debug")
if updated:
self.log(f"Updated database with final location: {destination}", "debug")
else:
self.log(f"No database record found to update for {source.name}", "debug")
except Exception as e:
self.log(f"Failed to update database location: {e}", "debug")
# Record in file_inventory in background (dimensions + discovery queue)
# This avoids blocking the move pipeline on ffprobe/PIL calls
self._bg_executor.submit(
self._record_file_inventory_bg,
destination, source.name, platform, source_name,
moved_to_review, file_hash, timestamp
)
# Track for batch notification if in batch mode
# Auto-detect content type from path if not provided
if not content_type and self.batch_context:
# Prefer the batch-level content_type (set by caller who knows what's being downloaded)
# This prevents path-based inference from overriding e.g. 'media' with 'post'
# just because the temp dir contains "posts/"
batch_ct = self.batch_context.get('content_type')
if batch_ct:
# Normalize plural forms to singular for proper notification grammar
_SINGULAR = {'posts': 'post', 'stories': 'story', 'reels': 'reel',
'videos': 'video', 'images': 'image', 'items': 'item'}
content_type = _SINGULAR.get(batch_ct, batch_ct)
else:
# No batch content_type set, try to infer from source path
path_str = str(source).lower()
if 'story' in path_str or 'stories' in path_str:
content_type = 'story'
elif 'reel' in path_str:
content_type = 'reel'
elif 'post' in path_str:
content_type = 'post'
elif 'video' in path_str:
content_type = 'video'
elif 'image' in path_str or 'photo' in path_str:
content_type = 'image'
# Track with full path for image attachment in notifications
self.track_moved_file(str(destination), content_type=content_type, is_review=moved_to_review)
# Update repost queue with actual final destination (in case it changed due to review queue)
if queued_for_repost_check and self.repost_queue:
# Find the most recent queue entry for this file and update with actual final destination
for entry in reversed(self.repost_queue):
if Path(entry['file_path']).name == destination.name:
# Update with actual final path (might be review queue)
entry['file_path'] = str(destination)
self.log(f"Updated repost queue entry with final destination: {destination.name}", "debug")
break
# Emit WebSocket event for real-time scraping monitor
if self.event_emitter and self.current_session:
try:
# Determine media type
media_type = 'video' if self._is_video_file(destination) else 'image'
# Determine destination type (media/review/recycle)
dest_str = str(destination).lower()
if moved_to_review or '/review' in dest_str:
destination_type = 'review'
elif '/recycle' in dest_str:
destination_type = 'recycle'
else:
destination_type = 'media'
# Generate thumbnail URL
import urllib.parse
thumbnail_url = f"/api/files/thumbnail?path={urllib.parse.quote(str(destination))}"
# Get face match info if available
face_match = {'matched': False}
if hasattr(self, '_last_face_result') and self._last_face_result:
if self._last_face_result.get('has_match'):
face_match = {
'matched': True,
'person_name': self._last_face_result.get('person_name'),
'confidence': self._last_face_result.get('confidence')
}
# Emit file_moved event
self.event_emitter.emit_file_moved(
session_id=self.current_session.get('session_id', 'unknown'),
platform=self.current_session.get('platform', 'unknown'),
account=self.current_session.get('account', 'unknown'),
filename=destination.name,
media_type=media_type,
destination_type=destination_type,
destination_path=str(destination),
thumbnail_url=thumbnail_url,
face_match=face_match
)
except Exception as emit_err:
self.log(f"Failed to emit file_moved event: {emit_err}", "debug")
return True
except Exception as e:
self.log(f"Failed to move {source.name}: {e}", "error")
self.stats['failed'] += 1
return False
def move_files_batch(self,
                     source_dir: Union[str, Path],
                     dest_dir: Union[str, Path],
                     file_timestamps: Dict[str, Optional[datetime]] = None,
                     extensions: List[str] = None,
                     preserve_if_no_timestamp: bool = True) -> Dict:
    """
    Move multiple files from source to destination with timestamp management

    Args:
        source_dir: Source directory
        dest_dir: Destination directory
        file_timestamps: Dict mapping filename to datetime (or None to preserve)
        extensions: List of file extensions to move (e.g., ['.jpg', '.mp4']);
            matching is case-insensitive, so '.JPG' files match '.jpg'
        preserve_if_no_timestamp: If True, preserve timestamp for files not in dict

    Returns:
        Statistics dictionary
    """
    source_dir = Path(source_dir)
    dest_dir = Path(dest_dir)
    file_timestamps = file_timestamps or {}
    # Skip if source and destination are the same
    if source_dir.resolve() == dest_dir.resolve():
        self.log("Source and destination are the same, skipping move", "info")
        return {"moved": 0, "failed": 0, "errors": []}
    if not source_dir.exists():
        self.log(f"Source directory not found: {source_dir}", "error")
        return self.stats
    # Collect files in a single recursive walk.
    # BUG FIX: the previous code globbed f'*{ext}' and then re-filtered on
    # f.suffix.lower(); files with upper-case extensions (e.g. 'IMG.JPG')
    # never matched the lowercase glob and were silently skipped, and
    # overlapping patterns could list a file twice.
    ext_filter = {ext.lower() for ext in extensions} if extensions else None
    files_to_move = [
        f for f in source_dir.rglob('*')
        if f.is_file() and (ext_filter is None or f.suffix.lower() in ext_filter)
    ]
    self.log(f"Found {len(files_to_move)} files to move", "info")
    total_files = len(files_to_move)
    for file_idx, source_file in enumerate(files_to_move):
        # Determine destination path (mirror the source tree layout)
        relative_path = source_file.relative_to(source_dir)
        dest_file = dest_dir / relative_path
        # Get timestamp for this file (None -> preserve behavior below)
        timestamp = file_timestamps.get(source_file.name)
        # Update file-level progress
        self.activity_manager.update_status(
            f"Processing {source_file.name}",
            progress_current=file_idx + 1,
            progress_total=total_files
        )
        # Move the file; move_file sets this flag when face recognition ran
        self._last_move_had_face_recognition = False
        self.move_file(
            source_file,
            dest_file,
            timestamp=timestamp,
            preserve_if_no_timestamp=preserve_if_no_timestamp
        )
        # Periodic GC after face recognition batches to free ML tensors
        if self._last_move_had_face_recognition and (file_idx + 1) % 10 == 0:
            gc.collect()
    return self.stats
def move_with_metadata(self, file_info: Dict) -> bool:
    """
    Move a file described by a metadata dictionary.

    Args:
        file_info: Dictionary with keys:
            - source: Source file path (required)
            - destination: Destination file path (required)
            - timestamp: Optional datetime to stamp onto the moved file
            - preserve_original: If True and no timestamp, keep original times

    Returns:
        True on success; False when required keys are missing or the move fails
    """
    src = file_info.get('source')
    dst = file_info.get('destination')
    if not (src and dst):
        self.log("Missing source or destination in file_info", "error")
        return False
    return self.move_file(
        src,
        dst,
        file_info.get('timestamp'),
        file_info.get('preserve_original', True)
    )
def get_stats(self) -> Dict:
    """Return a snapshot (shallow copy) of the movement statistics."""
    return dict(self.stats)
def reset_stats(self):
    """Reset movement statistics to zero.

    BUG FIX: includes 'review_queue', which move_file increments
    (self.stats['review_queue'] += 1) when a file is diverted to the
    review queue; omitting the key here made the first increment after
    a reset raise KeyError.
    """
    self.stats = {
        'moved': 0,
        'skipped': 0,
        'failed': 0,
        'duplicates': 0,
        'review_queue': 0
    }
def start_batch(self, platform: str, source: str = None, content_type: str = None, search_term: str = None):
    """
    Start a batch move operation for notifications

    Args:
        platform: Platform name (instagram, tiktok, forum, etc.)
        source: Source/username
        content_type: Type of content (post, story, reel, thread, etc.)
        search_term: Optional search term (for forum searches)
    """
    self.batch_context = {
        'platform': platform,
        'source': source,
        'content_type': content_type,
        'search_term': search_term
    }
    self.moved_files = []
    # BUG FIX: also reset the review-queue tracker. Previously only
    # end_batch cleared it, so a batch that ended abnormally leaked its
    # review files into the next batch's notifications.
    self.review_queue_files = []
    # Clear per-batch settings caches so DB changes take effect between batches
    self._face_recognition_settings_cache = None
    self._video_face_settings_cache = None
    self._review_path = None
    self.log(f"Started batch move for {platform}/{source or 'unknown'}/{content_type or 'items'}", "debug")
def end_batch(self):
    """
    End batch move operation and send notification

    Sequence (order matters): flush background work, notify for matched
    files, notify for review-queue files, process the Instagram repost
    queue, fire the download-complete callback, then clear batch state.

    Returns:
        Number of files moved in this batch
    """
    # No-op when start_batch was never called (or batch already ended)
    if not self.batch_context:
        return 0
    # Wait for background tasks (thumbnails, inventory) to complete
    # before sending notifications
    self._bg_executor.shutdown(wait=True)
    from concurrent.futures import ThreadPoolExecutor
    # Recreate the executor so later batches can keep submitting background work
    self._bg_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix='move_bg')
    moved_count = len(self.moved_files)
    review_count = len(self.review_queue_files)
    platform = self.batch_context.get('platform', 'unknown')
    source = self.batch_context.get('source')
    content_type = self.batch_context.get('content_type') or 'item' # Handle None explicitly
    search_term = self.batch_context.get('search_term')
    # Send batch notification for matched files (moved to final destination)
    if self.notifier and moved_count > 0:
        try:
            # Prepare download list for notification
            # Use individual file content types if available, otherwise fall back to batch content type
            downloads = []
            for file_info in self.moved_files:
                downloads.append({
                    'source': source,
                    'content_type': file_info.get('content_type') or content_type,
                    'filename': file_info.get('filename'),
                    'file_path': file_info.get('file_path') # Full path for image attachment
                })
            # Send batch notification
            success = self.notifier.notify_batch_download(
                platform=platform,
                downloads=downloads,
                search_term=search_term
            )
            if success:
                self.log(f"Sent notification: {moved_count} {content_type}(s) from {source or platform}", "info")
            else:
                self.log("Failed to send notification", "warning")
        except Exception as e:
            # Notification failure must never break the move pipeline
            self.log(f"Failed to send batch notification: {e}", "error")
    # Send separate notification for review queue items (no face match)
    if self.notifier and review_count > 0:
        try:
            # Prepare review queue list for notification
            downloads = []
            for file_info in self.review_queue_files:
                downloads.append({
                    'source': source,
                    'content_type': file_info.get('content_type') or content_type,
                    'filename': file_info.get('filename'),
                    'file_path': file_info.get('file_path') # Full path for image attachment
                })
            # Send review queue notification with lower priority
            success = self.notifier.notify_batch_download(
                platform=platform,
                downloads=downloads,
                search_term=search_term,
                is_review_queue=True # Flag to indicate this is review queue notification
            )
            if success:
                self.log(f"Sent review queue notification: {review_count} {content_type}(s) from {source or platform}", "info")
            else:
                self.log("Review queue notification not sent (may be disabled in settings)", "debug")
        except Exception as e:
            self.log(f"Failed to send review queue notification: {e}", "error")
    # Process repost queue (download originals, find matches, replace)
    # This happens after all files are moved and notifications sent
    if platform in ['instagram', 'fastdl', 'imginn', 'toolzu', 'instaloader']:
        self.process_repost_queue()
    # Call download complete callback to trigger UI updates
    total_count = moved_count + review_count
    if total_count > 0 and self.on_download_complete:
        try:
            self.on_download_complete(platform, source, total_count)
        except Exception as e:
            # Callback is external code; isolate its failures
            self.log(f"Error in download complete callback: {e}", "error")
    # Clear batch context so a stray second end_batch returns 0
    self.batch_context = None
    self.moved_files = []
    self.review_queue_files = []
    return moved_count + review_count
def _is_instagram_story(self, file_path: Path) -> bool:
"""Check if file is an Instagram story based on path"""
path_str = str(file_path).lower()
return 'story' in path_str or 'stories' in path_str
def _is_repost_detection_enabled(self) -> bool:
"""Check if repost detection is enabled in settings"""
try:
if not self.unified_db:
return False
with self.unified_db.get_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT value FROM settings WHERE key = 'repost_detection'")
result = cursor.fetchone()
if result:
import json
settings = json.loads(result[0])
return settings.get('enabled', False) # Default: DISABLED
except (sqlite3.Error, json.JSONDecodeError, KeyError) as e:
self.log(f"Error checking repost detection setting: {e}", "debug")
return False # Default: DISABLED
def _check_repost_and_replace(self, file_path: str, source_username: str, destination_path: str = None) -> Optional[str]:
"""
Queue file for repost detection (processed after batch completes)
Args:
file_path: Current temp file path (for OCR scanning)
source_username: Username who posted this
destination_path: Final permanent storage path (used in queue)
Returns None (queueing only, no immediate replacement)
"""
# Check if feature is enabled
if not self._is_repost_detection_enabled():
return None # Feature disabled - skip
try:
from modules.instagram_repost_detector import InstagramRepostDetector
# Initialize detector for OCR check only
detector = InstagramRepostDetector(
unified_db=self.unified_db,
log_callback=lambda msg, lvl: self.log(msg, lvl)
)
# Quick OCR check to see if it's a repost (using temp file)
original_username = detector._extract_username_from_repost(file_path)
if original_username and original_username.lower() != source_username.lower():
# Queue for processing after moves complete
# IMPORTANT: Store destination_path (permanent location) NOT file_path (temp)
path_to_queue = destination_path if destination_path else file_path
self.repost_queue.append({
'file_path': path_to_queue,
'source_username': source_username,
'detected_username': original_username
})
self.log(f"Queued repost detection: {Path(path_to_queue).name} → @{original_username}", "info")
return "queued" # Signal that file was queued
return None # No repost detected, no immediate replacement
except Exception as e:
self.log(f"Repost queue check failed: {e}", "debug")
return None
def process_repost_queue(self):
    """Process all queued reposts (called after batch completes)"""
    if not self.repost_queue:
        return
    self.log(f"Processing {len(self.repost_queue)} queued reposts...", "info")
    try:
        from modules.instagram_repost_detector import InstagramRepostDetector
        repost_detector = InstagramRepostDetector(
            unified_db=self.unified_db,
            log_callback=lambda msg, lvl: self.log(msg, lvl)
        )
        # Bucket queued entries by detected username so each original
        # poster's profile is only fetched once per batch
        by_user = {}
        for entry in self.repost_queue:
            by_user.setdefault(entry['detected_username'], []).append(entry)
        checked = 0
        swapped = 0
        for username, items in by_user.items():
            self.log(f"Processing {len(items)} repost(s) from @{username}", "info")
            for entry in items:
                try:
                    # Download the original if needed, locate a match, replace
                    replacement = repost_detector.check_and_replace_repost(
                        entry['file_path'],
                        entry['source_username']
                    )
                    if replacement:
                        self.log(f"✓ Replaced repost with original from @{username}", "success")
                        swapped += 1
                    checked += 1
                except Exception as e:
                    self.log(f"Failed to process repost for @{username}: {e}", "error")
        self.log(f"Repost queue processed: {checked} checked, {swapped} replaced", "info")
    except Exception as e:
        self.log(f"Repost queue processing failed: {e}", "error")
    finally:
        # Always drain the queue, even after failure
        self.repost_queue = []
def _check_perceptual_duplicate(self, file_path: str, platform: str, source: str, content_type: str = None) -> Optional[str]:
"""
Check if file is a perceptual duplicate (visually similar with overlays) for Instagram
Returns:
- None if not a duplicate or feature disabled
- "skip" if this file should be skipped (lower quality duplicate)
- file_path if this file should be kept
"""
try:
from modules.instagram_perceptual_duplicate_detector import InstagramPerceptualDuplicateDetector
# Initialize detector (uses its own universal logger now)
detector = InstagramPerceptualDuplicateDetector(
unified_db=self.unified_db
)
# Run detection
result = detector.check_and_handle_duplicate(file_path, platform, source, content_type)
return result
except Exception as e:
self.log(f"Perceptual duplicate detection failed: {e}", "error")
import traceback
self.log(traceback.format_exc(), "error")
return None
def track_moved_file(self, file_path: str, metadata: Dict[str, Any] = None, content_type: str = None, is_review: bool = False):
    """
    Track a moved file for batch notification.

    Args:
        file_path: Full path to the moved file (for image attachment)
        metadata: Optional metadata dictionary
        content_type: Optional specific content type for this file (overrides batch default)
        is_review: True if file was moved to review queue (no face match)
    """
    # Only meaningful while a batch is active
    if not self.batch_context:
        return
    record = {
        'file_path': file_path,
        'filename': Path(file_path).name,  # Extracted filename for logging
        'metadata': metadata or {},
        'content_type': content_type  # Per-file content type
    }
    bucket = self.review_queue_files if is_review else self.moved_files
    bucket.append(record)
def move_files_simple(source_dir: Union[str, Path],
                      dest_dir: Union[str, Path],
                      extensions: List[str] = None,
                      file_timestamps: Dict[str, datetime] = None,
                      log_callback=None) -> Dict:
    """
    Convenience wrapper around MoveManager for one-off batch moves.

    Args:
        source_dir: Source directory
        dest_dir: Destination directory
        extensions: List of file extensions to move
        file_timestamps: Optional dict mapping filenames to timestamps
        log_callback: Optional logging callback

    Returns:
        Statistics dict
    """
    return MoveManager(log_callback=log_callback).move_files_batch(
        source_dir,
        dest_dir,
        file_timestamps=file_timestamps,
        extensions=extensions
    )
if __name__ == "__main__":
    # Manual smoke test: move two files, one with an explicit timestamp,
    # one preserving its original timestamp.
    import tempfile
    print("Testing MoveManager...")
    with tempfile.TemporaryDirectory() as tmpdir:
        workspace = Path(tmpdir)
        source_root = workspace / "source"
        dest_root = workspace / "destination"
        source_root.mkdir()
        # Create test files
        first = source_root / "test1.jpg"
        second = source_root / "test2.mp4"
        first.write_text("content1")
        second.write_text("content2")
        manager = MoveManager()
        # Move with a specific timestamp applied
        stamp = datetime(2025, 8, 26, 17, 2, 24)
        manager.move_file(first, dest_root / "test1.jpg", timestamp=stamp)
        # Move preserving the original timestamp
        manager.move_file(second, dest_root / "test2.mp4", timestamp=None)
        # Verify the timestamped move landed
        moved = dest_root / "test1.jpg"
        if moved.exists():
            mtime = datetime.fromtimestamp(os.stat(moved).st_mtime)
            print(f"✅ File 1 moved with timestamp: {mtime}")
        print(f"Stats: {manager.get_stats()}")